view tools/ilmn_pacbio/smrtpipe_galaxy.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
line wrap: on
line source

#!/usr/bin/python
import sys
import os
import subprocess
import optparse as op
import xml.etree.cElementTree as et

# Debug tracing switch.
#
# When TRACE is true, run() writes an extra copy of __input__.xml and
# __settings__.xml into the directory named by TRACE_PATH.  TRACE_PATH is
# not defined by default, so enabling tracing means uncommenting BOTH of
# the lines below (and pointing TRACE_PATH at a writable directory).
TRACE = False
#TRACE = True
#TRACE_PATH = '/home/UNIXHOME/jsorenson'

class SmrtpipeGalaxy:
    """Wrapper for running smrtpipe under galaxy"""
    # NOTE: the docstring above is also passed to optparse as the --help
    # description, so it is deliberately left as a single short line.
    #
    # Typical use:  sys.exit( SmrtpipeGalaxy( sys.argv ).run() )
    #
    # run() converts an ini-style config file into the two XML files that
    # smrtpipe.py is invoked with, runs smrtpipe.py through the shell, and
    # optionally copies one designated result file to the location galaxy
    # asked for.

    # Names of the generated XML files (created in the current directory).
    INPUT_FILE = '__input__.xml'
    SETTINGS_FILE = '__settings__.xml'

    def __init__(self, argv):
        """Parse the command line.

        argv is the complete argument vector *including* the program name
        in argv[0] (i.e. sys.argv, not sys.argv[1:]).
        """
        self.__parseOptions(argv)

    def __parseOptions(self, argv):
        """Populate self.options, self.args and self.configFile from argv.

        Exactly one positional argument (the config file) is required;
        optparse's error() is called otherwise, which exits the process.
        """
        usage = 'Usage: %prog [--help] [options] smrtpipe.ini'
        parser = op.OptionParser(usage=usage,
                                 description=SmrtpipeGalaxy.__doc__)
        parser.add_option("--output",
                          help="Designate a file generated by smrtpipe as the expected output for galaxy")
        parser.add_option("--nproc", type="int",
                          help="Number of processes to use (-D NPROC)")
        parser.add_option("--galaxy_output",
                          help="File name provided by galaxy where output should be placed")
        parser.add_option("--dry_run", action="store_true",
                          help="Create auxiliary XML files and exit")
        parser.add_option("--dat_extension",
                          help="Soft link .dat files to have this extension (some pipelines require certain extensions)")

        parser.set_defaults(output=None, dry_run=False, galaxy_output=None,
                            dat_extension=None, nproc=0)
        self.options, self.args = parser.parse_args(argv)

        # argv[0] is the program name, so one real argument means len == 2.
        if len(self.args) != 2:
            parser.error('Expected 1 argument')

        self.configFile = self.args[1]

    def __parseConfig(self):
        """Read self.configFile and return its sections in file order.

        Blank lines and lines whose first non-blank character is '#' are
        ignored, as is anything that appears before the first [section]
        header.  Each header creates a new object via section_factory();
        following lines are handed to addParameterLine() when they contain
        '=' and to addLine() otherwise.
        """
        sections = []
        current = None
        infile = open(self.configFile, 'r')
        try:
            for line in infile:
                text = line.strip()
                # Test the stripped text so that indented comments are
                # skipped as well instead of being parsed as content.
                if not text or text.startswith('#'):
                    continue
                if text.startswith('[') and text.endswith(']'):
                    current = section_factory(text[1:-1])
                    sections.append(current)
                elif current is None:
                    continue
                elif '=' in text:
                    current.addParameterLine(text)
                else:
                    current.addLine(text)
        finally:
            infile.close()
        return sections

    def _buildCommand(self, settingsFile, inputFile):
        """Return the shell command line used to launch smrtpipe.py.

        '-D NPROC=<n>' is included only when --nproc is greater than zero.
        Both stdout and stderr of smrtpipe.py are redirected to the file
        smrtpipe.err ('2>&1'; a bare '2>1' would instead create a file
        literally named '1').
        """
        if self.options.nproc > 0:
            nproc = '-D NPROC=%d' % self.options.nproc
        else:
            nproc = ''
        return 'smrtpipe.py %s --params=%s xml:%s > smrtpipe.err 2>&1' % \
            (nproc, settingsFile, inputFile)

    def transferOutput(self):
        """Copy the --output file to the --galaxy_output location.

        Returns a (success, message) tuple.  When either option is unset
        there is nothing to do and (True, '') is returned.  A missing
        source file or a failing copy yields (False, <explanation>).
        """
        source = self.options.output
        target = self.options.galaxy_output
        if not source or not target:
            return True, ''
        if not os.path.exists(source):
            return False, "Can't find file %s (job error?)" % source
        # Pass an argument list (no shell) so that paths containing spaces
        # or shell metacharacters are copied correctly.
        status = subprocess.call(['cp', source, target])
        if status != 0:
            return False, "Failed to copy %s to %s (cp exit status %d)" % \
                (source, target, status)
        return True, ''

    def run(self):
        """Generate the XML inputs, run smrtpipe and transfer the output.

        Returns the process exit status: 0 on success (or after a dry
        run), 1 for configuration/transfer problems, or the non-zero
        status reported for the smrtpipe command.  Diagnostics are written
        to stderr.
        """
        if not os.path.exists(self.configFile):
            sys.stderr.write("Can't find config file %s\n" % self.configFile)
            return 1

        sections = self.__parseConfig()

        if not sections:
            sys.stderr.write("No sections found in %s\n" % self.configFile)
            return 1
        # The [input] section must come first; it is rendered separately
        # from all the other sections.
        if sections[0].name != 'input':
            sys.stderr.write("No [input] section found in %s\n" % self.configFile)
            return 1

        inputSection = sections[0]
        inputSection.softLinkDats(self.options.dat_extension)
        inputXml = inputSection.makeXmlElement()
        write_xml_to_file(self.INPUT_FILE, inputXml)
        if TRACE:
            write_xml_to_file(os.path.join(TRACE_PATH, self.INPUT_FILE), inputXml)

        # Every remaining section appends its own child to the settings root.
        settings = et.Element('smrtpipeSettings')
        for section in sections[1:]:
            section.makeXmlElement(settings)

        write_xml_to_file(self.SETTINGS_FILE, settings)
        if TRACE:
            write_xml_to_file(os.path.join(TRACE_PATH, self.SETTINGS_FILE), settings)

        cmd = self._buildCommand(self.SETTINGS_FILE, self.INPUT_FILE)

        if self.options.dry_run:
            sys.stdout.write('Command to run:\n')
            sys.stdout.write('%s\n' % cmd)
            return 0

        out, errCode, errMsg = backticks(cmd)
        if errCode != 0:
            sys.stderr.write("error while running: %s\n" % cmd)
            sys.stderr.write('%s\n' % errMsg)
            # Echo the smrtpipe log, when there is one, to help diagnosis.
            if os.path.exists('log/smrtpipe.log'):
                sys.stderr.write('Log:\n')
                logfile = open('log/smrtpipe.log', 'r')
                try:
                    for line in logfile:
                        sys.stderr.write(line)
                finally:
                    logfile.close()
            return errCode

        success, errMsg = self.transferOutput()
        if not success:
            sys.stderr.write('%s\n' % errMsg)
            return 1

        return 0

def write_xml_to_file( fileName, root ):
    """Write an ElementTree element to fileName as an XML document.

    The file is created (or overwritten) and receives an XML declaration
    line followed by the serialised element and a trailing newline.

    fileName -- path of the file to write
    root     -- ElementTree element to serialise
    """
    # Serialise before opening the file so that a serialisation failure
    # does not leave a truncated file behind.
    xml = et.tostring(root)
    # tostring() returns a byte string; on interpreters where that is not
    # the native str type it has to be decoded before the text-mode write.
    # The default serialisation is ASCII-only, so 'ascii' is sufficient.
    if not isinstance(xml, str):
        xml = xml.decode('ascii')
    outfile = open(fileName, 'w')
    try:
        outfile.write('<?xml version="1.0"?>\n')
        outfile.write(xml + '\n')
    finally:
        # Always release the handle, even if a write fails.
        outfile.close()

def section_factory( name ):
    """Create the section object for a config section called name.

    The section named 'input' is represented by InputSection; every other
    name gets a plain Section.
    """
    section_class = InputSection if name == 'input' else Section
    return section_class(name)

class Section:
    """One [section] of the config file: its raw lines and key=value pairs."""

    def __init__( self, name ):
        self._name = name
        # Every line added to the section, in the order received.
        self._lines = []
        # key -> value for lines added through addParameterLine().
        self._vars = {}

    @property
    def name(self):
        """The section name given at construction."""
        return self._name

    def addLine( self, line ):
        """Record line verbatim."""
        self._lines.append(line)

    def addParameterLine( self, line ):
        """Record line and store the key/value pair split at its first '='.

        Key and value are stripped of surrounding whitespace; a repeated
        key replaces the earlier value.
        """
        self.addLine(line)
        split_at = line.find( '=' )
        self._vars[line[:split_at].strip()] = line[split_at + 1:].strip()

    def makeXmlElement( self, settings ):
        """Append this section's XML representation to settings.

        A section named 'global' becomes <protocol name="generic">; any
        other section becomes <module name="...">.  Each stored variable
        is added as <param name="key"><value>value</value></param>.
        Nothing is returned; settings is modified in place.
        """
        if self._name == 'global':
            tag, label = "protocol", 'generic'
        else:
            tag, label = "module", self._name
        root = et.SubElement( settings, tag, {'name': label} )
        for key, value in self._vars.items():
            param = et.SubElement( root, 'param', {'name': key} )
            et.SubElement( param, 'value' ).text = value

    def __str__( self ):
        """Compact single-line dump of the section, intended for debugging."""
        pieces = [ 'S { name=', self._name,
                   '; lines=%s' % ','.join(self._lines) ]
        pieces.extend( '; %s=%s' % (key, value)
                       for key, value in self._vars.items() )
        pieces.append(' }')
        return ''.join(pieces)

class InputSection( Section ):
    """The [input] section: a list of data references for smrtpipe."""

    def __init__( self, name ):
        Section.__init__(self,name)

    def softLinkDats( self, newExtension ):
        """Give existing '.dat' inputs an alias ending in newExtension.

        For every stored line whose path part (the text after the first
        ':' if there is one, otherwise the whole line) names an existing
        file ending in '.dat', a symbolic link '<path>.<newExtension>' is
        created unless something already exists under that name, and the
        stored line is rewritten to refer to the link.  All other lines
        are kept unchanged.  Does nothing when newExtension is empty/None.

        Raises OSError if a link cannot be created.
        """
        if not newExtension:
            return
        rewritten = []
        for entry in self._lines:
            # find() returns -1 when there is no ':', which makes the
            # prefix empty and the path the whole entry.
            colon = entry.find(':')
            prefix = entry[:colon + 1]
            path = entry[colon + 1:]
            if path.endswith('.dat') and os.path.exists(path):
                linkName = '%s.%s' % ( path, newExtension )
                if not os.path.exists(linkName):
                    # os.symlink instead of a shell 'ln -s' string: no
                    # quoting problems, and failures are not silent.
                    os.symlink( path, linkName )
                rewritten.append(prefix + linkName)
            else:
                rewritten.append(entry)
        self._lines = rewritten

    def makeXmlElement( self, parent=None ):
        """Build and return the <pacbioAnalysisInputs> element.

        Each stored line contributes one <url> child of <dataReferences>,
        except that a line ending in 'fofn' which names an existing file
        is expanded: every non-blank line of that file contributes a
        <url> instead.

        parent is accepted so the call signature is compatible with
        Section.makeXmlElement; it is not used.
        """
        root = et.Element( "pacbioAnalysisInputs" )
        data = et.SubElement( root, 'dataReferences' )
        count = 0
        for entry in self._lines:
            if entry.endswith('fofn') and os.path.exists(entry):
                fofn = open(entry,'r')
                try:
                    for line in fofn:
                        count = self._addReference( data, line.strip(), count )
                finally:
                    fofn.close()
            else:
                count = self._addReference( data, entry, count )
        return root

    def _addReference( self, data, ref, index ):
        """Append one <url> for ref to data; return the next reference index.

        Empty references are skipped (index is returned unchanged).  A
        reference containing ':' is used directly as the 'ref' attribute;
        anything else gets a generated 'run:0000000-NNNN' ref and is
        stored in a <location> child.
        """
        if not ref:
            return index
        node = et.SubElement( data, 'url' )
        if ':' in ref:
            node.attrib[ 'ref' ] = ref
        else:
            node.attrib[ 'ref' ] = 'run:0000000-%04d' % index
            et.SubElement( node, 'location' ).text = ref
        return index + 1

def backticks( cmd, merge_stderr=True ):
    """
    Simulates the perl backticks (``) command with error-handling support.

    cmd          -- command line, executed through the shell
    merge_stderr -- when true (default) the command's stderr is folded
                    into the captured output; when false stderr is
                    captured separately and discarded

    Returns ( output, error code, error message ):
      * on success (exit status 0) output is the list of captured lines
        without their newline terminators and the message is '';
      * on any non-zero status (including termination by a signal) output
        is [] and the message is the captured lines joined by os.linesep.
    """
    if merge_stderr:
        stderr_target = subprocess.STDOUT
    else:
        stderr_target = subprocess.PIPE

    p = subprocess.Popen( cmd, shell=True, stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE, stderr=stderr_target,
                          close_fds=True )

    # communicate() closes the child's stdin, drains every captured pipe
    # and waits for termination, so the child can neither block on a full
    # stderr pipe nor wait forever for input.
    raw, _discarded_stderr = p.communicate()

    # Pipes deliver byte strings; decode where that is not the native str.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8', 'replace')

    # Split on '\n' only and drop the empty piece after a final newline.
    # Unlike slicing off the last character of every line, this keeps a
    # last line that has no newline terminator intact.
    lines = raw.split('\n')
    if lines[-1] == '':
        lines.pop()

    errCode = p.returncode or 0
    if errCode != 0:
        return [], errCode, os.linesep.join(lines)
    return lines, errCode, ''

if __name__ == '__main__':
    # Script entry point: the wrapper receives the full argv (program name
    # included) and the value returned by run() becomes the exit status.
    sys.exit( SmrtpipeGalaxy( sys.argv ).run() )