view commons/launcher/RepeatMaskerProgramLauncher.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line source

#!/usr/bin/env python

##@file
# Launch RepeatMasker (pairwise alignment for repeat detection).


import os
import sys

from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher


class RepeatMaskerProgramLauncher( AbstractProgramLauncher ):
    """
    Launch RepeatMasker (pairwise alignment for repeat detection).
    
    Exactly one of two modes has to be chosen (see checkSpecificAttributes):
      - a subject file is given (-s): the '.cat' file produced by RepeatMasker
        is converted into the 'align' format;
      - only low complex/simple repeats are masked (-m): the '.cat' file is
        converted into the 'path' format.
    """
    
    def __init__( self ):
        """
        Constructor: initialize the specific attributes with their defaults.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "RepeatMasker"
        self._formatInFile = "fasta"
        self._sbjFile = ""      # subject file (option -s), "" means not set
        self._nbProc = 1        # nb of processors (option -n)
        self._calcGc = False    # calculate the GC content (option -g)
        self._skipIs = False    # skip bacterial IS check (option -b)
        self._maskSsr = True    # mask low-complexity/simple repeats (unset by -l)
        self._onlySsr = False   # only mask low complex/simple repeats (option -m)
        self._cmdLineSpecificOptions = "s:n:gblmo:"
        
        
    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n     -s: name of the subject file (repeats, format='fasta')"
        string += "\n     -n: nb of processors to use in parallel (default='%i')" % ( self.getNbProcessors() )
        string += "\n     -g: calculate the GC content"
        string += "\n     -b: skip bacterial insertion element check"
        string += "\n     -l: does not mask low-complexity DNA or simple repeats"
        string += "\n     -m: only masks low complex/simple repeats (no interspersed repeats)"
        string += "\n     -o: name of the output file"
        string += "\n         with -s: format='align', default=inFile+'.cat.align')"
        string += "\n         with -m: format='path', default=inFile+'.cat.path')"
        return string
    
    
    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.
        
        @param o: option flag (e.g. '-s')
        @param a: value given with the option ("" for flags without value)
        
        Unknown options are silently ignored here.
        """
        if o == "-s":
            self.setSubjectFile( a )
        elif o == "-n":
            self.setNbProcessors( a )
        elif o == "-g":
            self.setCalculateGCcontent()
        elif o == "-b":
            self.setSkipBacterialIsCheck()
        elif o == "-l":
            self.unsetMaskSsr()
        elif o == "-m":
            self.setOnlySsr()
        elif o == "-o":
            self.setOutputFile( a )
            
            
    def setSubjectFile( self, arg ):
        """
        Set the name of the subject file (option -s).
        """
        self._sbjFile = arg
        
        
    def setNbProcessors( self, arg ):
        """
        Set the number of processors (option -n).
        The argument is converted with int(), hence a non-numeric string
        raises a ValueError.
        """
        self._nbProc = int(arg)
        
        
    def setCalculateGCcontent( self ):
        """
        Ask for the GC content to be calculated (option -g).
        """
        self._calcGc = True
        
        
    def setSkipBacterialIsCheck( self ):
        """
        Ask for the bacterial insertion element check to be skipped (option -b).
        """
        self._skipIs = True
        
        
    def unsetMaskSsr( self ):
        """
        Ask not to mask low-complexity DNA or simple repeats (option -l).
        """
        self._maskSsr = False
        
        
    def setOnlySsr( self ):
        """
        Ask to mask only low complex/simple repeats (option -m).
        """
        self._onlySsr = True
        
        
    def getSubjectFile( self ):
        """
        Return the name of the subject file ("" if not set).
        """
        return self._sbjFile
    
    
    def getNbProcessors( self ):
        """
        Return the number of processors.
        """
        return self._nbProc
    
    
    def getCalculateGCcontent( self ):
        """
        Return True if the GC content has to be calculated.
        """
        return self._calcGc
    
    
    def getSkipBacterialIsCheck( self ):
        """
        Return True if the bacterial insertion element check has to be skipped.
        """
        return self._skipIs
    
    
    def getMaskSsr( self ):
        """
        Return True if low-complexity DNA and simple repeats have to be masked.
        """
        return self._maskSsr
    
    
    def getOnlySsr( self ):
        """
        Return True if only low complex/simple repeats have to be masked.
        """
        return self._onlySsr
    
    
    def _getDefaultOutputFile( self ):
        """
        Return the default name of the output file for the current mode:
        inFile+'.cat.path' with -m, inFile+'.cat.align' otherwise.
        """
        if self.getOnlySsr():
            return "%s.cat.path" % ( self.getInputFile() )
        return "%s.cat.align" % ( self.getInputFile() )
    
    
    def _setDefaultOutputFileIfMissing( self ):
        """
        Give the output file its default name if none was specified.
        Single place for a rule that several methods rely on.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( self._getDefaultOutputFile() )
            
            
    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.
        
        Exactly one of -s (subject file) and -m (only SSR) is required:
        otherwise an error and the help are printed and the program exits
        with status 1. The output file gets its default name if missing.
        """
        if ( self.getSubjectFile() == "" and not self.getOnlySsr() ) \
               or ( self.getSubjectFile() != "" and self.getOnlySsr() ):
            string = "ERROR: need to specify -s or -m"
            print( string )
            print( self.getHelpAsString() )
            sys.exit(1)
        self._setDefaultOutputFileIfMissing()
        
        
    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for RepeatMaskerClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() )
        self._wrpCmdLine += " -n %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._wrpCmdLine += " -g"
        if self.getSkipBacterialIsCheck():
            self._wrpCmdLine += " -b"
        if not self.getMaskSsr():
            self._wrpCmdLine += " -l"
        if self.getOnlySsr():
            self._wrpCmdLine += " -m"
        if self.getOutputFile() != "":
            self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        if self.getVerbosityLevel() != 0:
            self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )
            
            
    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " -dir ."
        self._prgCmdLine += " -pa %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._prgCmdLine += " -gccalc"
        if self.getSkipBacterialIsCheck():
            self._prgCmdLine += " -no_is"
        # '-nolow' disables the masking of low-complexity DNA and simple
        # repeats, hence it is passed only when this masking was unset (-l),
        # in agreement with the wrapper command-line and the summary.
        if not self.getMaskSsr():
            self._prgCmdLine += " -nolow"
        if self.getOnlySsr():
            self._prgCmdLine += " -int"
        if self.getSubjectFile() != "":
            self._prgCmdLine += " -lib %s" % ( self.getSubjectFile() )
        self._prgCmdLine += " %s" % ( self.getInputFile() )
        
        
    def setListFilesToKeep( self ):
        """
        Set the list of files to keep: the output file (default name if
        none was specified) and the '.cat' file.
        """
        self._setDefaultOutputFileIfMissing()
        self.appendFileToKeep( self.getOutputFile() )
        self.appendFileToKeep( "%s.cat" % ( self.getInputFile() ) )
        
        
    def setListFilesToRemove( self ):
        """
        Set the list of files to remove (files named after the input file).
        """
        for suffix in [ "stderr", "tbl", "ori.out", "masked", "out", "log", "ref" ]:
            self.appendFileToRemove( "%s.%s" % ( self.getInputFile(), suffix ) )
            
            
    def _convertCatInto( self, formatName ):
        """
        Convert the '.cat' file into the given format ('align' or 'path')
        by launching $REPET_PATH/bin/RMcat2<format>.py.
        Print an error and exit with status 1 if the converter fails.
        A missing REPET_PATH environment variable raises a KeyError.
        
        NOTE(review): the converted file is always written to
        inFile+'.cat.<format>', even when another name was given with -o;
        confirm whether the output file should be used here.
        """
        cmd = os.environ["REPET_PATH"] + "/bin/RMcat2%s.py" % ( formatName )
        cmd += " -i %s.cat" % ( self.getInputFile() )
        cmd += " -o %s.cat.%s" % ( self.getInputFile(), formatName )
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR while converting 'cat' file into '%s' format" % ( formatName )
            print( string )
            sys.exit(1)
            
            
    def convertCatIntoAlign( self ):
        """
        Convert a 'cat' file into the 'align' format.
        """
        self._convertCatInto( "align" )
        
        
    def convertCatIntoPath( self ):
        """
        Convert a 'cat' file into the 'path' format.
        """
        self._convertCatInto( "path" )
        
        
    def setSummary( self ):
        """
        Set the summary of the run (one line per parameter).
        The output file gets its default name if none was specified.
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        if self.getSubjectFile() != "":
            self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
        self._summary += "\nnb processors: %i" % ( self.getNbProcessors() )
        if self.getCalculateGCcontent():
            self._summary += "\ncalculate the GC content"
        if self.getSkipBacterialIsCheck():
            self._summary += "\nskip bacterial insertion element check"
        if self.getMaskSsr():
            self._summary += "\nmask low-complexity DNA or simple repeats"
        if self.getOnlySsr():
            # append (and not assign), otherwise the lines above are lost
            self._summary += "\nonly masks low complex/simple repeats (no interspersed repeats)"
        # the default output name depends on the -m mode, as everywhere else
        self._setDefaultOutputFileIfMissing()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )
        
        
    def run( self ):
        """
        Run the program: launch RepeatMasker, then convert its '.cat' file
        into the 'align' format (or 'path' with -m).
        Print an error and exit with status 1 if RepeatMasker fails.
        """
        self.start()
        
        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)
            
        if not self.getOnlySsr():
            self.convertCatIntoAlign()
        else:
            self.convertCatIntoPath()
            
        self.end()
        
        
if __name__ == "__main__":
    # Script entry point: build a launcher, configure it from the
    # command-line arguments, then run RepeatMasker.
    launcher = RepeatMaskerProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()