view commons/tools/GetMultAlignAndPhylogenyPerTErefSeq.py @ 19:9bcfa7936eec

Deleted selected files
author m-zytnicki
date Mon, 29 Apr 2013 03:23:29 -0400
parents 94ab73e8a190
children
line wrap: on
line source

#!/usr/bin/env python

##@file
# For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.
# usage: GetMultAlignAndPhylogenyPerTErefSeq.py [ options ]
# options:
#      -h: this help
#      -S: step (0: all steps [default], 1:file generation, 2:multiple alignements, 3:phylogenies)
#      -p: table with the annotations (format=path)
#      -s: table with the TE reference sequences (format=seq)
#      -g: table with the genome sequence (format=seq)
#      -r: name or file with TE reference sequence(s) (all by default)
#      -m: MSA method (default=Refalign/Map)
#      -l: minimum length of copies (default=100)
#      -n: number of longest copies to use (default=20)
#      -y: minimum  copy proportion compare to references (default=0.5)
#      -R: keep the reference sequence (only with Refalign)
#      -C: configuration file
#      -q: queue name
#      -c: clean
#      -d: temporary directory
#      -v: verbosity level (default=0/1)

import os
import sys
import glob
import ConfigParser

import pyRepet.launcher.programLauncher

from commons.core.coord.PathUtils import PathUtils
from commons.core.seq.FastaUtils import FastaUtils
from commons.core.coord.SetUtils import SetUtils
from commons.core.sql.TablePathAdaptator import TablePathAdaptator
from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator
from commons.tools.OrientSequences import OrientSequences
from ConfigParser import MissingSectionHeaderError
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.LoggerFactory import LoggerFactory
from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
from commons.launcher import LaunchMap
from commons.core.sql.DbFactory import DbFactory
from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
from commons.core.launcher.Launcher import Launcher
from commons.core.utils.FileUtils import FileUtils


LOG_DEPTH = "repet.tools"

## For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.
#
class GetMultAlignAndPhylogenyPerTErefSeq(object):
    
    def __init__(self, pathTableName="",refSeqTableName="", genomeSeqTableName="", step=0, mSAmethod="RefAlign",keepRefseq=False, configFileName= "", clean = True, verbosity=3):
        """
        Constructor.
        """
        self.step = step
        self._pathTable = pathTableName
        self._refSeqTable = refSeqTableName
        self._genomeSeqTable = genomeSeqTableName
        self._TErefseq = ""
        self._MSAmethod = mSAmethod
        self._minCopyLength = 100
        self._nbLongestCopies = 20
        self._minPropCopy = 0.5
        self._keepRefseq = keepRefseq
        self.setConfigFileName(configFileName)
        self._queue = ""
        self._tmpDir = ""
        self._clean = clean
        self._verbosity = verbosity
        self._db = None
        self._tpaAnnot = None
        self._tsaRef = None
        self._pL = pyRepet.launcher.programLauncher.programLauncher()
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
              
    def _logAndRaise(self, errorMsg):
        self._log.error(errorMsg)
        raise Exception(errorMsg)
    
    def setAttributesFromCmdLine(self):
        desc = "For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.\n"
        #Commented: it's not true, Config File is mandatory!
#        desc += "Connection to the database parameters are retrieved from the environment"
        
        #TODO: format options as other scripts (have a look at LaunchTemplate)
        parser = RepetOptionParser(description = desc, epilog = "")
        parser.add_option("-S", "--step",      dest = "step" ,              action = "store", type = "int", help = "step (0: all steps [default], 1:file generation, 2:multiple alignments, 3:phylogenies)", default = 0 )
        parser.add_option("-p", "--pathTable", dest = "path",               action= "store",  type = "string", help = "(mandatory) table with the annotations (format=path)",  default = "")
        parser.add_option("-s", "--refSeqTable",dest = "refSeqTable",       action= "store", type = "string", help = "(mandatory) table with the TE reference sequences (format=seq)",  default = "")
        parser.add_option("-g", "--genomeSeqTable",dest = "genomeSeqTable",action= "store", type = "string", help = "(mandatory) table with the genome sequence (format=seq)",  default = "")
        parser.add_option("-r", "--TErefseq",dest = "TErefseq",         action= "store", type = "string", help = "name or file with TE reference sequence(s) (all by default)",  default = "")
        parser.add_option("-m", "--MSAmethod",dest = "MSAmethod",         action= "store", type = "string", help = "MSA method (default=RefAlign/Map)",  default = "RefAlign")
        parser.add_option("-l", "--minCopyLength",dest = "minCopyLength",         action= "store", type = "int", help = "minimum length of copies (default=100)",  default = 100)
        parser.add_option("-n", "--nbLongestCopies",dest = "nbLongestCopies",         action= "store", type = "int", help = "number of longest copies to use (default=20)",  default = 20)
        parser.add_option("-y", "--minPropCopy",dest = "minPropCopy",         action= "store", type = "float", help = "minimum  copy proportion compare to references (default=0.5)",  default = 0.5)
        parser.add_option("-R", "--keepRefseq",dest = "keepRefseq",         action="store_true", help = "keep the reference sequence (only with Refalign)",  default = False)
        parser.add_option("-C", "--config",    dest = "configFileName",     action = "store", type = "string", help = "(mandatory) config file name to set database connection", default = "")
        parser.add_option("-q", "--queue",dest = "queue",         action= "store", type = "string", help = "queue name",  default = "")
        parser.add_option("-c", "--clean", action="store_false", dest = "clean", help = "don't clean", default = True) 
        parser.add_option("-d", "--tmpDir",dest = "tmpDir",         action= "store", type = "string", help = "temporary directory",  default = "")
        parser.add_option("-v", "--verbosity",   dest = "verbosity",             action = "store", type = "int",    help = "verbosity level (default=0)", default = 0)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)
        
    def _setAttributesFromOptions(self, options):
        self.setConfigFileName(options.configFileName)
        self.setStep(options.step)
        self.setPathTable(options.path)
        self.setRefSeqTable(options.refSeqTable)
        self.setGenomeSeqTable(options.genomeSeqTable)
        self.setTErefseq(options.TErefseq)
        self.setMSAmethod(options.MSAmethod)
        self.setMinCopyLength(options.minCopyLength)
        self.setNbLongestCopies(options.nbLongestCopies)
        self.setMinPropCopy(options.minPropCopy)
        self.setKeepRefseq(options.keepRefseq)
        self.setQueue(options.queue)
        self.setClean(options.clean)
        self.setTmpDir(options.tmpDir)
        self.setVerbosity(options.verbosity)
        
    def setStep(self, step):
        self.step = step
       
    def setPathTable(self, path):
        self._pathTable = path
       
    def setRefSeqTable(self, refSeqTable):
        self._refSeqTable = refSeqTable
       
    def setGenomeSeqTable(self, genomeSeqTable):
        self._genomeSeqTable = genomeSeqTable
       
    def setTErefseq(self, TErefseq):
        self._TErefseq = TErefseq
       
    def setMSAmethod(self, MSAmethod):
        self._MSAmethod = MSAmethod
        
    def setMinCopyLength(self, minCopyLength):
        self._minCopyLength = minCopyLength
        
    def setNbLongestCopies(self, nbLongestCopies):
        self._nbLongestCopies = nbLongestCopies
        
    def setMinPropCopy(self, minPropCopy):
        self._minPropCopy = minPropCopy
        
    def setKeepRefseq(self, keepRefseq):
        self._keepRefseq = keepRefseq
        
    def setQueue(self, queue):
        self._queue = queue

    def setClean(self, clean):
        self._clean = clean
 
    def setTmpDir(self, tmpDir):
        self._tmpDir = tmpDir 
                
    def setVerbosity(self, verbosity):        
        self._verbosity = verbosity     
               
    def setup_env(self):
        configFileHandle = open(self._configFileName)
        # Use RepetConfigParser?
        config = ConfigParser.ConfigParser()
        try :
            config.readfp(configFileHandle)
        except MissingSectionHeaderError:
            self._logAndRaise("Configuration file %s must begin with a section header" % self._configFileName) 

        os.environ["REPET_HOST"] = config.get("repet_env", "repet_host")
        os.environ["REPET_USER"] = config.get("repet_env", "repet_user")
        os.environ["REPET_PW"] = config.get("repet_env", "repet_pw")
        os.environ["REPET_DB"] = config.get("repet_env", "repet_db")
        os.environ["REPET_PORT"] = config.get("repet_env", "repet_port")
        os.environ["REPET_JOB_MANAGER"] = config.get("repet_env", "repet_job_manager")
                  
    def setConfigFileName(self, configFileName):
        self._configFileName = configFileName
                    
    def checkAttributes( self ):
        """
        Check the attributes are valid before running the algorithm.
        """
        if self._pathTable == "":
            self._logAndRaise("PathTable is mandatory")
        if self._refSeqTable == "":
            self._logAndRaise("RefSeqTable is mandatory")  
        if self._genomeSeqTable == "":
            self._logAndRaise("GenomeSeqTable is mandatory")
        if self._configFileName == "":
            self._logAndRaise("Configuration file is mandatory")
        else:
            if FileUtils.isRessourceExists(self._configFileName):
                self.setup_env()
            else:
                self._logAndRaise("Configuration file '%s' does not exist!" % self._configFileName)
            
        if ( self.step == 2 or self.step == 3 ) and self._MSAmethod not in ["RefAlign","Map"]:
            if self._MSAmethod == None or self._MSAmethod == "":
                self._logAndRaise("Missing method option")
            self._logAndRaise("Method '%s' not yet available" % ( self._MSAmethod ))
            
        if self._tmpDir == "":
            self._tmpDir  = os.getcwd()    
            
    def connectSql(self):  
        self._db =  DbFactory().createInstance(configFileName = self._configFileName, verbosity = 1)
        self._tpaAnnot = TablePathAdaptator(self._db, self._pathTable)
        self._tsaRef = TableSeqAdaptator(self._db,self._refSeqTable)
                
    def getNamesOfTErefSeq( self ):
        """
        Return a list with the names of reference TEs.
        """
        lNamesTErefSeq = []
        if self._TErefseq == "":
            lNamesTErefSeq = self._tsaRef.getAccessionsList()
        else:
            if os.path.isfile( self._TErefseq ):
                refseqFileHandler = open( self._TErefseq, "r" )
                while True:
                    line = refseqFileHandler.readline()
                    if line == "":
                        break
                    lNamesTErefSeq.append( line[:-1].split("\t")[0] )
                refseqFileHandler.close()
            else:
                lNamesTErefSeq = [ self._TErefseq ]
                for name in lNamesTErefSeq:
                    if not self._tsaRef.isAccessionInTable( name ):
                        self._log.warning("'%s' not in table '%s'" % (name, self._refSeqTable))
                        lNamesTErefSeq.remove( name )
        lNamesTErefSeq.sort()
        self._log.info("nb of TE reference sequences: %d" % (len(lNamesTErefSeq)))
        return lNamesTErefSeq
    
    
    def getTErefSeqInFastaFiles( self, lNamesTErefSeq ):
        """
        Save sequences of reference TEs in fasta files.
        """
        for name in lNamesTErefSeq:
            self._log.debug("save sequence of '%s'..." % ( name ))
            self._tsaRef.saveAccessionsListInFastaFile( [name], name+".fa" )
            
    def getCopiesInFastaFilesPerTErefSeq( self, lNamesTErefSeq ):
        """
        Save sequences of TE copies in fasta files.
        """
        self._log.info("retrieve the copies...")
        tsaChr = TableSeqAdaptator( self._db, self._genomeSeqTable )
        totalNbCopies = 0
        totalNbSavedCopies = 0
        for name in lNamesTErefSeq:
            nbCopies = 0
            if os.path.exists(name+"_copies.fa"):
                continue
            outFile = open( name+"_copies.fa", "w" )
            self._log.debug("Fetching path nums for subject: %s " % name)
            lPathNums = self._tpaAnnot.getIdListSortedByDecreasingChainLengthFromSubject(name)
            nbCopies = len(lPathNums)
            totalNbCopies += nbCopies
            self._log.debug("refseq '%s': %d copies" % ( name, nbCopies ))
            i = 0
            nbSavedCopies = 0
            nbSavedFragments = 0
            lengthRefseq = self._tsaRef.getSeqLengthFromAccession( name )
            while i < len(lPathNums) and nbSavedCopies < self._nbLongestCopies:
                lPaths = self._tpaAnnot.getPathListFromId( lPathNums[i] )
                lSets = PathUtils.getSetListFromQueries( lPaths )
                copyLength = SetUtils.getCumulLength( lSets )
                if copyLength >= self._minCopyLength \
                and copyLength >= self._minPropCopy * lengthRefseq:
                    bs = tsaChr.getBioseqFromSetList( lSets )
                    bs.write(outFile)
                    nbSavedCopies += 1
                    nbSavedFragments += len(lPaths)
                i += 1
            
            self._log.debug(" (saved: %d copies, %d fragments)\n" % ( nbSavedCopies, nbSavedFragments ) )
            outFile.close()
            totalNbSavedCopies += nbSavedCopies
            if nbSavedCopies == 0 and nbCopies != 0:
                self._log.warning("No copy >= %d" % ( self._minCopyLength ))
        self._log.info("nb of copies: %d (%d saved)" % ( totalNbCopies, totalNbSavedCopies ))
            
    def filter4Alignments( self, lNamesTErefSeq ):
        """
        Filter TE copy sequences according to their length.
        """
        self._log.info("filtering copies...")
        if len(lNamesTErefSeq) == 1:
            if not os.path.exists( "%s_copies.fa" % ( lNamesTErefSeq[0] ) ):
                self._logAndRaise("first run step 1")
            lInFiles = [ "%s_copies.fa" % ( lNamesTErefSeq[0] ) ]
        else:
            lInFiles = glob.glob( "*_copies.fa" )
            if len(lInFiles) == 0:
                self._logAndRaise("no file '*_copies.fa'")
        count = 0
        for inFileName in lInFiles:
            if os.path.exists( "%s.filtered" % ( inFileName ) ):
                continue
            count += 1
            self._log.debug("TE %d --> %s" %(count,inFileName))
            FastaUtils.dbLengthFilter( self._minCopyLength, inFileName, verbose=self._verbosity )
            FastaUtils.dbLongestSequences( self._nbLongestCopies, inFileName+".Sup"+str(self._minCopyLength), verbose=self._verbosity )
            os.rename( inFileName+".Sup"+str(self._minCopyLength)+".best"+str(self._nbLongestCopies), inFileName+".filtered" )
            os.remove( inFileName+".Sup"+str(self._minCopyLength) )
            os.remove( inFileName+".Inf"+str(self._minCopyLength) )
        self._log.info("files filtered: %d" % (count))
            
    def buildInFiles4Launcher( self, lNamesTErefSeq ):
        """
        Save sequences of TE copies with reference in fasta files for launcher usage.
        """
        self._log.info("building input files for launcher...")
        for name in lNamesTErefSeq:
            if os.path.exists( "%s_all.fa.oriented" % ( name ) ):
                continue
            if FastaUtils.dbSize( "%s_copies.fa.filtered" % ( name ) ) > 0:
                os.system( "cat "+name+".fa "+ name+"_copies.fa.filtered > " + name+"_all.fa" )
                ors = OrientSequences(inFileName= "%s_all.fa" %(name), prgToOrient="mummer", clean=True, verbosity =self._verbosity - 1)
                ors.run()
                ors.clean()
        if len( glob.glob("*_all.fa.oriented") ) == 0:
            self._logAndRaise("no copies")
        self._log.info("done building input files for launcher...")   
            
    def renameFile( self, fromName, toName):
        lFiles = glob.glob( "*%s" %fromName )
        for f in lFiles:
            os.rename( f, f.replace(fromName,toName))
    
    def _preparejobs(self, iLauncher, cDir):
        self._log.info("Preparing Align jobs")
        lCmdsTuples = []
        print(self.queriesDir,self.alignFileSuffix)
        queryFiles = glob.glob("%s/%s" % (self.queriesDir,self.alignFileSuffix))
        print("queryFiles",queryFiles)
        for queryFile in queryFiles:#os.listdir(self.queriesDir):
            lCmds = []
            lCmdStart = []
            lCmdFinish = [] #['shutil.move("%s/*" %newDir, "../" )']
            queryFilePath = os.path.join(self.queriesDir,queryFile)
            lCmds.append(self._createLaunchAlignCommands(iLauncher, queryFilePath))
            #lCmdFinish.append("shutil.move(\"%s.%s\", \"%s/%s.%s\")" % (benchName,self.outputformat,self.resultsDir,benchName,self.outputformat))
            lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))   
        self._log.info("Finished preparing Align jobs")
        return  lCmdsTuples

    def _preparePhyMljobs(self, iLauncher, cDir):
        self._log.info("Preparing PhyMl jobs")
        lCmdsTuples = []
        queryFiles = glob.glob("%s/%s" % (self.queriesDir,self.phyloFileSuffix))
        print("queryFiles",queryFiles)
        for queryFile in queryFiles:#os.listdir(self.queriesDir):
            lCmds = []
            lCmdStart = []
            lCmdFinish = [] #['shutil.move("%s/*" %newDir, "../" )']
            queryFilePath = os.path.join(self.queriesDir,queryFile)
            lCmds.append(self._createLaunchPhyMLCommands(iLauncher, queryFilePath))
            lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))   
        self._log.info("Finished preparing PhyMl jobs")
        return  lCmdsTuples
    
    def _createLaunchAlignCommands(self, iLauncher, query):
        if self._MSAmethod == "Map":
            prg = "LaunchMap.py"
        elif self._MSAmethod == "RefAlign":
            prg = "LaunchRefAlign.py"
            
        lArgs = []
        lArgs.append("-i %s" % query)
        lArgs.append(" -o %s.fa_aln" % query)    
        lArgs.append("-v 1" )
        
        if self._MSAmethod == "RefAlign" and self._keepRefseq:
            lArgs.append("-r " )
        
        self._log.debug("Prepared Align commands : %s %s" % (prg, " ".join(lArgs)))
        
        
        return iLauncher.getSystemCommand("%s" % prg, lArgs)
                 
    def launchMultAlignments(self, lNamesTErefSeq):
        """
            Make multiple alignments via Map for each TE family
        """
        self.queriesDir =  os.getcwd()
        
        if len(lNamesTErefSeq) == 1:
            self.alignFileSuffix = "%s_all.fa.oriented" % (lNamesTErefSeq[0])
        else:
            self.alignFileSuffix = "*_all.fa.oriented"
        
        queue = self._queue
        cDir = os.getcwd()
        tmpDir =  self._tmpDir 
        groupid = "%s_%s" % ( self._refSeqTable, self._MSAmethod )
        acronym = "Align"
        iDb = DbFactory.createInstance(configFileName=self._configFileName)
        iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")
        iLauncher = Launcher(iTJA, os.getcwd(), "", "", cDir, tmpDir, "jobs", queue, groupid)
        lCmdsTuples = self._preparejobs(iLauncher, cDir)
        iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, self._clean) 
            
        self.renameFile("_all.fa.oriented.fa_aln", "_all.fa.oriented_%s.fa_aln" % (self._MSAmethod.lower()) )
            
#    def __makeMultAlignments( self, lNamesTErefSeq ):
#        """
#        Make multiple alignments via Map or Refalign for each TE family
#        """
#        self._pL.reset("")
#        if self._MSAmethod == "Map":
#            
#            prg = os.environ["REPET_PATH"] + "/bin/srptMAP.py"
#        elif self._MSAmethod == "RefAlign":
#            prg = os.environ["REPET_PATH"] + "/bin/srptRefalign.py"
#        cmd = prg
#        cmd += " -g %s_%s" % ( self._refSeqTable, self._MSAmethod )
#        cmd += " -q %s" % ( os.getcwd() )
#        if len(lNamesTErefSeq) == 1:
#            cmd += " -S %s_all.fa.oriented" % ( lNamesTErefSeq[0] )
#        else:
#            cmd += " -S '*_all.fa.oriented'"
#        cmd += " -Q %s" % ( self._queue )
#        cmd += " -C %s" % ( self._configFileName )
#        if self._MSAmethod == "Refalign" and self._keepRefseq:
#            cmd += " -r"
#        if not self._clean :
#            cmd += " -c"
#        if self._tmpDir != "":
#            cmd += " -d %s" % ( self._tmpDir )
#        cmd += " -v %d" % ( self._verbosity )
#        self._pL.launch( prg, cmd )
#        lFiles = glob.glob( "*_all.fa.oriented.fa_aln" )
#        for f in lFiles:
#            os.rename( f, f.replace("_all.fa.oriented.fa_aln","_all.fa.oriented_%s.fa_aln" % (self._MSAmethod.lower()) ) )
            
            
    def filter4phylogenies( self, verbosity=0 ):
        """
        Filter TE copy alignment for better phylogenies.
        """
        self._log.info("Filtering MSA")
        lInFiles = glob.glob( "*_all.fa.oriented_%s.fa_aln" % ( self._MSAmethod.lower() ) )
        count = 0
        for inFileName in lInFiles:
            count += 1    
            self._log.debug("clean MSA %d --> %s" % (count,inFileName))  
            alignDB = AlignedBioseqDB()
            alignDB.load(inFileName)
            alignDB.cleanMSA()
            if alignDB.getSize() > 2:                
                alignDB.save( inFileName + ".clean" )
                self._log.debug("clean!")
            else:
                self._log.debug("skip!")
        self._log.info("MSA cleaned: %d" % count)
            

    def _createLaunchPhyMLCommands(self, iLauncher, query):
#        prg = os.environ["REPET_PATH"] + "/bin/srptPhyML.py"
#        cmd = prg
#        cmd += " -g %s_PHY_%s" % ( self._refSeqTable, os.getpid() )
#        cmd += " -q %s" % ( os.getcwd() )
#        cmd += " -S '*_all.fa.oriented_%s.fa_aln.clean'" % ( self._MSAmethod.lower() )
#        cmd += " -Q %s" % ( self._queue )
#        cmd += " -C %s" % ( self._configFileName )

        prg = "LaunchPhyML.py"
        lArgs = []
        lArgs.append("-i %s" % query)
        lArgs.append("-o %s.fa_phylo" % query)    
        lArgs.append("-v %d" % (self._verbosity-1))
        
        self._log.debug("Prepared Phyml commands : %s %s" % (prg, " ".join(lArgs)))
        return iLauncher.getSystemCommand("%s" % prg, lArgs)
   
    def makePhylogenies( self ):
        """
        Launch PhyML on each TE family.
        """
        self.phyloFileSuffix = "*_all.fa.oriented_%s.fa_aln.clean" % ( self._MSAmethod.lower() )
        
        queue = self._queue
        cDir = os.getcwd()
        tmpDir =  self._tmpDir 
        groupid = "%s_PHY_%s" % ( self._refSeqTable, os.getpid() )
        acronym = "Phylo"
        iDb = DbFactory.createInstance(configFileName=self._configFileName)
        iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")
        iLauncher = Launcher(iTJA, os.getcwd(), "", "", cDir, tmpDir, "jobs", queue, groupid)
        lCmdsTuples = self._preparePhyMljobs(iLauncher, cDir)
        iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, self._clean) 

        
    def start( self ):
        """
        Useful commands before running the program.
        """
        self.checkAttributes()
        self._log.info("START GetMultAlignAndPhylogenyPerTErefSeq.py STEP %d" % self.step)
        self.connectSql()
            
    def end( self ):
        """
        Useful commands before ending the program.
        """
        self._db.close()
        self._log.info("END GetMultAlignAndPhylogenyPerTErefSeq.py STEP %d" % self.step)
            
    def run( self ):
        """
        Run the program.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self.start()
        lNamesTErefSeq = self.getNamesOfTErefSeq()
        self._log.debug("lNamesTErefSeq: %s" %  " ".join(lNamesTErefSeq))
        
        if self.step in [0, 1]:
            self.getTErefSeqInFastaFiles( lNamesTErefSeq )
            self.getCopiesInFastaFilesPerTErefSeq( lNamesTErefSeq )
            
        if self.step in [0, 2]:
            self.filter4Alignments(lNamesTErefSeq)
            self.buildInFiles4Launcher(lNamesTErefSeq)
            self.launchMultAlignments(lNamesTErefSeq)
            
        if self.step in [0, 3]:
            self.filter4phylogenies(verbosity=self._verbosity)
            self.makePhylogenies()
        self.end()
        
if __name__ == "__main__":
    iGetMultAlignAndPhylogenyPerTErefSeq = GetMultAlignAndPhylogenyPerTErefSeq()
    iGetMultAlignAndPhylogenyPerTErefSeq.setAttributesFromCmdLine()
    iGetMultAlignAndPhylogenyPerTErefSeq.run()