comparison commons/tools/GetMultAlignAndPhylogenyPerTErefSeq.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 #!/usr/bin/env python
2
3 ##@file
4 # For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.
5 # usage: GetMultAlignAndPhylogenyPerTErefSeq.py [ options ]
6 # options:
7 # -h: this help
8 # -S: step (0: all steps [default], 1:file generation, 2:multiple alignements, 3:phylogenies)
9 # -p: table with the annotations (format=path)
10 # -s: table with the TE reference sequences (format=seq)
11 # -g: table with the genome sequence (format=seq)
12 # -r: name or file with TE reference sequence(s) (all by default)
13 # -m: MSA method (default=Refalign/Map)
14 # -l: minimum length of copies (default=100)
15 # -n: number of longest copies to use (default=20)
16 # -y: minimum copy proportion compare to references (default=0.5)
17 # -R: keep the reference sequence (only with Refalign)
18 # -C: configuration file
19 # -q: queue name
20 # -c: clean
21 # -d: temporary directory
22 # -v: verbosity level (default=0/1)
23
24 import os
25 import sys
26 import glob
27 import ConfigParser
28
29 import pyRepet.launcher.programLauncher
30
31 from commons.core.coord.PathUtils import PathUtils
32 from commons.core.seq.FastaUtils import FastaUtils
33 from commons.core.coord.SetUtils import SetUtils
34 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
35 from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator
36 from commons.tools.OrientSequences import OrientSequences
37 from ConfigParser import MissingSectionHeaderError
38 from commons.core.utils.RepetOptionParser import RepetOptionParser
39 from commons.core.LoggerFactory import LoggerFactory
40 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
41 from commons.launcher import LaunchMap
42 from commons.core.sql.DbFactory import DbFactory
43 from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
44 from commons.core.launcher.Launcher import Launcher
45 from commons.core.utils.FileUtils import FileUtils
46
47
48 LOG_DEPTH = "repet.tools"
49
50 ## For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.
51 #
52 class GetMultAlignAndPhylogenyPerTErefSeq(object):
53
54 def __init__(self, pathTableName="",refSeqTableName="", genomeSeqTableName="", step=0, mSAmethod="RefAlign",keepRefseq=False, configFileName= "", clean = True, verbosity=3):
55 """
56 Constructor.
57 """
58 self.step = step
59 self._pathTable = pathTableName
60 self._refSeqTable = refSeqTableName
61 self._genomeSeqTable = genomeSeqTableName
62 self._TErefseq = ""
63 self._MSAmethod = mSAmethod
64 self._minCopyLength = 100
65 self._nbLongestCopies = 20
66 self._minPropCopy = 0.5
67 self._keepRefseq = keepRefseq
68 self.setConfigFileName(configFileName)
69 self._queue = ""
70 self._tmpDir = ""
71 self._clean = clean
72 self._verbosity = verbosity
73 self._db = None
74 self._tpaAnnot = None
75 self._tsaRef = None
76 self._pL = pyRepet.launcher.programLauncher.programLauncher()
77 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
78
79 def _logAndRaise(self, errorMsg):
80 self._log.error(errorMsg)
81 raise Exception(errorMsg)
82
83 def setAttributesFromCmdLine(self):
84 desc = "For each TE reference sequence, it computes a multiple alignment and a phylogeny of all its copies.\n"
85 #Commented: it's not true, Config File is mandatory!
86 # desc += "Connection to the database parameters are retrieved from the environment"
87
88 #TODO: format options as other scripts (have a look at LaunchTemplate)
89 parser = RepetOptionParser(description = desc, epilog = "")
90 parser.add_option("-S", "--step", dest = "step" , action = "store", type = "int", help = "step (0: all steps [default], 1:file generation, 2:multiple alignments, 3:phylogenies)", default = 0 )
91 parser.add_option("-p", "--pathTable", dest = "path", action= "store", type = "string", help = "(mandatory) table with the annotations (format=path)", default = "")
92 parser.add_option("-s", "--refSeqTable",dest = "refSeqTable", action= "store", type = "string", help = "(mandatory) table with the TE reference sequences (format=seq)", default = "")
93 parser.add_option("-g", "--genomeSeqTable",dest = "genomeSeqTable",action= "store", type = "string", help = "(mandatory) table with the genome sequence (format=seq)", default = "")
94 parser.add_option("-r", "--TErefseq",dest = "TErefseq", action= "store", type = "string", help = "name or file with TE reference sequence(s) (all by default)", default = "")
95 parser.add_option("-m", "--MSAmethod",dest = "MSAmethod", action= "store", type = "string", help = "MSA method (default=RefAlign/Map)", default = "RefAlign")
96 parser.add_option("-l", "--minCopyLength",dest = "minCopyLength", action= "store", type = "int", help = "minimum length of copies (default=100)", default = 100)
97 parser.add_option("-n", "--nbLongestCopies",dest = "nbLongestCopies", action= "store", type = "int", help = "number of longest copies to use (default=20)", default = 20)
98 parser.add_option("-y", "--minPropCopy",dest = "minPropCopy", action= "store", type = "float", help = "minimum copy proportion compare to references (default=0.5)", default = 0.5)
99 parser.add_option("-R", "--keepRefseq",dest = "keepRefseq", action="store_true", help = "keep the reference sequence (only with Refalign)", default = False)
100 parser.add_option("-C", "--config", dest = "configFileName", action = "store", type = "string", help = "(mandatory) config file name to set database connection", default = "")
101 parser.add_option("-q", "--queue",dest = "queue", action= "store", type = "string", help = "queue name", default = "")
102 parser.add_option("-c", "--clean", action="store_false", dest = "clean", help = "don't clean", default = True)
103 parser.add_option("-d", "--tmpDir",dest = "tmpDir", action= "store", type = "string", help = "temporary directory", default = "")
104 parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity level (default=0)", default = 0)
105 options = parser.parse_args()[0]
106 self._setAttributesFromOptions(options)
107
108 def _setAttributesFromOptions(self, options):
109 self.setConfigFileName(options.configFileName)
110 self.setStep(options.step)
111 self.setPathTable(options.path)
112 self.setRefSeqTable(options.refSeqTable)
113 self.setGenomeSeqTable(options.genomeSeqTable)
114 self.setTErefseq(options.TErefseq)
115 self.setMSAmethod(options.MSAmethod)
116 self.setMinCopyLength(options.minCopyLength)
117 self.setNbLongestCopies(options.nbLongestCopies)
118 self.setMinPropCopy(options.minPropCopy)
119 self.setKeepRefseq(options.keepRefseq)
120 self.setQueue(options.queue)
121 self.setClean(options.clean)
122 self.setTmpDir(options.tmpDir)
123 self.setVerbosity(options.verbosity)
124
125 def setStep(self, step):
126 self.step = step
127
128 def setPathTable(self, path):
129 self._pathTable = path
130
131 def setRefSeqTable(self, refSeqTable):
132 self._refSeqTable = refSeqTable
133
134 def setGenomeSeqTable(self, genomeSeqTable):
135 self._genomeSeqTable = genomeSeqTable
136
137 def setTErefseq(self, TErefseq):
138 self._TErefseq = TErefseq
139
140 def setMSAmethod(self, MSAmethod):
141 self._MSAmethod = MSAmethod
142
143 def setMinCopyLength(self, minCopyLength):
144 self._minCopyLength = minCopyLength
145
146 def setNbLongestCopies(self, nbLongestCopies):
147 self._nbLongestCopies = nbLongestCopies
148
149 def setMinPropCopy(self, minPropCopy):
150 self._minPropCopy = minPropCopy
151
152 def setKeepRefseq(self, keepRefseq):
153 self._keepRefseq = keepRefseq
154
155 def setQueue(self, queue):
156 self._queue = queue
157
158 def setClean(self, clean):
159 self._clean = clean
160
161 def setTmpDir(self, tmpDir):
162 self._tmpDir = tmpDir
163
164 def setVerbosity(self, verbosity):
165 self._verbosity = verbosity
166
167 def setup_env(self):
168 configFileHandle = open(self._configFileName)
169 # Use RepetConfigParser?
170 config = ConfigParser.ConfigParser()
171 try :
172 config.readfp(configFileHandle)
173 except MissingSectionHeaderError:
174 self._logAndRaise("Configuration file %s must begin with a section header" % self._configFileName)
175
176 os.environ["REPET_HOST"] = config.get("repet_env", "repet_host")
177 os.environ["REPET_USER"] = config.get("repet_env", "repet_user")
178 os.environ["REPET_PW"] = config.get("repet_env", "repet_pw")
179 os.environ["REPET_DB"] = config.get("repet_env", "repet_db")
180 os.environ["REPET_PORT"] = config.get("repet_env", "repet_port")
181 os.environ["REPET_JOB_MANAGER"] = config.get("repet_env", "repet_job_manager")
182
183 def setConfigFileName(self, configFileName):
184 self._configFileName = configFileName
185
186 def checkAttributes( self ):
187 """
188 Check the attributes are valid before running the algorithm.
189 """
190 if self._pathTable == "":
191 self._logAndRaise("PathTable is mandatory")
192 if self._refSeqTable == "":
193 self._logAndRaise("RefSeqTable is mandatory")
194 if self._genomeSeqTable == "":
195 self._logAndRaise("GenomeSeqTable is mandatory")
196 if self._configFileName == "":
197 self._logAndRaise("Configuration file is mandatory")
198 else:
199 if FileUtils.isRessourceExists(self._configFileName):
200 self.setup_env()
201 else:
202 self._logAndRaise("Configuration file '%s' does not exist!" % self._configFileName)
203
204 if ( self.step == 2 or self.step == 3 ) and self._MSAmethod not in ["RefAlign","Map"]:
205 if self._MSAmethod == None or self._MSAmethod == "":
206 self._logAndRaise("Missing method option")
207 self._logAndRaise("Method '%s' not yet available" % ( self._MSAmethod ))
208
209 if self._tmpDir == "":
210 self._tmpDir = os.getcwd()
211
212 def connectSql(self):
213 self._db = DbFactory().createInstance(configFileName = self._configFileName, verbosity = 1)
214 self._tpaAnnot = TablePathAdaptator(self._db, self._pathTable)
215 self._tsaRef = TableSeqAdaptator(self._db,self._refSeqTable)
216
217 def getNamesOfTErefSeq( self ):
218 """
219 Return a list with the names of reference TEs.
220 """
221 lNamesTErefSeq = []
222 if self._TErefseq == "":
223 lNamesTErefSeq = self._tsaRef.getAccessionsList()
224 else:
225 if os.path.isfile( self._TErefseq ):
226 refseqFileHandler = open( self._TErefseq, "r" )
227 while True:
228 line = refseqFileHandler.readline()
229 if line == "":
230 break
231 lNamesTErefSeq.append( line[:-1].split("\t")[0] )
232 refseqFileHandler.close()
233 else:
234 lNamesTErefSeq = [ self._TErefseq ]
235 for name in lNamesTErefSeq:
236 if not self._tsaRef.isAccessionInTable( name ):
237 self._log.warning("'%s' not in table '%s'" % (name, self._refSeqTable))
238 lNamesTErefSeq.remove( name )
239 lNamesTErefSeq.sort()
240 self._log.info("nb of TE reference sequences: %d" % (len(lNamesTErefSeq)))
241 return lNamesTErefSeq
242
243
244 def getTErefSeqInFastaFiles( self, lNamesTErefSeq ):
245 """
246 Save sequences of reference TEs in fasta files.
247 """
248 for name in lNamesTErefSeq:
249 self._log.debug("save sequence of '%s'..." % ( name ))
250 self._tsaRef.saveAccessionsListInFastaFile( [name], name+".fa" )
251
252 def getCopiesInFastaFilesPerTErefSeq( self, lNamesTErefSeq ):
253 """
254 Save sequences of TE copies in fasta files.
255 """
256 self._log.info("retrieve the copies...")
257 tsaChr = TableSeqAdaptator( self._db, self._genomeSeqTable )
258 totalNbCopies = 0
259 totalNbSavedCopies = 0
260 for name in lNamesTErefSeq:
261 nbCopies = 0
262 if os.path.exists(name+"_copies.fa"):
263 continue
264 outFile = open( name+"_copies.fa", "w" )
265 self._log.debug("Fetching path nums for subject: %s " % name)
266 lPathNums = self._tpaAnnot.getIdListSortedByDecreasingChainLengthFromSubject(name)
267 nbCopies = len(lPathNums)
268 totalNbCopies += nbCopies
269 self._log.debug("refseq '%s': %d copies" % ( name, nbCopies ))
270 i = 0
271 nbSavedCopies = 0
272 nbSavedFragments = 0
273 lengthRefseq = self._tsaRef.getSeqLengthFromAccession( name )
274 while i < len(lPathNums) and nbSavedCopies < self._nbLongestCopies:
275 lPaths = self._tpaAnnot.getPathListFromId( lPathNums[i] )
276 lSets = PathUtils.getSetListFromQueries( lPaths )
277 copyLength = SetUtils.getCumulLength( lSets )
278 if copyLength >= self._minCopyLength \
279 and copyLength >= self._minPropCopy * lengthRefseq:
280 bs = tsaChr.getBioseqFromSetList( lSets )
281 bs.write(outFile)
282 nbSavedCopies += 1
283 nbSavedFragments += len(lPaths)
284 i += 1
285
286 self._log.debug(" (saved: %d copies, %d fragments)\n" % ( nbSavedCopies, nbSavedFragments ) )
287 outFile.close()
288 totalNbSavedCopies += nbSavedCopies
289 if nbSavedCopies == 0 and nbCopies != 0:
290 self._log.warning("No copy >= %d" % ( self._minCopyLength ))
291 self._log.info("nb of copies: %d (%d saved)" % ( totalNbCopies, totalNbSavedCopies ))
292
293 def filter4Alignments( self, lNamesTErefSeq ):
294 """
295 Filter TE copy sequences according to their length.
296 """
297 self._log.info("filtering copies...")
298 if len(lNamesTErefSeq) == 1:
299 if not os.path.exists( "%s_copies.fa" % ( lNamesTErefSeq[0] ) ):
300 self._logAndRaise("first run step 1")
301 lInFiles = [ "%s_copies.fa" % ( lNamesTErefSeq[0] ) ]
302 else:
303 lInFiles = glob.glob( "*_copies.fa" )
304 if len(lInFiles) == 0:
305 self._logAndRaise("no file '*_copies.fa'")
306 count = 0
307 for inFileName in lInFiles:
308 if os.path.exists( "%s.filtered" % ( inFileName ) ):
309 continue
310 count += 1
311 self._log.debug("TE %d --> %s" %(count,inFileName))
312 FastaUtils.dbLengthFilter( self._minCopyLength, inFileName, verbose=self._verbosity )
313 FastaUtils.dbLongestSequences( self._nbLongestCopies, inFileName+".Sup"+str(self._minCopyLength), verbose=self._verbosity )
314 os.rename( inFileName+".Sup"+str(self._minCopyLength)+".best"+str(self._nbLongestCopies), inFileName+".filtered" )
315 os.remove( inFileName+".Sup"+str(self._minCopyLength) )
316 os.remove( inFileName+".Inf"+str(self._minCopyLength) )
317 self._log.info("files filtered: %d" % (count))
318
319 def buildInFiles4Launcher( self, lNamesTErefSeq ):
320 """
321 Save sequences of TE copies with reference in fasta files for launcher usage.
322 """
323 self._log.info("building input files for launcher...")
324 for name in lNamesTErefSeq:
325 if os.path.exists( "%s_all.fa.oriented" % ( name ) ):
326 continue
327 if FastaUtils.dbSize( "%s_copies.fa.filtered" % ( name ) ) > 0:
328 os.system( "cat "+name+".fa "+ name+"_copies.fa.filtered > " + name+"_all.fa" )
329 ors = OrientSequences(inFileName= "%s_all.fa" %(name), prgToOrient="mummer", clean=True, verbosity =self._verbosity - 1)
330 ors.run()
331 ors.clean()
332 if len( glob.glob("*_all.fa.oriented") ) == 0:
333 self._logAndRaise("no copies")
334 self._log.info("done building input files for launcher...")
335
336 def renameFile( self, fromName, toName):
337 lFiles = glob.glob( "*%s" %fromName )
338 for f in lFiles:
339 os.rename( f, f.replace(fromName,toName))
340
341 def _preparejobs(self, iLauncher, cDir):
342 self._log.info("Preparing Align jobs")
343 lCmdsTuples = []
344 print(self.queriesDir,self.alignFileSuffix)
345 queryFiles = glob.glob("%s/%s" % (self.queriesDir,self.alignFileSuffix))
346 print("queryFiles",queryFiles)
347 for queryFile in queryFiles:#os.listdir(self.queriesDir):
348 lCmds = []
349 lCmdStart = []
350 lCmdFinish = [] #['shutil.move("%s/*" %newDir, "../" )']
351 queryFilePath = os.path.join(self.queriesDir,queryFile)
352 lCmds.append(self._createLaunchAlignCommands(iLauncher, queryFilePath))
353 #lCmdFinish.append("shutil.move(\"%s.%s\", \"%s/%s.%s\")" % (benchName,self.outputformat,self.resultsDir,benchName,self.outputformat))
354 lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))
355 self._log.info("Finished preparing Align jobs")
356 return lCmdsTuples
357
358 def _preparePhyMljobs(self, iLauncher, cDir):
359 self._log.info("Preparing PhyMl jobs")
360 lCmdsTuples = []
361 queryFiles = glob.glob("%s/%s" % (self.queriesDir,self.phyloFileSuffix))
362 print("queryFiles",queryFiles)
363 for queryFile in queryFiles:#os.listdir(self.queriesDir):
364 lCmds = []
365 lCmdStart = []
366 lCmdFinish = [] #['shutil.move("%s/*" %newDir, "../" )']
367 queryFilePath = os.path.join(self.queriesDir,queryFile)
368 lCmds.append(self._createLaunchPhyMLCommands(iLauncher, queryFilePath))
369 lCmdsTuples.append(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))
370 self._log.info("Finished preparing PhyMl jobs")
371 return lCmdsTuples
372
373 def _createLaunchAlignCommands(self, iLauncher, query):
374 if self._MSAmethod == "Map":
375 prg = "LaunchMap.py"
376 elif self._MSAmethod == "RefAlign":
377 prg = "LaunchRefAlign.py"
378
379 lArgs = []
380 lArgs.append("-i %s" % query)
381 lArgs.append(" -o %s.fa_aln" % query)
382 lArgs.append("-v 1" )
383
384 if self._MSAmethod == "RefAlign" and self._keepRefseq:
385 lArgs.append("-r " )
386
387 self._log.debug("Prepared Align commands : %s %s" % (prg, " ".join(lArgs)))
388
389
390 return iLauncher.getSystemCommand("%s" % prg, lArgs)
391
392 def launchMultAlignments(self, lNamesTErefSeq):
393 """
394 Make multiple alignments via Map for each TE family
395 """
396 self.queriesDir = os.getcwd()
397
398 if len(lNamesTErefSeq) == 1:
399 self.alignFileSuffix = "%s_all.fa.oriented" % (lNamesTErefSeq[0])
400 else:
401 self.alignFileSuffix = "*_all.fa.oriented"
402
403 queue = self._queue
404 cDir = os.getcwd()
405 tmpDir = self._tmpDir
406 groupid = "%s_%s" % ( self._refSeqTable, self._MSAmethod )
407 acronym = "Align"
408 iDb = DbFactory.createInstance(configFileName=self._configFileName)
409 iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")
410 iLauncher = Launcher(iTJA, os.getcwd(), "", "", cDir, tmpDir, "jobs", queue, groupid)
411 lCmdsTuples = self._preparejobs(iLauncher, cDir)
412 iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, self._clean)
413
414 self.renameFile("_all.fa.oriented.fa_aln", "_all.fa.oriented_%s.fa_aln" % (self._MSAmethod.lower()) )
415
416 # def __makeMultAlignments( self, lNamesTErefSeq ):
417 # """
418 # Make multiple alignments via Map or Refalign for each TE family
419 # """
420 # self._pL.reset("")
421 # if self._MSAmethod == "Map":
422 #
423 # prg = os.environ["REPET_PATH"] + "/bin/srptMAP.py"
424 # elif self._MSAmethod == "RefAlign":
425 # prg = os.environ["REPET_PATH"] + "/bin/srptRefalign.py"
426 # cmd = prg
427 # cmd += " -g %s_%s" % ( self._refSeqTable, self._MSAmethod )
428 # cmd += " -q %s" % ( os.getcwd() )
429 # if len(lNamesTErefSeq) == 1:
430 # cmd += " -S %s_all.fa.oriented" % ( lNamesTErefSeq[0] )
431 # else:
432 # cmd += " -S '*_all.fa.oriented'"
433 # cmd += " -Q %s" % ( self._queue )
434 # cmd += " -C %s" % ( self._configFileName )
435 # if self._MSAmethod == "Refalign" and self._keepRefseq:
436 # cmd += " -r"
437 # if not self._clean :
438 # cmd += " -c"
439 # if self._tmpDir != "":
440 # cmd += " -d %s" % ( self._tmpDir )
441 # cmd += " -v %d" % ( self._verbosity )
442 # self._pL.launch( prg, cmd )
443 # lFiles = glob.glob( "*_all.fa.oriented.fa_aln" )
444 # for f in lFiles:
445 # os.rename( f, f.replace("_all.fa.oriented.fa_aln","_all.fa.oriented_%s.fa_aln" % (self._MSAmethod.lower()) ) )
446
447
448 def filter4phylogenies( self, verbosity=0 ):
449 """
450 Filter TE copy alignment for better phylogenies.
451 """
452 self._log.info("Filtering MSA")
453 lInFiles = glob.glob( "*_all.fa.oriented_%s.fa_aln" % ( self._MSAmethod.lower() ) )
454 count = 0
455 for inFileName in lInFiles:
456 count += 1
457 self._log.debug("clean MSA %d --> %s" % (count,inFileName))
458 alignDB = AlignedBioseqDB()
459 alignDB.load(inFileName)
460 alignDB.cleanMSA()
461 if alignDB.getSize() > 2:
462 alignDB.save( inFileName + ".clean" )
463 self._log.debug("clean!")
464 else:
465 self._log.debug("skip!")
466 self._log.info("MSA cleaned: %d" % count)
467
468
469 def _createLaunchPhyMLCommands(self, iLauncher, query):
470 # prg = os.environ["REPET_PATH"] + "/bin/srptPhyML.py"
471 # cmd = prg
472 # cmd += " -g %s_PHY_%s" % ( self._refSeqTable, os.getpid() )
473 # cmd += " -q %s" % ( os.getcwd() )
474 # cmd += " -S '*_all.fa.oriented_%s.fa_aln.clean'" % ( self._MSAmethod.lower() )
475 # cmd += " -Q %s" % ( self._queue )
476 # cmd += " -C %s" % ( self._configFileName )
477
478 prg = "LaunchPhyML.py"
479 lArgs = []
480 lArgs.append("-i %s" % query)
481 lArgs.append("-o %s.fa_phylo" % query)
482 lArgs.append("-v %d" % (self._verbosity-1))
483
484 self._log.debug("Prepared Phyml commands : %s %s" % (prg, " ".join(lArgs)))
485 return iLauncher.getSystemCommand("%s" % prg, lArgs)
486
487 def makePhylogenies( self ):
488 """
489 Launch PhyML on each TE family.
490 """
491 self.phyloFileSuffix = "*_all.fa.oriented_%s.fa_aln.clean" % ( self._MSAmethod.lower() )
492
493 queue = self._queue
494 cDir = os.getcwd()
495 tmpDir = self._tmpDir
496 groupid = "%s_PHY_%s" % ( self._refSeqTable, os.getpid() )
497 acronym = "Phylo"
498 iDb = DbFactory.createInstance(configFileName=self._configFileName)
499 iTJA = TableJobAdaptatorFactory.createInstance(iDb, "jobs")
500 iLauncher = Launcher(iTJA, os.getcwd(), "", "", cDir, tmpDir, "jobs", queue, groupid)
501 lCmdsTuples = self._preparePhyMljobs(iLauncher, cDir)
502 iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, self._clean)
503
504
505 def start( self ):
506 """
507 Useful commands before running the program.
508 """
509 self.checkAttributes()
510 self._log.info("START GetMultAlignAndPhylogenyPerTErefSeq.py STEP %d" % self.step)
511 self.connectSql()
512
513 def end( self ):
514 """
515 Useful commands before ending the program.
516 """
517 self._db.close()
518 self._log.info("END GetMultAlignAndPhylogenyPerTErefSeq.py STEP %d" % self.step)
519
520 def run( self ):
521 """
522 Run the program.
523 """
524 LoggerFactory.setLevel(self._log, self._verbosity)
525 self.start()
526 lNamesTErefSeq = self.getNamesOfTErefSeq()
527 self._log.debug("lNamesTErefSeq: %s" % " ".join(lNamesTErefSeq))
528
529 if self.step in [0, 1]:
530 self.getTErefSeqInFastaFiles( lNamesTErefSeq )
531 self.getCopiesInFastaFilesPerTErefSeq( lNamesTErefSeq )
532
533 if self.step in [0, 2]:
534 self.filter4Alignments(lNamesTErefSeq)
535 self.buildInFiles4Launcher(lNamesTErefSeq)
536 self.launchMultAlignments(lNamesTErefSeq)
537
538 if self.step in [0, 3]:
539 self.filter4phylogenies(verbosity=self._verbosity)
540 self.makePhylogenies()
541 self.end()
542
543 if __name__ == "__main__":
544 iGetMultAlignAndPhylogenyPerTErefSeq = GetMultAlignAndPhylogenyPerTErefSeq()
545 iGetMultAlignAndPhylogenyPerTErefSeq.setAttributesFromCmdLine()
546 iGetMultAlignAndPhylogenyPerTErefSeq.run()
547