annotate commons/launcher/MafftProgramLauncher.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 ##@file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 # Launch Mafft (multiple alignment).
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 # options:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # -h: this help
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # -i: name of the input file (format='fasta')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # -p: parameters for 'mafft' (default='--auto')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 # -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # -c: clean
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 # -v: verbosity level (default=0/1)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 import exceptions
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 from pyRepet.seq.fastaDB import *
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 from commons.core.seq.FastaUtils import FastaUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class MafftProgramLauncher( AbstractProgramLauncher ):
    """
    Launch Mafft (multiple alignment).

    Mafft is not run directly on the input file: 'run()' first asks
    ChangeSequenceHeaders for a copy of the input ('<inFile>.shortH'),
    upper-cases it, aligns that copy, and then asks ChangeSequenceHeaders
    again (step 2, same link file) before writing the aligned sequences to
    the output file in the order of the headers of the input file.
    """


    def __init__( self ):
        """
        Constructor.

        Set the program name, the format of the input file, the default
        program parameters and the specific command-line options.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "mafft"
        self._formatInFile = "fasta"
        self._prgParam = "--auto"
        # Option letters '-p' and '-o'; the ':' presumably marks, getopt-style,
        # that each expects an argument (parsing is done by the parent class).
        self._cmdLineSpecificOptions = "p:o:"


    def _setDefaultOutputFileIfEmpty( self ):
        """
        Set the output file to inFile+'.fa_aln' when none has been set yet.

        Shared by every method that needs the name of the output file, so
        that the default is defined in a single place.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.

        @return: description of the '-p' and '-o' options
        """
        string = ""
        string += "\nspecific options:"
        string += "\n     -p: parameters for '%s' (default='--auto')" % ( self.getProgramName() )
        string += "\n     -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
        return string


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag ('-p' or '-o'; any other flag is ignored here)
        @param a: value given with the option
        """
        if o == "-p":
            self.setProgramParameters( a )
        elif o == "-o":
            self.setOutputFile( a )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        The only check is to give the output file its default name if it
        is still empty.
        """
        self._setDefaultOutputFileIfEmpty()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for MafftClusterLauncher.

        The command-line is stored in 'self._wrpCmdLine'. The program
        parameters are single-quoted so that they are passed as one argument
        to '-p'.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfEmpty()
        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.

        The command-line is stored in 'self._prgCmdLine'. It reads
        '<inFile>.shortH' and redirects the standard output of the program to
        '<inFile>.shortH.fa_aln'; both files are handled by 'run()'.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " %s" % ( self.getProgramParameters() )
        # Do not add '--quiet' twice if the user already gave it via '-p'.
        if self.getVerbosityLevel() == 0 and "--quiet" not in self._prgCmdLine:
            self._prgCmdLine += " --quiet"
        self._prgCmdLine += " %s.shortH" % ( self.getInputFile() )
        self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() )
        # NOTE(review): reads 'self._verbose' directly whereas the rest of the
        # class uses getVerbosityLevel() - presumably the same value, confirm
        # against AbstractProgramLauncher.
        if self._verbose < 2:
            self._prgCmdLine += " 2> /dev/null"


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.

        Only the output file is kept.
        """
        self._setDefaultOutputFileIfEmpty()
        self.appendFileToKeep( self.getOutputFile() )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.

        These are the intermediate files written by 'run()' next to the
        input file.
        """
        self.appendFileToRemove( "%s.shortH" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortHlink" % ( self.getInputFile() ) )


    def setSummary( self ):
        """
        Set the summary (input file, parameters, output file) in
        'self._summary'.
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfEmpty()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.

        Exit the interpreter with status 1 if the program returns a
        non-zero exit status.
        """
        self.start()

        inFile = self.getInputFile()
        shortHFile = "%s.shortH" % ( inFile )
        shortHTmpFile = "%s.shortHtmp" % ( inFile )
        linkFile = "%s.shortHlink" % ( inFile )
        alignedShortHFile = "%s.shortH.fa_aln" % ( inFile )
        alignedInitHFile = "%s.shortH.fa_aln.initH" % ( inFile )

        # Headers of the input file, used at the end to fetch and write the
        # aligned sequences in that order.
        lInitHeaders = FastaUtils.dbHeaders( inFile, self.getVerbosityLevel()-1 )

        # Step 1 of ChangeSequenceHeaders: write '<inFile>.shortH' and the
        # link file (presumably short 'seq...' headers and the mapping to the
        # initial ones - confirm against ChangeSequenceHeaders).
        csh = ChangeSequenceHeaders()
        csh.setInputFile( inFile )
        csh.setFormat( "fasta" )
        csh.setStep( 1 )
        csh.setPrefix( "seq" )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( shortHFile )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        # Upper-case the sequences: save to a temporary file, then rename it
        # over '<inFile>.shortH'.
        bsDB = BioseqDB( shortHFile )
        bsDB.upCase()
        bsDB.save( shortHTmpFile )
        del bsDB
        os.rename( shortHTmpFile, shortHFile )

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            # Single-argument call form: same output with the Python 2 print
            # statement and with the Python 3 print function.
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)

        # Step 2 of ChangeSequenceHeaders on the alignment, with the same
        # link file (presumably restores the initial headers).
        csh.setInputFile( alignedShortHFile )
        csh.setFormat( "fasta" )
        csh.setStep( 2 )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( alignedInitHFile )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        absDB = AlignedBioseqDB( alignedInitHFile )
        outFileHandler = open( self.getOutputFile(), "w" )
        try:
            for header in lInitHeaders:
                bs = absDB.fetch( header )
                bs.upCase()
                bs.write( outFileHandler )
        finally:
            # Close the output file even if a sequence cannot be fetched or
            # written, so that the handle is never leaked.
            outFileHandler.close()
        os.remove( alignedInitHFile )

        self.end()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
183
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
184
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: build the launcher, configure it from the
    # command-line arguments, then run it.
    launcher = MafftProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()