comparison commons/launcher/MapProgramLauncher.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 #!/usr/bin/env python
2
3 ##@file
4 # Launch Map (multiple alignment).
5 #
6 # options:
7 # -h: this help
8 # -i: name of the input file (format='fasta')
9 # -s: size above which a gap is not penalized anymore (default='50')
10 # -m: penalty for a mismatch (default='-8')
11 # -O: penalty for a gap opening (default='16')
12 # -e: penalty for a gap extension (default='4')
13 # -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
14 # -c: clean
15 # -v: verbosity level (default=0/1)
16
17
18 import sys
19 import os
20
21 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
22 from commons.core.seq.FastaUtils import FastaUtils
23 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
24 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
25
26
class MapProgramLauncher(AbstractProgramLauncher):
    """
    Launch Map (multiple alignment).

    Wraps the external program 'rpt_map': the input fasta file is first
    rewritten with short headers, the program is run on it, and the initial
    headers are put back in the resulting alignment.

    Specific command-line options:
      -s: size above which a gap is not penalized anymore
      -m: penalty for a mismatch (must be < 0)
      -O: penalty for a gap opening (must be >= 0)
      -e: penalty for a gap extension (must be >= 0)
      -o: name of the output file (default=inFile+'.fa_aln')
    """

    def __init__(self):
        """
        Constructor: set the program name and the default alignment parameters.
        """
        AbstractProgramLauncher.__init__(self)
        self._prgName = "rpt_map"
        self._formatInFile = "fasta"
        # getopt-style option string: every specific option takes an argument
        self._cmdLineSpecificOptions = "s:m:O:e:o:"
        self._gapSize = 50
        self._mismatchPenalty = -8
        self._gapOpenPenalty = 16
        self._gapExtendPenalty = 4
        # empty string means "not set yet"; replaced by inFile+'.fa_aln' later
        self._outFile = ""

    def getSpecificHelpAsString(self):
        """
        Return the help on the specific options as a string.

        The defaults shown are the current values of the attributes.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n -s: size above which a gap is not penalized anymore (default='%i')" % (self.getGapSize())
        string += "\n -m: penalty for a mismatch (default='%i', match=10)" % (self.getMismatchPenalty())
        string += "\n -O: penalty for a gap opening (default='%i')" % (self.getGapOpenPenalty())
        string += "\n -e: penalty for a gap extension (default='%i')" % (self.getGapExtendPenalty())
        string += "\n -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
        return string

    def setASpecificAttributeFromCmdLine(self, o, a=""):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag (e.g. '-s'); unknown flags are silently ignored
        @param a: value given for this option, as a string
        """
        if o == "-s":
            self.setGapSize(a)
        elif o == "-m":
            self.setMismatchPenalty(a)
        elif o == "-O":
            self.setGapOpenPenalty(a)
        elif o == "-e":
            self.setGapExtendPenalty(a)
        elif o == "-o":
            self.setOutputFile(a)

    def setGapSize(self, arg):
        """Set the gap size; 'arg' is converted with int()."""
        self._gapSize = int(arg)

    def setMismatchPenalty(self, arg):
        """Set the mismatch penalty; 'arg' is converted with int()."""
        self._mismatchPenalty = int(arg)

    def setGapOpenPenalty(self, arg):
        """Set the gap opening penalty; 'arg' is converted with int()."""
        self._gapOpenPenalty = int(arg)

    def setGapExtendPenalty(self, arg):
        """Set the gap extension penalty; 'arg' is converted with int()."""
        self._gapExtendPenalty = int(arg)

    def getGapSize(self):
        """Return the size above which a gap is not penalized anymore."""
        return self._gapSize

    def getMismatchPenalty(self):
        """Return the penalty for a mismatch."""
        return self._mismatchPenalty

    def getGapOpenPenalty(self):
        """Return the penalty for a gap opening."""
        return self._gapOpenPenalty

    def getGapExtendPenalty(self):
        """Return the penalty for a gap extension."""
        return self._gapExtendPenalty

    def _exitWithUsageError(self, message):
        """
        Print an error message followed by the help, then exit with status 1.
        """
        # single-argument print(...) behaves the same as the print statement
        print(message)
        print(self.getHelpAsString())
        sys.exit(1)

    def _setDefaultOutputFileIfUnset(self):
        """
        Give the output file its default name (inFile+'.fa_aln') if it is empty.
        """
        if self.getOutputFile() == "":
            self.setOutputFile("%s.fa_aln" % (self.getInputFile()))

    def checkSpecificAttributes(self):
        """
        Check the specific attributes before running the program.

        Exits with status 1 (after printing the help) on the first invalid
        value; otherwise makes sure the output file has a name.
        """
        if self.getGapSize() <= 0:
            self._exitWithUsageError("ERROR: gap size should be > 0")
        if self.getMismatchPenalty() >= 0:
            self._exitWithUsageError("ERROR: mismatch penalty should be < 0")
        if self.getGapOpenPenalty() < 0:
            self._exitWithUsageError("ERROR: gap opening penalty should be >= 0")
        if self.getGapExtendPenalty() < 0:
            self._exitWithUsageError("ERROR: gap extension penalty should be >= 0")
        self._setDefaultOutputFileIfUnset()

    def setWrapperCommandLine(self):
        """
        Set the command-line of the wrapper.
        Required for MapClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % (self.getInputFile())
        self._wrpCmdLine += " -s %i" % (self.getGapSize())
        self._wrpCmdLine += " -m %i" % (self.getMismatchPenalty())
        self._wrpCmdLine += " -O %i" % (self.getGapOpenPenalty())
        self._wrpCmdLine += " -e %i" % (self.getGapExtendPenalty())
        self._setDefaultOutputFileIfUnset()
        self._wrpCmdLine += " -o %s" % (self.getOutputFile())
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % (self.getVerbosityLevel())

    def setProgramCommandLine(self):
        """
        Set the command-line of the program.

        The program reads '<inFile>.shortH' and its standard output is
        redirected to '<inFile>.shortH.fa_aln'.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " %s.shortH" % (self.getInputFile())
        self._prgCmdLine += " %i" % (self.getGapSize())
        self._prgCmdLine += " %i" % (self.getMismatchPenalty())
        self._prgCmdLine += " %i" % (self.getGapOpenPenalty())
        self._prgCmdLine += " %i" % (self.getGapExtendPenalty())
        self._prgCmdLine += " > %s.shortH.fa_aln" % (self.getInputFile())

    def setListFilesToKeep(self):
        """
        Set the list of files to keep (only the output file).
        """
        self._setDefaultOutputFileIfUnset()
        self.appendFileToKeep(self.getOutputFile())

    def setListFilesToRemove(self):
        """
        Set the list of files to remove (the temporary short-header files).
        """
        self.appendFileToRemove("%s.shortH" % (self.getInputFile()))
        self.appendFileToRemove("%s.shortH.fa_aln" % (self.getInputFile()))
        self.appendFileToRemove("%s.shortHlink" % (self.getInputFile()))

    def setSummary(self):
        """
        Set the summary of the run (input file, parameters and output file).
        """
        self._summary = "input file: %s" % (self.getInputFile())
        self._summary += "\ngap size: %i" % (self.getGapSize())
        self._summary += "\nmismatch penalty: %i" % (self.getMismatchPenalty())
        self._summary += "\ngap openning penalty: %i" % (self.getGapOpenPenalty())
        self._summary += "\ngap extension penalty: %i" % (self.getGapExtendPenalty())
        self._setDefaultOutputFileIfUnset()
        self._summary += "\noutput file: %s" % (self.getOutputFile())

    def run(self):
        """
        Run the program.

        Exits with status 1 if the external program returns a non-zero status.
        """
        self.start()

        # Same fallback as the other methods, so that the output file is never
        # opened with an empty name when run() is used without '-o'.
        self._setDefaultOutputFileIfUnset()

        # Remember the initial headers: they give the order of the output.
        lInitHeaders = FastaUtils.dbHeaders(self.getInputFile(), self.getVerbosityLevel() - 1)

        # Step 1 writes '<inFile>.shortH' and the link file
        # (presumably the sequences with shortened 'seq' headers -- TODO confirm).
        csh = ChangeSequenceHeaders()
        csh.setInputFile(self.getInputFile())
        csh.setFormat("fasta")
        csh.setStep(1)
        csh.setPrefix("seq")
        csh.setLinkFile("%s.shortHlink" % (self.getInputFile()))
        csh.setOutputFile("%s.shortH" % (self.getInputFile()))
        csh.setVerbosityLevel(self.getVerbosityLevel() - 1)
        csh.run()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print("LAUNCH: %s" % (cmd))
            sys.stdout.flush()
        # NOTE(review): the command is a single string interpreted by the shell
        # (it relies on '>' redirection), so an input file name containing
        # spaces or shell metacharacters is not handled.
        returnStatus = os.system(cmd)
        if returnStatus != 0:
            string = "ERROR: program '%s' returned status '%i'" % (self.getProgramName(), returnStatus)
            print(string)
            sys.exit(1)

        # Step 2 uses the link file to write '<inFile>.shortH.fa_aln.initH'
        # (presumably the alignment with the initial headers -- TODO confirm).
        csh.setInputFile("%s.shortH.fa_aln" % (self.getInputFile()))
        csh.setFormat("fasta")
        csh.setStep(2)
        csh.setLinkFile("%s.shortHlink" % (self.getInputFile()))
        csh.setOutputFile("%s.shortH.fa_aln.initH" % (self.getInputFile()))
        csh.setVerbosityLevel(self.getVerbosityLevel() - 1)
        csh.run()

        # Write the aligned sequences upper-cased, in the order of the input.
        absDB = AlignedBioseqDB("%s.shortH.fa_aln.initH" % (self.getInputFile()))
        outFileHandler = open(self.getOutputFile(), "w")
        try:
            for header in lInitHeaders:
                bs = absDB.fetch(header)
                bs.upCase()
                bs.write(outFileHandler)
        finally:
            # close the handle even if a sequence cannot be fetched or written
            outFileHandler.close()
        if self.getClean():
            os.remove("%s.shortH.fa_aln.initH" % (self.getInputFile()))

        self.end()
246
247
if __name__ == "__main__":
    # Script entry point: configure the launcher from the command line, then run it.
    launcher = MapProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()