comparison commons/launcher/MafftProgramLauncher.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 #!/usr/bin/env python
2
3 ##@file
4 # Launch Mafft (multiple alignment).
5 #
6 # options:
7 # -h: this help
8 # -i: name of the input file (format='fasta')
9 # -p: parameters for 'mafft' (default='--auto')
10 # -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
11 # -c: clean
12 # -v: verbosity level (default=0/1)
13
14
15 import os
16 import sys
17 import getopt
18 import exceptions
19
20 from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
21 from pyRepet.seq.fastaDB import *
22 from commons.core.seq.FastaUtils import FastaUtils
23 from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
24 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
25
26
class MafftProgramLauncher( AbstractProgramLauncher ):
    """
    Launch Mafft (multiple alignment).

    The launcher rewrites the headers of the input fasta file via
    ChangeSequenceHeaders, runs 'mafft' on the rewritten file, maps the
    headers of the alignment back, and writes the aligned sequences in
    upper case, in the order of the headers of the input file.
    """


    def __init__( self ):
        """
        Constructor.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "mafft"
        self._formatInFile = "fasta"
        self._prgParam = "--auto"
        # '-p' and '-o' both take a value
        # (presumably parsed with getopt by the parent class -- TODO confirm)
        self._cmdLineSpecificOptions = "p:o:"


    def _setDefaultOutputFile( self ):
        """
        Set the output file to inFile+'.fa_aln' if none was given.

        Shared by every method needing the output file name, so that the
        default is defined in a single place.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        lines = [
            "",
            "specific options:",
            " -p: parameters for '%s' (default='--auto')" % ( self.getProgramName() ),
            " -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')",
        ]
        # the leading empty item makes the help start with a newline
        return "\n".join( lines )


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.

        @param o: option flag ('-p' or '-o'; any other flag is ignored)
        @param a: value given with the option
        """
        if o == "-p":
            self.setProgramParameters( a )
        elif o == "-o":
            self.setOutputFile( a )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.

        Only the output file is handled: it gets its default name if empty.
        """
        self._setDefaultOutputFile()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for MafftClusterLauncher.
        """
        # NOTE(review): file names are inserted unquoted; paths containing
        # spaces or shell metacharacters would break the command.
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
        self._setDefaultOutputFile()
        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.

        'mafft' reads inFile+'.shortH' and its standard output is
        redirected to inFile+'.shortH.fa_aln'.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " %s" % ( self.getProgramParameters() )
        # add '--quiet' at verbosity 0, unless the user parameters have it
        if self.getVerbosityLevel() == 0 and "--quiet" not in self._prgCmdLine:
            self._prgCmdLine += " --quiet"
        self._prgCmdLine += " %s.shortH" % ( self.getInputFile() )
        self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() )
        # NOTE(review): reads '_verbose' directly whereas the rest of the
        # class uses getVerbosityLevel(); presumably the same value -- confirm
        # against AbstractProgramLauncher.
        if self._verbose < 2:
            # discard the standard error of the program
            self._prgCmdLine += " 2> /dev/null"


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.
        """
        self._setDefaultOutputFile()
        self.appendFileToKeep( self.getOutputFile() )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.
        """
        for suffix in ( "shortH", "shortH.fa_aln", "shortHlink" ):
            self.appendFileToRemove( "%s.%s" % ( self.getInputFile(), suffix ) )


    def setSummary( self ):
        """
        Set the summary (input file, parameters, output file).
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
        self._setDefaultOutputFile()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.

        Exit with status 1 if the program returns a non-zero exit status.
        """
        self.start()

        inFile = self.getInputFile()
        shortHFile = "%s.shortH" % ( inFile )
        shortHTmpFile = "%s.shortHtmp" % ( inFile )
        linkFile = "%s.shortHlink" % ( inFile )
        shortHAlnFile = "%s.shortH.fa_aln" % ( inFile )
        initHAlnFile = "%s.shortH.fa_aln.initH" % ( inFile )

        # headers of the input file, used below to order the output
        lInitHeaders = FastaUtils.dbHeaders( inFile, self.getVerbosityLevel()-1 )

        # step 1 of ChangeSequenceHeaders: presumably replaces each header by
        # a short one prefixed with 'seq' and records the mapping in the link
        # file -- TODO confirm against ChangeSequenceHeaders
        csh = ChangeSequenceHeaders()
        csh.setInputFile( inFile )
        csh.setFormat( "fasta" )
        csh.setStep( 1 )
        csh.setPrefix( "seq" )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( shortHFile )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        # put the sequences in upper case; save to a temporary file first,
        # then rename it over the '.shortH' file
        bsDB = BioseqDB( shortHFile )
        bsDB.upCase()
        bsDB.save( shortHTmpFile )
        del bsDB
        os.rename( shortHTmpFile, shortHFile )

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            sys.stdout.write( "LAUNCH: %s\n" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            sys.stdout.write( "%s\n" % ( string ) )
            sys.exit(1)

        # step 2 of ChangeSequenceHeaders: presumably retrieves the initial
        # headers thanks to the link file -- TODO confirm
        csh.setInputFile( shortHAlnFile )
        csh.setFormat( "fasta" )
        csh.setStep( 2 )
        csh.setLinkFile( linkFile )
        csh.setOutputFile( initHAlnFile )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        # write the aligned sequences in the order of the input headers
        absDB = AlignedBioseqDB( initHAlnFile )
        outFileHandler = open( self.getOutputFile(), "w" )
        try:
            for header in lInitHeaders:
                bs = absDB.fetch( header )
                bs.upCase()
                bs.write( outFileHandler )
        finally:
            # close the handler even if a sequence cannot be fetched or written
            outFileHandler.close()
        os.remove( initHAlnFile )

        self.end()
183
184
if __name__ == "__main__":
    # script entry point: build the launcher, configure it from the
    # command-line arguments, then run it
    launcher = MafftProgramLauncher()
    launcher.setAttributesFromCmdLine()
    launcher.run()