annotate commons/launcher/LaunchPhyML.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 # Copyright INRA (Institut National de la Recherche Agronomique)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 # http://www.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # http://urgi.versailles.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # This software is governed by the CeCILL license under French law and
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # abiding by the rules of distribution of free software. You can use,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # modify and/ or redistribute the software under the terms of the CeCILL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 # license as circulated by CEA, CNRS and INRIA at the following URL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # "http://www.cecill.info".
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 # As a counterpart to the access to the source code and rights to copy,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 # modify and redistribute granted by the license, users are provided only
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 # with a limited warranty and the software's author, the holder of the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 # economic rights, and the successive licensors have only limited
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 # liability.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 # In this respect, the user's attention is drawn to the risks associated
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 # with loading, using, modifying and/or developing or reproducing the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 # software by the user in light of its specific status of free software,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 # that may mean that it is complicated to manipulate, and that also
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 # therefore means that it is reserved for developers and experienced
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 # professionals having in-depth computer knowledge. Users are therefore
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 # encouraged to load and test the software's suitability as regards their
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 # requirements in conditions enabling the security of their systems and/or
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 # data to be ensured and, more generally, to use and operate it in the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 # same conditions as regards security.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 # The fact that you are presently reading this means that you have had
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 # knowledge of the CeCILL license and that you accept its terms.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33 from commons.core.LoggerFactory import LoggerFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 from commons.core.utils.RepetOptionParser import RepetOptionParser
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 import subprocess
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 from commons.core.seq.Bioseq import Bioseq
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 import shutil
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 LOG_DEPTH = "repet.core.launchers"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class LaunchPhyML(object):
    """
    Launch 'PhyML' on a fasta alignment and write the resulting tree in newick format.

    The run is made of four steps (see run()):
      1. shorten the sequence headers with ChangeSequenceHeaders,
      2. convert the alignment to phylip with 'sreformat',
      3. launch 'phyml' on the phylip file,
      4. put the original headers back into the tree written by PhyML.

    Both 'sreformat' and 'phyml' are looked up in the PATH.
    """

    def __init__(self, inputFileName="", outFileName="",dataType= "nt", interleavedFormat= True, nbDataSets=1, nbBootDataSets=0, substModel="HKY85", ratioTsTv=4.0, propInvSites= 0.0, nbCat=1, gammaParam=1.0, startTree="BIONJ", paramOptimisation = "tlr", clean=False, verbosity=3 ):
        """
        @param inputFileName: alignment to analyse (fasta)
        @param outFileName: newick output; if empty, '<input>_phyml.newick' is used
        @param dataType: passed to 'phyml -d' ("nt" or "aa")
        @param interleavedFormat: if False, '-q' is added to the phyml command line
        @param nbDataSets: passed to 'phyml -n'
        @param nbBootDataSets: passed to 'phyml -b'
        @param substModel: passed to 'phyml -m'
        @param ratioTsTv: passed to 'phyml -t'
        @param propInvSites: passed to 'phyml -v' (should be in [0-1])
        @param nbCat: passed to 'phyml -c' (fewer than four or more than eight categories are not recommended)
        @param gammaParam: passed to 'phyml -a'
        @param startTree: stored only; it is not used to build the command line
        @param paramOptimisation: passed to 'phyml -o', one of:
            tlr : tree topology (t), branch length (l) and rate parameters (r) are optimised
            tl  : tree topology and branch length are optimised
            lr  : branch length and rate parameters are optimised
            l   : branch length are optimised
            r   : rate parameters are optimised
            n   : no parameter is optimised
        @param clean: if True, intermediate files are removed at the end of run()
        @param verbosity: logger verbosity
        """
        self.inputFileName = inputFileName
        self.outFileName = outFileName
        self.dataType = dataType
        self._setSeqFormat(interleavedFormat)
        self.nbDataSets = nbDataSets
        self.nbBootDataSets = nbBootDataSets
        self.substModel = substModel
        self.ratioTsTv = ratioTsTv
        self.propInvSites = propInvSites
        self.nbCat = nbCat
        self.gammaParam = gammaParam
        self.startTree = startTree
        self.paramOptimisation = paramOptimisation

        self._clean = clean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def _setSeqFormat(self, interleavedFormat):
        """
        Set self.seqFormat to " -q" when the alignment is not interleaved, to "" otherwise.
        """
        if interleavedFormat:
            self.seqFormat = ""
        else:
            self.seqFormat = " -q"

    def setAttributesFromCmdLine(self):
        """
        Parse the command line (-i, -o, -v) and copy the values into the instance.
        """
        description = "usage: LaunchPhyML.py [ options ]"
        epilog = "\n -h: this help\n"
        epilog += "\t -i: name of the input file (refseq is first, format='fasta')"
        epilog += "\n\t"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-i", "--fasta", dest = "inputFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.out]", default = "")
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """
        Copy inputFileName, outFileName and verbosity from the parsed options.
        """
        self.inputFileName = options.inputFileName
        # The value used to be stored in 'self.setOutFileName', which nothing
        # reads, so the -o option had no effect.
        self.outFileName = options.outFileName
        self._verbosity = options.verbosity

    def _checkOptions(self):
        """
        Raise if no input file was given; fill in the default output file name.
        """
        if self.inputFileName == "":
            self._logAndRaise("ERROR: Missing input file name")

        if self.outFileName == "":
            self.outFileName = "%s_phyml.newick" % (self.inputFileName)

    def _logAndRaise(self, errorMsg):
        """
        Log errorMsg at error level, then raise it as an Exception.
        """
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def _shortenHeaders(self):
        """
        Run ChangeSequenceHeaders (step 1, prefix "seq") on the input fasta.

        Writes '<input>.shortH' and the link file '<input>.shortHlink', and
        sets self.shortInputFileName. Presumably the headers are replaced by
        short 'seqN' names that fit the phylip format - confirm against
        ChangeSequenceHeaders.
        """
        self.shortInputFileName = self.inputFileName + ".shortH"
        self.csh = ChangeSequenceHeaders()
        self.csh.setInputFile(self.inputFileName)
        self.csh.setFormat("fasta")
        self.csh.setStep(1)
        self.csh.setPrefix("seq")
        self.csh.setLinkFile(self.inputFileName + ".shortHlink")
        self.csh.setOutputFile(self.shortInputFileName)
        self.csh.setVerbosityLevel(self._verbosity - 1)
        self.csh.run()

    def _renameHeaders(self):
        """
        Run ChangeSequenceHeaders (step 2, newick format) on the PhyML tree,
        using the link file written by _shortenHeaders(), and write the result
        to self.outFileName. Must be called after _shortenHeaders(), which
        creates self.csh.
        """
        self.csh.setInputFile(self.phyml_tree)
        self.csh.setFormat("newick")
        self.csh.setStep(2)
        self.csh.setLinkFile(self.inputFileName + ".shortHlink")
        self.csh.setOutputFile(self.outFileName)
        self.csh.setVerbosityLevel(self._verbosity - 1)
        self.csh.run()

    def _runCommand(self, args, stdout=subprocess.PIPE):
        """
        Run an external program and raise (via _logAndRaise) if it exits with a non-zero status.

        @param args: program name and arguments, as a list (no shell, so paths may contain spaces)
        @param stdout: where the standard output goes (a pipe by default, or an open file)
        """
        cmd = " ".join(args)
        self._log.debug("Running : %s" % cmd)
        process = subprocess.Popen(args, stdout=stdout, stderr=subprocess.PIPE)
        out, err = process.communicate()
        # 'out' is None when the standard output was redirected to a file
        if out is not None:
            self._log.debug("Output:\n%s" % out)
        if process.returncode != 0:
            if err:
                self._log.error("Error output:\n%s" % err)
            self._logAndRaise("ERROR when launching '%s'" % cmd)

    def _buildPhymlArgs(self, startTreeFile=None):
        """
        Return the 'phyml' command line as a list of arguments.

        @param startTreeFile: if given, passed to PhyML with '-u' (user starting tree);
                              otherwise '-u' is left out
        """
        args = ["phyml", "-i", self.reformatedInputFileName, "-d", self.dataType]
        seqFormatFlag = self.seqFormat.strip()
        if seqFormatFlag:
            args.append(seqFormatFlag)
        args += ["-n", "%d" % self.nbDataSets,
                 "-b", "%d" % self.nbBootDataSets,
                 "-m", self.substModel,
                 "-t", "%f" % self.ratioTsTv,
                 "-v", "%f" % self.propInvSites,
                 "-c", "%d" % self.nbCat,
                 "-a", "%f" % self.gammaParam]
        if startTreeFile:
            args += ["-u", startTreeFile]
        args += ["-o", self.paramOptimisation]
        return args

    def run(self):
        """
        Build the tree: shorten headers, convert to phylip, launch PhyML, restore headers.

        Work is done in the current directory: if the input file is not there,
        a symbolic link to it is created and used instead.
        Raises an Exception if the input file name is missing or if an
        external program fails.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START LaunchPhyML")
        self._log.debug("building a phylogenetic tree from '%s'..." % (self.inputFileName))

        localInputFileName = os.path.join(os.getcwd(), os.path.basename(self.inputFileName))
        if not os.path.exists(localInputFileName):
            os.symlink(os.path.abspath(self.inputFileName), localInputFileName)
        self.inputFileName = localInputFileName

        self._shortenHeaders()

        # The phylip file name must be known before the file is opened
        # (it used to be assigned only after the 'sreformat' call).
        self.reformatedInputFileName = "%s.phylip" % self.shortInputFileName
        with open(self.reformatedInputFileName, "w") as fPhylip:
            self._runCommand(["sreformat", "phylip", self.shortInputFileName], stdout=fPhylip)

        # File in which PhyML writes its tree.
        self.phyml_tree = "%s_phyml_tree.txt" % self.reformatedInputFileName

        # A tree left by a previous run is copied and used as starting tree,
        # as before. When there is none (first run), '-u' is left out instead
        # of failing on the copy, and PhyML builds its own starting tree.
        startTreeFile = None
        if os.path.exists(self.phyml_tree):
            startTreeFile = "%s_cpy" % self.phyml_tree
            shutil.copyfile(self.phyml_tree, startTreeFile)

        self._runCommand(self._buildPhymlArgs(startTreeFile))

        self._renameHeaders()

        if self._clean:
            tmpFiles = [self.shortInputFileName,
                        self.inputFileName + ".shortHlink",
                        self.reformatedInputFileName,
                        self.reformatedInputFileName + "_phyml_lk.txt",
                        self.phyml_tree]
            if startTreeFile:
                tmpFiles.append(startTreeFile)
            for f in tmpFiles:
                # some of these files may be absent; skip them rather than crash
                if os.path.exists(f):
                    os.remove(f)
            # NOTE(review): the statistics file is renamed only when cleaning;
            # confirm this matches the intended behaviour.
            statFileName = "%s.phylip_phyml_stat.txt" % self.shortInputFileName
            if os.path.exists(statFileName):
                shutil.move(statFileName, "%s_phyml.txt" % self.inputFileName)

        self._log.info("Finished running LaunchPhyML")
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
176
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
177