| 18 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 # Copyright INRA (Institut National de la Recherche Agronomique) | 
|  | 4 # http://www.inra.fr | 
|  | 5 # http://urgi.versailles.inra.fr | 
|  | 6 # | 
|  | 7 # This software is governed by the CeCILL license under French law and | 
|  | 8 # abiding by the rules of distribution of free software.  You can  use, | 
|  | 9 # modify and/ or redistribute the software under the terms of the CeCILL | 
|  | 10 # license as circulated by CEA, CNRS and INRIA at the following URL | 
|  | 11 # "http://www.cecill.info". | 
|  | 12 # | 
|  | 13 # As a counterpart to the access to the source code and  rights to copy, | 
|  | 14 # modify and redistribute granted by the license, users are provided only | 
|  | 15 # with a limited warranty  and the software's author,  the holder of the | 
|  | 16 # economic rights,  and the successive licensors  have only  limited | 
|  | 17 # liability. | 
|  | 18 # | 
|  | 19 # In this respect, the user's attention is drawn to the risks associated | 
|  | 20 # with loading,  using,  modifying and/or developing or reproducing the | 
|  | 21 # software by the user in light of its specific status of free software, | 
|  | 22 # that may mean  that it is complicated to manipulate,  and  that  also | 
|  | 23 # therefore means  that it is reserved for developers  and  experienced | 
|  | 24 # professionals having in-depth computer knowledge. Users are therefore | 
|  | 25 # encouraged to load and test the software's suitability as regards their | 
|  | 26 # requirements in conditions enabling the security of their systems and/or | 
|  | 27 # data to be ensured and,  more generally, to use and operate it in the | 
|  | 28 # same conditions as regards security. | 
|  | 29 # | 
|  | 30 # The fact that you are presently reading this means that you have had | 
|  | 31 # knowledge of the CeCILL license and that you accept its terms. | 
|  | 32 | 
|  | 33 from commons.core.LoggerFactory import LoggerFactory | 
|  | 34 from commons.core.utils.RepetOptionParser import RepetOptionParser | 
|  | 35 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders | 
|  | 36 import subprocess | 
|  | 37 import os | 
|  | 38 from commons.core.seq.Bioseq import Bioseq | 
|  | 39 import shutil | 
|  | 40 | 
|  | 41 LOG_DEPTH = "repet.core.launchers" | 
|  | 42 | 
|  | 43 | 
|  | 44 | 
|  | 45 class LaunchPhyML(object): | 
|  | 46     """ | 
|  | 47     Launch 'PhyML' | 
|  | 48     """ | 
|  | 49     def __init__(self, inputFileName="", outFileName="",dataType= "nt", interleavedFormat= True, nbDataSets=1, nbBootDataSets=0, substModel="HKY85", ratioTsTv=4.0, propInvSites= 0.0, nbCat=1, gammaParam=1.0, startTree="BIONJ", paramOptimisation = "tlr", clean=False, verbosity=3 ): | 
|  | 50         self.inputFileName = inputFileName | 
|  | 51         self.outFileName=outFileName | 
|  | 52         self.dataType = dataType                    #"nt or aa" | 
|  | 53         self._setSeqFormat(interleavedFormat)       #if False -q" | 
|  | 54         self.nbDataSets = nbDataSets | 
|  | 55         self.nbBootDataSets = nbBootDataSets | 
|  | 56         self.substModel = substModel | 
|  | 57         self.ratioTsTv = ratioTsTv | 
|  | 58         self.propInvSites = propInvSites            # propInvSites="e" replaced by 0.0; should be in [0-1] | 
|  | 59         self.nbCat = nbCat                          # Number of categories less than four or higher than eight are not recommended. | 
|  | 60         self.gammaParam = gammaParam | 
|  | 61         self.startTree = startTree                  #by default is BIONJ used reformatedInputFileName+"_phyml_tree.txt" instead | 
|  | 62         self.paramOptimisation = paramOptimisation  # used instead of self.optTopology="y", self.optBranchRate="y" | 
|  | 63                                                     #This option focuses on specific parameter optimisation. | 
|  | 64                                                     #tlr : tree topology (t), branch length (l) and rate parameters (r) are optimised. | 
|  | 65                                                     #tl  : tree topology and branch length are optimised. | 
|  | 66                                                     #lr  : branch length and rate parameters are optimised. | 
|  | 67                                                     #l   : branch length are optimised. | 
|  | 68                                                     #r   : rate parameters are optimised. | 
|  | 69                                                     #n   : no parameter is optimised. | 
|  | 70 | 
|  | 71         self._clean = clean | 
|  | 72         self._verbosity = verbosity | 
|  | 73         self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity) | 
|  | 74 | 
|  | 75     def _setSeqFormat(self, interleavedFormat): | 
|  | 76         if not (interleavedFormat) : | 
|  | 77             self.seqFormat = " -q" | 
|  | 78         else : | 
|  | 79             self.seqFormat = "" | 
|  | 80 | 
|  | 81     def setAttributesFromCmdLine(self): | 
|  | 82         description = "usage: LaunchPhyML.py [ options ]" | 
|  | 83         epilog = "\n -h: this help\n" | 
|  | 84         epilog += "\t -i: name of the input file (refseq is first, format='fasta')" | 
|  | 85         epilog += "\n\t" | 
|  | 86         parser = RepetOptionParser(description = description, epilog = epilog) | 
|  | 87         parser.add_option("-i", "--fasta",      dest = "inputFileName", action = "store",       type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "") | 
|  | 88         parser.add_option("-o", "--out",        dest = "outFileName",   action = "store",       type = "string", help = "output file name [default: <input>.out]", default = "") | 
|  | 89         parser.add_option("-v", "--verbosity",  dest = "verbosity",     action = "store",       type = "int",    help = "verbosity [optional] [default: 1]", default = 1) | 
|  | 90         options = parser.parse_args()[0] | 
|  | 91         self._setAttributesFromOptions(options) | 
|  | 92 | 
|  | 93     def _setAttributesFromOptions(self, options): | 
|  | 94         self.inputFileName = options.inputFileName | 
|  | 95         self.setOutFileName = options.outFileName | 
|  | 96         self._verbosity = options.verbosity | 
|  | 97 | 
|  | 98     def _checkOptions(self): | 
|  | 99         if self.inputFileName == "": | 
|  | 100             self._logAndRaise("ERROR: Missing input file name") | 
|  | 101 | 
|  | 102         if self.outFileName == "": | 
|  | 103             self.outFileName = "%s_phyml.newick" % (self.inputFileName) | 
|  | 104 | 
|  | 105     def _logAndRaise(self, errorMsg): | 
|  | 106         self._log.error(errorMsg) | 
|  | 107         raise Exception(errorMsg) | 
|  | 108 | 
|  | 109     def _shortenHeaders(self): | 
|  | 110         self.csh = ChangeSequenceHeaders() | 
|  | 111         self.csh.setInputFile(self.inputFileName) | 
|  | 112         self.csh.setFormat("fasta") | 
|  | 113         self.csh.setStep(1) | 
|  | 114         self.csh.setPrefix("seq") | 
|  | 115         self.csh.setLinkFile(self.inputFileName+".shortHlink") | 
|  | 116         self.csh.setOutputFile(self.inputFileName+".shortH") | 
|  | 117         self.csh.setVerbosityLevel(self._verbosity-1) | 
|  | 118         self.csh.run() | 
|  | 119         self.shortInputFileName = self.inputFileName+".shortH" | 
|  | 120 | 
|  | 121     def _renameHeaders(self): | 
|  | 122         self.csh.setInputFile(self.phyml_tree) | 
|  | 123         self.csh.setFormat("newick") | 
|  | 124         self.csh.setStep(2) | 
|  | 125         self.csh.setLinkFile(self.inputFileName+".shortHlink" ) | 
|  | 126         self.csh.setOutputFile(self.outFileName) | 
|  | 127         self.csh.setVerbosityLevel(self._verbosity-1) | 
|  | 128         self.csh.run() | 
|  | 129 | 
|  | 130     def run(self): | 
|  | 131         LoggerFactory.setLevel(self._log, self._verbosity) | 
|  | 132         self._checkOptions() | 
|  | 133         self._log.info("START LaunchPhyML") | 
|  | 134         self._log.debug("building a multiple alignment from '%s'..." % ( self.inputFileName)) | 
|  | 135 | 
|  | 136         inputFileName = "%s/%s" % (os.getcwd(), os.path.basename(self.inputFileName)) | 
|  | 137         if not os.path.exists(inputFileName): | 
|  | 138             os.symlink(self.inputFileName, inputFileName) | 
|  | 139         self.inputFileName = inputFileName | 
|  | 140 | 
|  | 141         self._shortenHeaders() | 
|  | 142 | 
|  | 143         cmd = "sreformat phylip %s" % (self.shortInputFileName) | 
|  | 144 | 
|  | 145         with open (self.reformatedInputFileName, "w") as fPhylip : | 
|  | 146 | 
|  | 147             process = subprocess.Popen(cmd.split(' '), stdout= fPhylip , stderr=subprocess.PIPE) | 
|  | 148             self._log.debug("Running : %s" % cmd) | 
|  | 149             output = process.communicate() | 
|  | 150             self._log.debug("Output:\n%s" % output[0]) | 
|  | 151             if process.returncode != 0: | 
|  | 152                 self._logAndRaise("ERROR when launching '%s'" % cmd) | 
|  | 153 | 
|  | 154         self.reformatedInputFileName = "%s.phylip" % self.shortInputFileName | 
|  | 155         self.phyml_tree = "%s_phyml_tree.txt" %self.reformatedInputFileName | 
|  | 156         cpyPhyml_tree = "%s_cpy" %self.phyml_tree | 
|  | 157         shutil.copyfile(self.phyml_tree,cpyPhyml_tree) | 
|  | 158 | 
|  | 159         cmd = "phyml -i %s -d %s%s -n %d -b %d -m %s -t %f -v %f -c %d -a %f -u %s -o %s" % (self.reformatedInputFileName, self.dataType, self.seqFormat, self.nbDataSets,self.nbBootDataSets,self.substModel, self.ratioTsTv, self.propInvSites,self.nbCat,self.gammaParam, cpyPhyml_tree , self.paramOptimisation ) | 
|  | 160         print cmd | 
|  | 161         process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 
|  | 162         self._log.debug("Running : %s" % cmd) | 
|  | 163         output = process.communicate() | 
|  | 164         self._log.debug("Output:\n%s" % output[0]) | 
|  | 165         if process.returncode != 0: | 
|  | 166             self._logAndRaise("ERROR when launching '%s'" % cmd) | 
|  | 167 | 
|  | 168         self._renameHeaders() | 
|  | 169 | 
|  | 170         if self._clean: | 
|  | 171             for f in [ self.shortInputFileName, self.inputFileName+".shortHlink", self.inputFileName+".shortH.phylip",self.inputFileName+".shortH.phylip_phyml_lk.txt", self.phyml_tree ]: | 
|  | 172                 os.remove(f) | 
|  | 173             os.system( "mv %s.phylip_phyml_stat.txt %s_phyml.txt" % ( self.shortInputFileName, self.inputFileName ) ) | 
|  | 174 | 
|  | 175         self._log.info("Finished running LaunchPhyML") | 
|  | 176 | 
|  | 177 |