diff commons/launcher/LaunchMap.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchMap.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.LoggerFactory import LoggerFactory
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
+from commons.core.seq.FastaUtils import FastaUtils
+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
+from commons.core.utils.FileUtils import FileUtils
+import os
+import subprocess
+
+# Prefix of the logger name: LaunchMap appends its own class name to it
+# ("<LOG_DEPTH>.<class name>") when it creates its logger.
+LOG_DEPTH = "repet.tools"
+
+##Reference launcher implementation
+#
+class LaunchMap(object):
+    
+    def __init__(self, fastaFileName = "", outFileName = "", gapSize = 50, mismatchPenalty = -8, gapOpenPenalty = 16, gapExtendPenalty = 4, doClean = False, verbosity = 0):
+        self._fastaFileName = fastaFileName
+        self.setOutFileName(outFileName)
+        self._gapSize = gapSize
+        self._mismatchPenalty = mismatchPenalty
+        self._gapOpenPenalty = gapOpenPenalty
+        self._gapExtendPenalty = gapExtendPenalty
+        self._doClean = doClean
+        self._verbosity = verbosity
+        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
+        
+    def setAttributesFromCmdLine(self):
+#        description = "Launch template to create a launcher."
+#        epilog = "\nExample 1: launch without verbosity and keep temporary files.\n"
+#        epilog += "\t$ python LaunchTemplate.py -i file.fa -v 0"
+#        epilog += "\n\t"
+#        epilog += "\nExample 2: launch with verbosity to have errors (level 1) and basic information (level 2), and delete temporary files.\n"
+#        epilog += "\t$ python LaunchTemplate.py -i file.fa -c -v 2"
+#        parser = RepetOptionParser(description = description, epilog = epilog)
+        parser = RepetOptionParser(description = "", epilog = "")
+        parser.add_option("-i", "--fasta",      dest = "fastaFileName", action = "store",       type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
+        parser.add_option("-o", "--out",        dest = "outFileName",   action = "store",       type = "string", help = "output file name [default: <input>.fa_aln]", default = "")
+        parser.add_option("-s", "--gapSize",    dest = "gapSize",       action = "store",       type = "int",    help = "size above which a gap is not penalized anymore [optional] [default: 50]", default = 50)
+        parser.add_option("-m", "--mismatch",   dest = "mismatch",      action = "store",       type = "int",    help = "penalty for a mismatch [optional] [default: -8]", default = -8)
+        parser.add_option("-O", "--gapOpen",    dest = "gapOpen",       action = "store",       type = "int",    help = "penalty for a gap opening [optional] [default: 16]", default = 16)
+        parser.add_option("-e", "--gapExtend",  dest = "gapExtend",     action = "store",       type = "int",    help = "penalty for a gap extension [optional] [default: 4]", default = 4)
+        parser.add_option("-c", "--clean",      dest = "doClean",       action = "store_true",                   help = "clean temporary files [optional] [default: False]", default = False)
+        parser.add_option("-v", "--verbosity",  dest = "verbosity",     action = "store",       type = "int",    help = "verbosity [optional] [default: 1]", default = 1)
+        options = parser.parse_args()[0]
+        self._setAttributesFromOptions(options)
+        
+    def _setAttributesFromOptions(self, options):
+        self.setFastaFileName(options.fastaFileName)
+        self.setOutFileName(options.outFileName)
+        self.setGapSize(options.gapSize)
+        self.setMismatchPenalty(options.mismatch)
+        self.setGapOpenPenalty(options.gapOpen)
+        self.setGapExtendPenalty(options.gapExtend)
+        self.setDoClean(options.doClean)
+        self.setVerbosity(options.verbosity)
+
+    def setFastaFileName(self, fastaFileName):
+        self._fastaFileName = fastaFileName
+        
+    def setOutFileName(self, outFileName):
+        if outFileName == "":
+            self._outFileName = "%s.fa_aln" % self._fastaFileName
+        else:
+            self._outFileName = outFileName
+        
+    def setGapSize(self, gapSize):
+        self._gapSize = gapSize
+
+    def setMismatchPenalty(self, mismatchPenalty):
+        self._mismatchPenalty = mismatchPenalty
+        
+    def setGapOpenPenalty(self, gapOpenPenalty):
+        self._gapOpenPenalty = gapOpenPenalty
+        
+    def setGapExtendPenalty(self, gapExtendPenalty):
+        self._gapExtendPenalty = gapExtendPenalty
+        
+    def setDoClean(self, doClean):
+        self._doClean = doClean
+        
+    def setVerbosity(self, verbosity):
+        self._verbosity = verbosity
+        
+    def _checkOptions(self):
+        if self._fastaFileName == "":
+            self._logAndRaise("ERROR: Missing input fasta file name")
+        if not FileUtils.isRessourceExists(self._fastaFileName):
+            self._logAndRaise("ERROR: Input fasta file name %s doesn't exist." % self._fastaFileName)
+            
+    def _logAndRaise(self, errorMsg):
+        self._log.error(errorMsg)
+        raise Exception(errorMsg)
+    
+    def getMapCmd(self):
+        cmd = "rpt_map"
+        cmd += " %s.shortH" % self._fastaFileName
+        cmd += " %i" % self._gapSize
+        cmd += " %i" % self._mismatchPenalty
+        cmd += " %i" % self._gapOpenPenalty
+        cmd += " %i" % self._gapExtendPenalty
+        cmd += " > %s.shortH.fa_aln" % self._fastaFileName
+        return cmd
+                    
+    def run(self):
+        LoggerFactory.setLevel(self._log, self._verbosity)
+        self._checkOptions()
+        self._log.info("START LaunchMap")
+        self._log.debug("Fasta file name: %s" % self._fastaFileName)
+        
+        lInitHeaders = FastaUtils.dbHeaders(self._fastaFileName, self._verbosity - 1)
+        
+        csh = ChangeSequenceHeaders()
+        csh.setInputFile(self._fastaFileName)
+        csh.setFormat("fasta")
+        csh.setStep(1)
+        csh.setPrefix("seq")
+        csh.setLinkFile("%s.shortHlink" % self._fastaFileName)
+        csh.setOutputFile("%s.shortH" % self._fastaFileName)
+        csh.setVerbosityLevel(self._verbosity - 1)
+        csh.run()
+        
+        cmd = self.getMapCmd()
+        process = subprocess.Popen(cmd, shell = True)
+        self._log.debug("Running : %s" % cmd)
+        process.communicate()
+        if process.returncode != 0:
+            self._logAndRaise("ERROR when launching '%s'" % cmd)
+            
+        csh.setInputFile("%s.shortH.fa_aln" % self._fastaFileName)
+        csh.setFormat("fasta")
+        csh.setStep(2)
+        csh.setLinkFile("%s.shortHlink" % self._fastaFileName)
+        csh.setOutputFile("%s.shortH.fa_aln.initH" % self._fastaFileName)
+        csh.setVerbosityLevel(self._verbosity - 1)
+        csh.run()
+        
+        absDB = AlignedBioseqDB("%s.shortH.fa_aln.initH" % self._fastaFileName)
+        outFileHandler = open(self._outFileName, "w")
+        for header in lInitHeaders:
+            bs = absDB.fetch(header)
+            bs.upCase()
+            bs.write(outFileHandler)
+        outFileHandler.close()
+        if self._doClean:
+            os.remove("%s.shortH" % self._fastaFileName)
+            os.remove("%s.shortHlink" % self._fastaFileName)
+            os.remove("%s.shortH.fa_aln" % self._fastaFileName)
+            os.remove("%s.shortH.fa_aln.initH" % self._fastaFileName)
+        self._log.info("END Launch")
+
+if __name__ == "__main__":
+    iLaunch = LaunchMap()
+    iLaunch.setAttributesFromCmdLine()
+    iLaunch.run()        
\ No newline at end of file