diff SMART/Java/Python/SelectByTag.py @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents 769e306b7933
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/SelectByTag.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,148 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Select the transcript such that a tag value is not less than a given threshold"""
+import os
+import sys
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer import MySqlTranscriptWriter
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.RPlotter import RPlotter
+
+class SelectByTag(object):
+    
+    def __init__(self, verbosity = 1):
+        self.input     = None
+        self.format    = None
+        self.tag       = None
+        self.value     = None
+        self.min       = None
+        self.max       = None
+        self.default   = None
+        self.output    = None
+        self.mysql     = None
+        self.verbosity = verbosity
+
+        self.parser      = None
+        self.writer      = None
+        self.mysqlWriter = None
+        self.nbElements  = None
+        self.nbWritten   = 0
+
+    
+    def setParser(self):
+        self.parser     = TranscriptContainer(self.input, self.format, self.verbosity)
+        self.nbElements = self.parser.getNbTranscripts()
+
+
+    def setWriter(self):
+        self.writer = Gff3Writer(self.output, self.verbosity)
+        if self.mysql:
+            self.mysqlWriter = MySqlTranscriptWriter(self.output, self.verbosity)
+
+
+    def isAccepted(self, transcript):
+        value = transcript.getTagValue(self.tag)
+        if value == None:
+            if self.default != None:
+                value = self.default
+            else:
+                raise Exception("Error! Transcript %s no tag called '%s'" % (transcript, self.tag))
+        if self.value != None:
+            if self.value == str(value):
+                return True
+            return self.value.isdigit() and value == float(self.value)
+        value = float(value)
+        return (self.min == None or self.min <= value) and (self.max == None or self.max >= value)
+
+
+    def readInputFile(self):
+        progress = Progress(self.parser.getNbTranscripts(), "Writing transcripts", self.verbosity)
+        for transcript in self.parser.getIterator():
+            if self.isAccepted(transcript):
+                self.writer.addTranscript(transcript)
+                if self.mysql:
+                    self.mysqlWriter.addTranscript(transcript)
+                self.nbWritten += 1
+            progress.inc()
+        progress.done()
+
+
+    def writeFile(self):
+        self.writer.write()
+        if self.mysql:
+            self.mysqlWriter.write()
+
+    
+    def run(self):
+        self.setParser()
+        self.setWriter()
+        self.readInputFile()
+        self.writeFile()
+        if self.verbosity > 0:
+            print "%d input" % (self.nbElements)
+            if self.nbElements != 0:
+                print "%d output (%.2f%%)" % (self.nbWritten, float(self.nbWritten) / self.nbElements * 100)
+
+
+
+if __name__ == "__main__":
+    
+    # parse command line
+    description = "Select by Tag v1.0.2: Keep the genomic coordinates such that a the value of a given tag is between two limits. [Category: Data Selection]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of the input [compulsory] [format: transcript file format]")
+    parser.add_option("-g", "--tag", dest="tag", action="store", default=None, type="string", help="the tag [compulsory] [format: string]")     
+    parser.add_option("-a", "--value", dest="value", action="store", default=None, type="string", help="the value to be found [format: string]")     
+    parser.add_option("-m", "--min", dest="min", action="store", default=None, type="float", help="the minimum threshold [format: float]")     
+    parser.add_option("-M", "--max", dest="max", action="store", default=None, type="float", help="the maximum threshold [format: float]")     
+    parser.add_option("-d", "--default", dest="default", action="store", default=None, type="float", help="value if tag is not present [format: float]")     
+    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
+    parser.add_option("-y", "--mysql", dest="mysql", action="store_true", default=False, help="write output into MySQL tables [format: boolean] [default: False]")
+    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    selectByTag         = SelectByTag(options.verbosity)
+    selectByTag.input   = options.inputFileName
+    selectByTag.format  = options.format
+    selectByTag.tag     = options.tag
+    selectByTag.value   = options.value
+    selectByTag.min     = options.min
+    selectByTag.max     = options.max
+    selectByTag.default = options.default
+    selectByTag.output  = options.outputFileName
+    selectByTag.mysql   = options.mysql
+    selectByTag.run()
+
+