6
|
1 #! /usr/bin/env python
|
|
2 #
|
|
3 # Copyright INRA-URGI 2009-2011
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30 #
|
|
31 import random
|
|
32 from optparse import OptionParser
|
|
33 from commons.core.parsing.ParserChooser import ParserChooser
|
|
34 from commons.core.writer.TranscriptWriter import TranscriptWriter
|
|
35 from SMART.Java.Python.structure.Transcript import Transcript
|
|
36 from SMART.Java.Python.structure.Interval import Interval
|
|
37 from SMART.Java.Python.misc.Progress import Progress
|
|
38 from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
|
|
39 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
|
|
40
|
|
41
|
|
# Names of the operations accepted by ClusterizeByTags.setOperation(); the
# order is the one shown in the "unsupported operation" error message.
OPERATIONS = (
    "diff",
    "div",
)

# Strand values scanned by ClusterizeByTags.run(), indexed by the "one strand"
# flag: a single pass ([0], no filter on the direction) when the flag is set,
# otherwise one pass per direction (-1, then 1).
BOOLTOSTRANDS = {
    False: [-1, 1],
    True: [0],
}
|
|
44
|
|
class ClusterizeByTags(object):
    """Merge consecutive elements whose tag values follow a similar trend.

    The input elements are loaded into MySQL tables (keyed by chromosome, as
    returned by the MySQL transcript writer) and read back ordered by start
    and end.  For each pair of consecutive elements, a "trend" is computed
    from their tag values (difference or ratio, depending on the operation).
    An element is merged into the current cluster when the trend does not
    differ from the previous trend by more than the threshold, when it is not
    further than the maximum distance (if any), and when the strands are
    compatible.  Each cluster is written to the GFF3 output with the tag set
    to the sum of the values of its elements.
    """

    def __init__(self, verbosity):
        """Open the MySQL connection and reset the optional parameters.

        verbosity -- trace level, forwarded to the parser, the writers and
                     the progress bar (the connection gets verbosity - 1)
        """
        self.verbosity = verbosity
        self.connection = MySqlConnection(self.verbosity-1)
        # This attribute used to be initialised under the misspelled name
        # "defautValue", which left "defaultValue" undefined until setTag().
        self.defaultValue = None
        self.maxDistance = None
        self.oneStrand = False

    def setInputFile(self, fileName, format):
        """Parse the input file and store its elements into MySQL tables.

        fileName -- path of the file to read
        format   -- name of the file format, used to choose the parser
        """
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        parser = chooser.getParser(fileName)
        writer = MySqlTranscriptWriter(self.connection, None, self.verbosity)
        writer.addTranscriptList(parser)
        writer.write()
        self.transcriptTables = writer.getTables()

    def setOutputFile(self, fileName):
        """Create the GFF3 writer which will receive the clusters."""
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def setTag(self, tagName, defaultValue):
        """Set the tag to read, and the value used when an element lacks it."""
        self.tagName = tagName
        self.defaultValue = defaultValue

    def setThreshold(self, threshold):
        """Set the maximum variation allowed between two consecutive trends."""
        self.threshold = threshold

    def setOperation(self, operation):
        """Set the operation used to compute a trend.

        Raises an Exception if the operation is not one of OPERATIONS.
        """
        self.operation = operation
        if self.operation not in OPERATIONS:
            raise Exception("Operation '%s' unsupported: choose among %s" % (self.operation, ", ".join(OPERATIONS)))

    def setMaxDistance(self, distance):
        """Set the maximum distance between two merged elements (None: no limit)."""
        self.maxDistance = distance

    def setOneStrand(self, oneStrand):
        """Choose whether elements of both strands are clustered together."""
        self.oneStrand = oneStrand

    def _computeTrend(self, value, previousValue):
        """Return the trend between two consecutive values (None for the first one)."""
        if previousValue is None:
            return None
        if self.operation == "diff":
            return value - previousValue
        # NOTE(review): "div" raises ZeroDivisionError when the previous value
        # is 0, and is an integer division on integers under Python 2.
        return value / previousValue

    def _canMerge(self, previousTranscript, transcript, previousTrend, trend):
        """Tell whether 'transcript' can be added to the cluster 'previousTranscript'."""
        if previousTrend is not None and abs(trend - previousTrend) > self.threshold:
            return False
        if self.maxDistance is not None and previousTranscript.getDistance(transcript) > self.maxDistance:
            return False
        # The original test was "same direction or not self.oneStrand": since
        # the strand-specific passes only read one direction at a time, it
        # forbade merging across strands exactly when this was requested, and
        # made the reverse() call of run() unreachable.
        return self.oneStrand or previousTranscript.getDirection() == transcript.getDirection()

    def _writeCluster(self, cluster, sumValue):
        """Store the summed value in the tag of the cluster and write it."""
        cluster.setTagValue(self.tagName, sumValue)
        self.writer.addTranscript(cluster)

    def run(self):
        """Cluster the elements of every chromosome and write the result.

        The input file, the output file, the tag, the threshold and the
        operation must have been set beforehand.  The writer is closed at the
        end of the run.
        """
        for chromosome in sorted(self.transcriptTables.keys()):
            table = self.transcriptTables[chromosome]
            progress = Progress(table.getNbElements(), "Analyzing %s" % (chromosome), self.verbosity)
            for strand in BOOLTOSTRANDS[self.oneStrand]:
                previousValue = None
                previousTrend = None
                previousTranscript = None
                sumValue = 0
                command = "SELECT * FROM %s" % (table.getName())
                if not self.oneStrand:
                    command += " WHERE direction = %d" % (strand)
                command += " ORDER BY start, end"
                for index, transcript in table.selectTranscripts(command):
                    if self.tagName in transcript.getTagNames():
                        value = transcript.getTagValue(self.tagName)
                    else:
                        value = self.defaultValue
                    trend = self._computeTrend(value, previousValue)
                    if previousTranscript is None:
                        # First element of the pass: it opens the first cluster.
                        sumValue = value
                    elif self._canMerge(previousTranscript, transcript, previousTrend, trend):
                        # The element is reversed first when it lies on the
                        # other strand, so that both share the same direction.
                        if previousTranscript.getDirection() != transcript.getDirection():
                            transcript.reverse()
                        previousTranscript.merge(transcript)
                        transcript = previousTranscript
                        sumValue += value
                        previousTrend = trend
                    else:
                        # The current cluster is over: write it, and open a
                        # new one with the current element.
                        self._writeCluster(previousTranscript, sumValue)
                        sumValue = value
                        previousTrend = None
                    previousValue = value
                    previousTranscript = transcript
                    progress.inc()
                # Do not forget the cluster which is still open.
                if previousTranscript is not None:
                    self._writeCluster(previousTranscript, sumValue)
            progress.done()
        self.writer.close()
|
|
130
|
|
131
|
|
132 if __name__ == "__main__":
|
|
133
|
|
134 description = "Clusterize By Tags v1.0.1: Clusterize a set of element using their tag values. [Category: Merge]"
|
|
135
|
|
136 parser = OptionParser(description = description)
|
|
137 parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
|
|
138 parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
|
|
139 parser.add_option("-t", "--tag", dest="tagName", action="store", type="string", help="name of the tag [format: string] [compulsory]")
|
|
140 parser.add_option("-e", "--default", dest="defaultValue", action="store", default=None, type="int", help="default value for the tag [format: string]")
|
|
141 parser.add_option("-r", "--threshold", dest="threshold", action="store", type="int", help="threshold between two consecutive tags [format: int] [compulsory]")
|
|
142 parser.add_option("-p", "--operation", dest="operation", action="store", type="string", help="operation to apply between 2 different clusters to compare them [format: choice (diff, div)] [compulsory]")
|
|
143 parser.add_option("-d", "--distance", dest="maxDistance", action="store", default=None, type="int", help="maximum distance for 2 clusters to be merged [format: int] [default: None]")
|
|
144 parser.add_option("-1", "--oneStrand", dest="oneStrand", action="store_true", default=False, help="also cluster the elements which are on different strands [format: bool] [default: False]")
|
|
145 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
|
|
146 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
|
|
147 (options, args) = parser.parse_args()
|
|
148
|
|
149 cbt = ClusterizeByTags(options.verbosity)
|
|
150 cbt.setInputFile(options.inputFileName, options.format)
|
|
151 cbt.setOutputFile(options.outputFileName)
|
|
152 cbt.setTag(option.tagName, option.defaultValue)
|
|
153 cbt.setThreshold(option.threshold)
|
|
154 cbt.setOperation(option.operation)
|
|
155 cbt.setMaxDistance(operation.maxDistance)
|
|
156 cbt.setOneStrand(operation.oneStrand)
|
|
157 cbt.run()
|