annotate SMART/Java/Python/ClusterizeByTags.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 #! /usr/bin/env python
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # Copyright INRA-URGI 2009-2011
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31 import random
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32 from optparse import OptionParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 from commons.core.parsing.ParserChooser import ParserChooser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 from commons.core.writer.TranscriptWriter import TranscriptWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 from SMART.Java.Python.structure.Transcript import Transcript
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 from SMART.Java.Python.structure.Interval import Interval
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37 from SMART.Java.Python.misc.Progress import Progress
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38 from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
41
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
# Operations accepted by ClusterizeByTags.setOperation().
OPERATIONS = ("diff", "div")
# Maps the "one strand" flag to the strand passes made by run(): a single
# pass without direction filter (placeholder 0), or one pass per direction.
BOOLTOSTRANDS = {True: [0], False: [-1, 1]}


class ClusterizeByTags(object):
    """Merge consecutive transcripts whose tag values follow a steady trend.

    Transcripts are read in (start, end) order.  For each one, a "trend" is
    computed against the previous tag value (difference or quotient, see
    setOperation).  A transcript is merged into the current cluster while
    the trend stays within the threshold of the previous trend and, if a
    maximum distance is set, while it is close enough to the cluster.
    Each cluster is written with the tag set to the sum of merged values.
    """

    def __init__(self, verbosity):
        """Open the MySQL connection and set the optional settings to their defaults.

        verbosity -- trace level; the connection is created with verbosity - 1.
        """
        self.verbosity = verbosity
        self.connection = MySqlConnection(self.verbosity-1)
        # Was misspelled "defautValue", which left the attribute read by
        # run() undefined unless setTag() had been called.
        self.defaultValue = None
        self.maxDistance = None
        self.oneStrand = False

    def setInputFile(self, fileName, format):
        """Parse the input file and load its transcripts through MySqlTranscriptWriter.

        fileName -- path of the input file
        format   -- format name handed to ParserChooser.findFormat()

        The tables reported by the writer are kept in self.transcriptTables
        (run() iterates its keys as chromosome names).
        """
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        parser = chooser.getParser(fileName)
        writer = MySqlTranscriptWriter(self.connection, None, self.verbosity)
        writer.addTranscriptList(parser)
        writer.write()
        self.transcriptTables = writer.getTables()

    def setOutputFile(self, fileName):
        """Create the GFF3 writer that receives the clusters."""
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def setTag(self, tagName, defaultValue):
        """Set the tag to cluster on, and the value used when a transcript lacks it."""
        self.tagName = tagName
        self.defaultValue = defaultValue

    def setThreshold(self, threshold):
        """Set the maximum allowed change between two consecutive trends."""
        self.threshold = threshold

    def setOperation(self, operation):
        """Select how a trend is computed; raise Exception if not in OPERATIONS."""
        self.operation = operation
        if self.operation not in OPERATIONS:
            raise Exception("Operation '%s' unsupported: choose among %s" % (self.operation, ", ".join(OPERATIONS)))

    def setMaxDistance(self, distance):
        """Set the maximum distance for two elements to be merged (None: no limit)."""
        self.maxDistance = distance

    def setOneStrand(self, oneStrand):
        """Choose between one pass over both strands (True) or one pass per strand (False)."""
        self.oneStrand = oneStrand

    def _computeTrend(self, value, previousValue):
        """Return the trend between two consecutive tag values, or None without a previous value."""
        if previousValue is None:
            return None
        if self.operation == "diff":
            return value - previousValue
        # NOTE(review): with integer values this floors under Python 2 and
        # raises ZeroDivisionError when previousValue is 0; kept as is
        # because the intended ratio semantics cannot be confirmed here.
        return value / previousValue

    def _canMerge(self, previousTranscript, transcript, previousTrend, trend):
        """Tell whether transcript may be merged into the cluster held by previousTranscript."""
        if previousTrend is not None and abs(trend - previousTrend) > self.threshold:
            return False
        if self.maxDistance is not None and previousTranscript.getDistance(transcript) > self.maxDistance:
            return False
        # In one-strand mode, elements on different strands are clustered
        # together.  The original test ("... or not self.oneStrand") was
        # inverted: it refused exactly those merges and made the reverse()
        # call in the caller unreachable.
        return self.oneStrand or previousTranscript.getDirection() == transcript.getDirection()

    def _writeCluster(self, cluster, sumValue):
        """Store the summed tag value in the cluster and send it to the writer."""
        cluster.setTagValue(self.tagName, sumValue)
        self.writer.addTranscript(cluster)

    def _clusterizeStrand(self, table, strand, progress):
        """Cluster the transcripts of one table for one strand pass.

        table    -- transcript table (provides getName() and selectTranscripts())
        strand   -- direction used in the SQL filter; ignored in one-strand mode
        progress -- progress tracker, incremented once per transcript read
        """
        previousValue = None
        previousTrend = None
        previousTranscript = None
        sumValue = 0
        command = "SELECT * FROM %s" % (table.getName())
        if not self.oneStrand:
            command += " WHERE direction = %d" % (strand)
        command += " ORDER BY start, end"
        for index, transcript in table.selectTranscripts(command):
            if self.tagName in transcript.getTagNames():
                value = transcript.getTagValue(self.tagName)
            else:
                value = self.defaultValue
            trend = self._computeTrend(value, previousValue)
            if previousTranscript is None:
                # First element of the pass: it opens the first cluster.
                sumValue = value
            elif self._canMerge(previousTranscript, transcript, previousTrend, trend):
                if previousTranscript.getDirection() != transcript.getDirection():
                    transcript.reverse()
                previousTranscript.merge(transcript)
                # The cluster keeps living in the previous transcript object.
                transcript = previousTranscript
                sumValue += value
                previousTrend = trend
            else:
                # Trend broken or element too far: close the cluster and
                # start a new one with the current transcript.
                self._writeCluster(previousTranscript, sumValue)
                sumValue = value
                previousTrend = None
            previousValue = value
            previousTranscript = transcript
            progress.inc()
        if previousTranscript is not None:
            self._writeCluster(previousTranscript, sumValue)

    def run(self):
        """Cluster every table of self.transcriptTables, write the clusters, and close the writer."""
        for chromosome in sorted(self.transcriptTables.keys()):
            table = self.transcriptTables[chromosome]
            progress = Progress(table.getNbElements(), "Analyzing %s" % (chromosome), self.verbosity)
            for strand in BOOLTOSTRANDS[self.oneStrand]:
                self._clusterizeStrand(table, strand, progress)
            progress.done()
        self.writer.close()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
130
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
131
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
132 if __name__ == "__main__":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
133
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
134 description = "Clusterize By Tags v1.0.1: Clusterize a set of element using their tag values. [Category: Merge]"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
135
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
136 parser = OptionParser(description = description)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
137 parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
138 parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
139 parser.add_option("-t", "--tag", dest="tagName", action="store", type="string", help="name of the tag [format: string] [compulsory]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
140 parser.add_option("-e", "--default", dest="defaultValue", action="store", default=None, type="int", help="default value for the tag [format: string]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
141 parser.add_option("-r", "--threshold", dest="threshold", action="store", type="int", help="threshold between two consecutive tags [format: int] [compulsory]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
142 parser.add_option("-p", "--operation", dest="operation", action="store", type="string", help="operation to apply between 2 different clusters to compare them [format: choice (diff, div)] [compulsory]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
143 parser.add_option("-d", "--distance", dest="maxDistance", action="store", default=None, type="int", help="maximum distance for 2 clusters to be merged [format: int] [default: None]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
144 parser.add_option("-1", "--oneStrand", dest="oneStrand", action="store_true", default=False, help="also cluster the elements which are on different strands [format: bool] [default: False]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
145 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
146 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
147 (options, args) = parser.parse_args()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
148
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
149 cbt = ClusterizeByTags(options.verbosity)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
150 cbt.setInputFile(options.inputFileName, options.format)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
151 cbt.setOutputFile(options.outputFileName)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
152 cbt.setTag(option.tagName, option.defaultValue)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
153 cbt.setThreshold(option.threshold)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
154 cbt.setOperation(option.operation)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
155 cbt.setMaxDistance(operation.maxDistance)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
156 cbt.setOneStrand(operation.oneStrand)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
157 cbt.run()