comparison smart_toolShed/SMART/Java/Python/plotTranscriptList.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e0f8dcca02ed
1 #! /usr/bin/env python
2 #
3 # Copyright INRA-URGI 2009-2010
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30 #
31 """
32 Plot the data from the data files
33 """
34 import sys
35 import math
36 from optparse import OptionParser
37 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
38 from SMART.Java.Python.misc.RPlotter import RPlotter
39
40
class PlotTranscriptList(object):
    """Plot values stored as tags in a list of transcripts.

    Typical use (see the command-line section of this file): set the
    public attributes, then call setPlotter(), setShape(), setInput() and
    finally run().

    Attributes read by the methods of this class:
        x, y, z                    -- names of the tags holding the values
        xDefault, yDefault, zDefault -- values used when a tag is missing
        shape     -- one of "line", "barplot", "points", "heatPoints"
        bucket    -- bucket size ("line") or number of buckets ("barplot")
        log       -- non-empty string when a log scale is requested
        verbosity -- trace level handed to the plotter and the parser
        plotter   -- RPlotter instance created by setPlotter()
        parser    -- TranscriptContainer instance created by setInput()
    """

    def __init__(self, verbosity = 0):
        """Create an unconfigured plotter.

        verbosity: trace level forwarded to RPlotter and
                   TranscriptContainer.
        """
        self.inputFileName = None
        self.format = None
        self.x = None
        self.y = None
        self.z = None
        self.xDefault = None
        self.yDefault = None
        self.zDefault = None
        self.xLabel = None
        self.yLabel = None
        self.shape = None
        self.bucket = None
        self.keep = None
        self.log = None
        # The original code stored None here and silently dropped the
        # requested trace level.
        self.verbosity = verbosity
        self.plotter = None
        self.parser = None


    def setPlotter(self, outputFileName, keep, log, xLabel, yLabel):
        """Create the RPlotter and give it the log setting and axis labels.

        The log setting is not forwarded when self.shape is "barplot"
        (clusterInBarplot() applies the logarithm itself), so self.shape
        should be set before this method is called.
        """
        self.plotter = RPlotter(outputFileName, self.verbosity, keep)
        if self.shape != "barplot":
            self.plotter.setLog(log)
        self.plotter.setXLabel(xLabel)
        self.plotter.setYLabel(yLabel)


    def setShape(self, shape):
        """Select the plot shape and record it in self.shape.

        shape: "line", "barplot", "points" or "heatPoints".  Requires
        setPlotter() to have been called for every shape but "line".
        Exits the program on an unknown shape.
        """
        # Every branch tests the argument; the original tested self.shape
        # for "line" only, so setShape("line") failed unless the attribute
        # had been set by hand beforehand.
        if shape == "line":
            pass
        elif shape == "barplot":
            self.plotter.setBarplot(True)
        elif shape == "points":
            self.plotter.setPoints(True)
        elif shape == "heatPoints":
            self.plotter.setHeatPoints(True)
        else:
            sys.exit("Do not understand shape '%s'" % (shape))
        self.shape = shape


    def setInput(self, inputFileName, format):
        """Open the transcript file inputFileName, written in the given format."""
        self.inputFileName = inputFileName
        self.format = format
        self.parser = TranscriptContainer(inputFileName, format, self.verbosity)


    def getValues(self, transcript):
        """Return the (x, y, z) values of a transcript as floats.

        Each value is read from the tag named by self.x / self.y / self.z.
        A missing value is replaced by the matching default; when no
        default is available the program exits.  y is optional for the
        "line" and "barplot" shapes, and z is only read when self.z is
        set; values that are not available are returned as None.
        """
        x = transcript.getTagValue(self.x)
        y = None
        z = None
        if self.y is not None:
            y = transcript.getTagValue(self.y)
        if self.z is not None:
            z = transcript.getTagValue(self.z)

        if x is None:
            if self.xDefault is None:
                sys.exit("Error! Transcript %s do not have the x-tag %s" % (transcript, self.x))
            x = self.xDefault
        if y is None and self.shape != "line" and self.shape != "barplot":
            if self.yDefault is None:
                sys.exit("Error! Transcript %s do not have the y-tag %s" % (transcript, self.y))
            y = self.yDefault
        if self.z is not None and z is None:
            if self.zDefault is None:
                sys.exit("Error! Transcript %s do not have the z-tag %s" % (transcript, self.z))
            z = self.zDefault

        x = float(x)
        # Convert only what was actually found: for "line"/"barplot" a
        # configured but absent y tag leaves y at None, and float(None)
        # used to raise a TypeError here.
        if y is not None:
            y = float(y)
        if z is not None:
            z = float(z)
        return (x, y, z)


    def readFile(self):
        """Read every transcript and collect the data to plot.

        Returns (line, heatLine):
          - "points":     line maps transcript name -> (x, y)
          - "heatPoints": same, and heatLine maps transcript name -> z
          - "line" / "barplot": line maps x -> number of transcripts
            having this x value
        Transcripts named "unnamed transcript" are renamed
        "transcript <rank>", where the rank counts every transcript read.
        Exits the program when self.shape is not a known shape.
        """
        cpt = 1
        line = {}
        heatLine = {}
        for transcript in self.parser.getIterator():
            x, y, z = self.getValues(transcript)

            name = transcript.name
            if name == "unnamed transcript":
                name = "transcript %d" % (cpt)
            cpt += 1
            if self.shape == "points":
                line[name] = (x, y)
            elif self.shape == "heatPoints":
                line[name] = (x, y)
                heatLine[name] = z
            elif self.shape == "line" or self.shape == "barplot":
                line[x] = line.get(x, 0) + 1
            else:
                sys.exit("Do not understand shape '%s'" % (self.shape))
        return line, heatLine


    def putLineInBuckets(self, line):
        """Group the x values of a line in buckets of width self.bucket.

        line: dict x -> count.  Returns a new dict whose keys are the
        bucket starts and whose values are the summed counts.
        """
        buckets = {}
        for key, value in line.items():
            # NOTE(review): int() truncates toward zero, so negative x
            # values share the bucket starting at 0 -- kept as in the
            # original.
            bucketKey = int(key / float(self.bucket)) * self.bucket
            # Accumulate: the original assignment kept only the count of
            # the last x value falling in each bucket.
            buckets[bucketKey] = buckets.get(bucketKey, 0) + value
        return buckets


    def clusterInBarplot(self, line):
        """Spread the x values of a line over self.bucket equal-width bins.

        line: dict x -> count.  Returns a dict with int(self.bucket) + 1
        keys (the bin starts, in log space when self.log is non-empty)
        mapped to the summed counts.  With a log scale, x values equal to
        zero are discarded; the number of discarded data points is
        printed.

        Exits the program when self.bucket is missing or not positive, or
        when a log scale is requested and every x value is zero.  Raises
        ValueError on an empty line, or on negative x values with a log
        scale (math.log).
        """
        if self.bucket is None or self.bucket <= 0:
            sys.exit("Error! The barplot needs a positive number of buckets (option -b)")
        # None means "not set" and must not switch the log scale on.
        useLog = bool(self.log)
        nbZeros = 0
        minValue = min(line)
        maxValue = max(line)
        if useLog:
            if minValue == 0:
                # Zero cannot be placed on a log scale: drop it and start
                # from the smallest non-zero value.  Report the number of
                # data points dropped (the original reported 1 at most).
                nbZeros = line[0]
                nonZeroKeys = [key for key in line if key != 0]
                if not nonZeroKeys:
                    sys.exit("Error! Cannot use a log scale: all the values are zero")
                minValue = min(nonZeroKeys)
            minValue = math.log(minValue)
            maxValue = math.log(maxValue)
        nbBuckets = int(self.bucket)
        bucketSize = (maxValue - minValue) / self.bucket
        buckets = {}
        for i in range(nbBuckets + 1):
            buckets[i * bucketSize + minValue] = 0
        for key, value in line.items():
            if useLog:
                if key == 0:
                    continue
                key = math.log(key)
            if bucketSize == 0:
                # All the values are identical: everything goes to the
                # single bin (this used to divide by zero).
                index = 0
            else:
                index = min(int((key - minValue) / bucketSize), nbBuckets)
            # Same expression as the one used to create the keys above, so
            # the lookup always hits an existing bin.
            buckets[index * bucketSize + minValue] += value
        print("%d zeros have been removed" % (nbZeros))
        return buckets


    def getSpearmanRho(self):
        """Print the Spearman rho returned by the plotter, when available."""
        rho = self.plotter.getSpearmanRho()
        if rho is None:
            print("Cannot compute Spearman rho.")
        else:
            print("Spearman rho: %f" % (rho))


    def run(self):
        """Read the input, bucket the data if needed, and draw the plot.

        Requires setPlotter(), setShape() and setInput() to have been
        called.  Prints the Spearman rho for the "points" and
        "heatPoints" shapes.
        """
        line, heatLine = self.readFile()

        if self.shape == "line" and self.bucket is not None:
            line = self.putLineInBuckets(line)
        if self.shape == "barplot":
            line = self.clusterInBarplot(line)

        if self.shape == "points" or self.shape == "barplot" or self.shape == "line":
            self.plotter.addLine(line)
        elif self.shape == "heatPoints":
            self.plotter.addLine(line)
            self.plotter.addHeatLine(heatLine)
        else:
            sys.exit("Do not understand shape '%s'" % (self.shape))

        self.plotter.plot()

        if self.shape == "points" or self.shape == "heatPoints":
            self.getSpearmanRho()
214
215
216
if __name__ == "__main__":

    # parse command line
    description = "Plot v1.0.2: Plot some information from a list of transcripts. [Category: Visualization]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",dest="format", action="store",type="string", help="format of the input [compulsory] [format: transcript file format]")
    parser.add_option("-x", "--x",dest="x",action="store", type="string", help="tag for the x value [format: string]")
    parser.add_option("-y", "--y",dest="y",action="store", type="string", help="tag for the y value [format: string]")
    parser.add_option("-z", "--z",dest="z", action="store", default=None,type="string", help="tag for the z value [format: string]")
    parser.add_option("-X", "--xDefault",dest="xDefault",action="store", default=None,type="float",help="value for x when tag is not present [format: float]")
    parser.add_option("-Y", "--yDefault",dest="yDefault",action="store",default=None,type="float",help="value for y when tag is not present [format: float]")
    parser.add_option("-Z", "--zDefault",dest="zDefault", action="store",default=None,type="float",help="value for z when tag is not present [format: float]")
    parser.add_option("-n", "--xLabel",dest="xLabel",action="store",default="",type="string", help="label on the x-axis [format: string] [default: ]")
    parser.add_option("-m", "--yLabel",dest="yLabel",action="store",default="", type="string", help="label on the y-axis [format: string] [default: ]")
    parser.add_option("-o", "--output",dest="outputFileName",action="store",type="string", help="output file names [format: output file in PNG format]")
    parser.add_option("-s", "--shape",dest="shape",action="store", type="string", help="shape of the plot [format: choice (barplot, line, points, heatPoints)]")
    parser.add_option("-b", "--bucket",dest="bucket",action="store",default=None,type="float",help="bucket size (for the line plot) [format: int] [default: 1]")
    parser.add_option("-k", "--keep",dest="keep",action="store_true", default=False, help="keep temporary files [format: bool]")
    parser.add_option("-l", "--log",dest="log",action="store",default="",type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string] [default: ]")
    parser.add_option("-v", "--verbosity",dest="verbosity",action="store",default=1, type="int",help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # -i and -f are documented as compulsory: report a usage error right
    # away instead of failing later inside the plotter or the parser.
    if options.inputFileName is None:
        parser.error("the input file is compulsory (option -i)")
    if options.format is None:
        parser.error("the input format is compulsory (option -f)")

    # The shape must be known before setPlotter(), which reads it to decide
    # whether the log setting is forwarded to the plotter.
    plotTranscriptList = PlotTranscriptList(options.verbosity)
    plotTranscriptList.x = options.x
    plotTranscriptList.y = options.y
    plotTranscriptList.z = options.z
    plotTranscriptList.xDefault = options.xDefault
    plotTranscriptList.yDefault = options.yDefault
    plotTranscriptList.zDefault = options.zDefault
    plotTranscriptList.shape = options.shape
    plotTranscriptList.bucket = options.bucket
    plotTranscriptList.log = options.log
    plotTranscriptList.setPlotter(options.outputFileName, options.keep, options.log, options.xLabel, options.yLabel)
    plotTranscriptList.setShape(options.shape)
    plotTranscriptList.setInput(options.inputFileName, options.format)
    plotTranscriptList.run()
255