| 
36
 | 
     1 #! /usr/bin/env python
 | 
| 
 | 
     2 #
 | 
| 
 | 
     3 # Copyright INRA-URGI 2009-2012
 | 
| 
 | 
     4 # 
 | 
| 
 | 
     5 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     6 # abiding by the rules of distribution of free software. You can use,
 | 
| 
 | 
     7 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
     8 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
     9 # "http://www.cecill.info".
 | 
| 
 | 
    10 # 
 | 
| 
 | 
    11 # As a counterpart to the access to the source code and rights to copy,
 | 
| 
 | 
    12 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    13 # with a limited warranty and the software's author, the holder of the
 | 
| 
 | 
    14 # economic rights, and the successive licensors have only limited
 | 
| 
 | 
    15 # liability.
 | 
| 
 | 
    16 # 
 | 
| 
 | 
    17 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    18 # with loading, using, modifying and/or developing or reproducing the
 | 
| 
 | 
    19 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    20 # that may mean that it is complicated to manipulate, and that also
 | 
| 
 | 
    21 # therefore means that it is reserved for developers and experienced
 | 
| 
 | 
    22 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    23 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    24 # requirements in conditions enabling the security of their systems and/or
 | 
| 
 | 
    25 # data to be ensured and, more generally, to use and operate it in the
 | 
| 
 | 
    26 # same conditions as regards security.
 | 
| 
 | 
    27 # 
 | 
| 
 | 
    28 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    29 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    30 #
 | 
| 
 | 
    31 import os
 | 
| 
 | 
    32 from optparse import OptionParser
 | 
| 
 | 
    33 from commons.core.parsing.ParserChooser import ParserChooser
 | 
| 
 | 
    34 from commons.core.parsing.FastaParser import FastaParser
 | 
| 
 | 
    35 from SMART.Java.Python.structure.Transcript import Transcript
 | 
| 
 | 
    36 from commons.core.writer.Gff3Writer import Gff3Writer
 | 
| 
 | 
    37 from SMART.Java.Python.misc.RPlotter import RPlotter
 | 
| 
 | 
    38 from SMART.Java.Python.misc.MultipleRPlotter import MultipleRPlotter
 | 
| 
 | 
    39 from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
 | 
| 
 | 
    40 from SMART.Java.Python.misc.Progress import Progress
 | 
| 
 | 
    41 
 | 
| 
 | 
    42 TWOSTRANDS = {True: [1, -1], False: [0]}
 | 
| 
 | 
    43 STRANDTOSTR = {1: "(+)", -1: "(-)", 0: ""}
 | 
| 
 | 
    44 
 | 
| 
 | 
    45 class GetDistribution(object):
 | 
| 
 | 
    46 
 | 
| 
 | 
    47 	def __init__(self, verbosity):
 | 
| 
46
 | 
    48 		self.verbosity        = verbosity
 | 
| 
 | 
    49 		self.sizes            = None
 | 
| 
 | 
    50 		self.nbBins           = None
 | 
| 
 | 
    51 		self.sliceSize        = None
 | 
| 
 | 
    52 		self.twoStrands       = False
 | 
| 
 | 
    53 		self.start            = 1
 | 
| 
 | 
    54 		self.names            = ["nbElements"]
 | 
| 
 | 
    55 		self.average          = False
 | 
| 
 | 
    56 		self.nbValues         = {}
 | 
| 
 | 
    57 		self.height           = 300
 | 
| 
 | 
    58 		self.width            = 600
 | 
| 
 | 
    59 		self.dots             = False
 | 
| 
 | 
    60 		self.colors           = None
 | 
| 
 | 
    61 		self.gffFileName      = None
 | 
| 
 | 
    62 		self.csvFileName      = None
 | 
| 
 | 
    63 		self.yMin             = None
 | 
| 
 | 
    64 		self.yMax             = None
 | 
| 
 | 
    65 		self.chromosome       = None
 | 
| 
 | 
    66 		self.merge            = False
 | 
| 
 | 
    67 		self.nbTranscripts    = None
 | 
| 
 | 
    68 		self.factors          = None
 | 
| 
 | 
    69 		self.thicknessCurve   = 1
 | 
| 
 | 
    70 		self.sizePoliceLegend = 1.5
 | 
| 
36
 | 
    71 
 | 
| 
46
 | 
    72 	def setInputFiles(self, fileNames, format):
 | 
| 
 | 
    73 		self.fileNames = fileNames
 | 
| 
 | 
    74 		self.format    = format
 | 
| 
36
 | 
    75 
 | 
| 
 | 
    76 	def setReferenceFile(self, fileName):
 | 
| 
 | 
    77 		if fileName == None:
 | 
| 
 | 
    78 			return
 | 
| 
 | 
    79 		fastaParser = FastaParser(fileName, self.verbosity)
 | 
| 
 | 
    80 		self.chromosomes = fastaParser.getRegions()
 | 
| 
 | 
    81 		self.sizes       = dict([region, fastaParser.getSizeOfRegion(region)] for region in self.chromosomes)
 | 
| 
 | 
    82 		self.maxSize     = max(self.sizes.values())
 | 
| 
 | 
    83 
 | 
| 
 | 
    84 	def setRegion(self, chromosome, start, end):
 | 
| 
46
 | 
    85 		if chromosome == None or start == None or end == None:
 | 
| 
36
 | 
    86 			return
 | 
| 
 | 
    87 		self.maxSize     = options.end
 | 
| 
 | 
    88 		self.sizes       = {chromosome: end}
 | 
| 
 | 
    89 		self.chromosomes = [chromosome]
 | 
| 
 | 
    90 		self.chromosome  = chromosome
 | 
| 
 | 
    91 		self.start       = start
 | 
| 
 | 
    92 		self.end         = end
 | 
| 
 | 
    93 
 | 
| 
 | 
    94 	def setOutputFile(self, fileName):
 | 
| 
 | 
    95 		self.outputFileName = fileName
 | 
| 
 | 
    96 
 | 
| 
 | 
    97 	def setNbBins(self, nbBins):
 | 
| 
46
 | 
    98 		if nbBins != None:
 | 
| 
 | 
    99 			self.nbBins = int(nbBins)
 | 
| 
 | 
   100 
 | 
| 
 | 
   101 	def setBinSize(self, binSize):
 | 
| 
 | 
   102 		if binSize != None:
 | 
| 
 | 
   103 			self.sliceSize = int(binSize)
 | 
| 
36
 | 
   104 
 | 
| 
 | 
   105 	def set2Strands(self, twoStrands):
 | 
| 
 | 
   106 		self.twoStrands = twoStrands
 | 
| 
 | 
   107 
 | 
| 
 | 
   108 	def setNames(self, names):
 | 
| 
 | 
   109 		self.names = names
 | 
| 
46
 | 
   110 		if len(self.names) == 1 and len(self.fileNames) > 1:
 | 
| 
 | 
   111 			self.names = ["file %d" % (i+1) for i in range(len(self.fileNames))]
 | 
| 
36
 | 
   112 
 | 
| 
 | 
   113 	def setAverage(self, average):
 | 
| 
 | 
   114 		self.average = average
 | 
| 
 | 
   115 
 | 
| 
 | 
   116 	def setNormalization(self, normalization):
 | 
| 
 | 
   117 		self.normalization = normalization
 | 
| 
 | 
   118 	
 | 
| 
46
 | 
   119 	def setNormalizationFactors(self, factors):
 | 
| 
 | 
   120 		self.factors = dict([name, 1.0] for name in self.names) if factors == None else dict(zip(self.names, factors))
 | 
| 
 | 
   121 	
 | 
| 
36
 | 
   122 	def setImageSize(self, height, width):
 | 
| 
 | 
   123 		self.height = height
 | 
| 
 | 
   124 		self.width  = width
 | 
| 
 | 
   125 
 | 
| 
46
 | 
   126 	def setDots(self, dots):
 | 
| 
 | 
   127 		self.dots = dots
 | 
| 
 | 
   128 
 | 
| 
36
 | 
   129 	def setYLimits(self, yMin, yMax):
 | 
| 
 | 
   130 		self.yMin = yMin
 | 
| 
 | 
   131 		self.yMax = yMax
 | 
| 
 | 
   132 
 | 
| 
 | 
   133 	def setColors(self, colors):
 | 
| 
 | 
   134 		self.colors = colors
 | 
| 
 | 
   135 
 | 
| 
 | 
   136 	def writeGff(self, fileName):
 | 
| 
 | 
   137 		self.gffFileName = fileName
 | 
| 
 | 
   138 
 | 
| 
 | 
   139 	def writeCsv(self, fileName):
 | 
| 
 | 
   140 		self.csvFileName = fileName
 | 
| 
 | 
   141 
 | 
| 
 | 
   142 	def mergePlots(self, merge):
 | 
| 
 | 
   143 		self.merge = merge
 | 
| 
 | 
   144 
 | 
| 
46
 | 
   145 	def setThicknessCurve(self, thickness) :
 | 
| 
 | 
   146 		self.thickness = thickness
 | 
| 
 | 
   147 
 | 
| 
 | 
   148 	def setSizePoliceLegend(self, sizePoliceLegend):
 | 
| 
 | 
   149 		self.sizePoliceLegend = sizePoliceLegend
 | 
| 
 | 
   150 
 | 
| 
36
 | 
   151 	def _estimateSizes(self):
 | 
| 
46
 | 
   152 		self.sizes         = {}
 | 
| 
 | 
   153 		self.nbTranscripts = {}
 | 
| 
 | 
   154 		for fileName in self.fileNames:
 | 
| 
 | 
   155 			progress = UnlimitedProgress(10000, "Reading %s for chromosome size estimate" % (fileName), self.verbosity)
 | 
| 
 | 
   156 			parserChooser = ParserChooser(self.verbosity)
 | 
| 
 | 
   157 			parserChooser.findFormat(self.format)
 | 
| 
 | 
   158 			parser = parserChooser.getParser(fileName)
 | 
| 
 | 
   159 			for nbTranscripts, transcript in enumerate(parser.getIterator()):
 | 
| 
 | 
   160 				if transcript.__class__.__name__ == "Mapping":
 | 
| 
 | 
   161 					transcript = transcript.getTranscript()
 | 
| 
 | 
   162 				chromosome = transcript.getChromosome()
 | 
| 
 | 
   163 				start      = transcript.getStart()
 | 
| 
 | 
   164 				self.sizes[chromosome] = max(start, self.sizes.get(chromosome, 0))
 | 
| 
 | 
   165 				progress.inc()
 | 
| 
 | 
   166 			progress.done()
 | 
| 
 | 
   167 			self.nbTranscripts[fileName] = nbTranscripts
 | 
| 
36
 | 
   168 
 | 
| 
 | 
   169 	def _computeSliceSize(self):
 | 
| 
 | 
   170 		if self.nbBins == 0:
 | 
| 
 | 
   171 			return
 | 
| 
 | 
   172 		tmp1           = int(max(self.sizes.values()) / float(self.nbBins))
 | 
| 
 | 
   173 		tmp2           = 10 ** (len("%d" % (tmp1))-2)
 | 
| 
 | 
   174 		self.sliceSize = max(1, int((tmp1 / tmp2) * tmp2))
 | 
| 
 | 
   175 		if self.verbosity > 0:
 | 
| 
 | 
   176 			print "choosing bin size of %d" % (self.sliceSize)
 | 
| 
 | 
   177 
 | 
| 
 | 
   178 	def _initBins(self):
 | 
| 
 | 
   179 		self.bins = {}
 | 
| 
 | 
   180 		for chromosome in self.sizes:
 | 
| 
 | 
   181 			self.bins[chromosome] = {}
 | 
| 
 | 
   182 			for name in self.names:
 | 
| 
 | 
   183 				self.bins[chromosome][name] = {}
 | 
| 
 | 
   184 				for strand in TWOSTRANDS[self.twoStrands]:
 | 
| 
 | 
   185 					if self.nbBins == 0:
 | 
| 
 | 
   186 						self.bins[chromosome][name][strand] = {}
 | 
| 
 | 
   187 					else:
 | 
| 
 | 
   188 						self.bins[chromosome][name][strand] = dict([(i * self.sliceSize + 1, 0.0) for i in range(self.start / self.sliceSize, self.sizes[chromosome] / self.sliceSize + 1)])
 | 
| 
 | 
   189 
 | 
| 
 | 
   190 	def _populateBins(self):
 | 
| 
46
 | 
   191 		for id, fileName in enumerate(self.fileNames):
 | 
| 
 | 
   192 			if self.nbTranscripts == None:
 | 
| 
 | 
   193 				progress = UnlimitedProgress(10000, "Counting data", self.verbosity)
 | 
| 
36
 | 
   194 			else:
 | 
| 
46
 | 
   195 				progress = Progress(self.nbTranscripts[fileName], "Counting data", self.verbosity)
 | 
| 
 | 
   196 			parserChooser = ParserChooser(self.verbosity)
 | 
| 
 | 
   197 			parserChooser.findFormat(self.format)
 | 
| 
 | 
   198 			parser = parserChooser.getParser(fileName)
 | 
| 
 | 
   199 			for transcript in parser.getIterator():
 | 
| 
 | 
   200 				if transcript.__class__.__name__ == "Mapping":
 | 
| 
 | 
   201 					transcript = transcript.getTranscript()
 | 
| 
 | 
   202 				progress.inc()
 | 
| 
 | 
   203 				chromosome = transcript.getChromosome()
 | 
| 
 | 
   204 				start      = transcript.getStart()
 | 
| 
 | 
   205 				if self.chromosome and (chromosome != self.chromosome or start < self.start or start > self.end):
 | 
| 
 | 
   206 					continue
 | 
| 
 | 
   207 				strand = transcript.getDirection() if self.twoStrands else 0
 | 
| 
 | 
   208 				if self.nbBins != 0:
 | 
| 
 | 
   209 					bin = (start / self.sliceSize) * self.sliceSize + 1
 | 
| 
 | 
   210 				else:
 | 
| 
 | 
   211 					bin = start
 | 
| 
 | 
   212 				if len(self.fileNames) > 1:
 | 
| 
 | 
   213 					nbElements = transcript.getTagValue("nbElements") if "nbElements" in transcript.getTagNames() else 1
 | 
| 
 | 
   214 					name       = self.names[id]
 | 
| 
 | 
   215 					self.bins[chromosome][name][strand][bin] = self.bins[chromosome][name][strand].get(bin, 0) + nbElements
 | 
| 
 | 
   216 					self.nbValues[name] = self.nbValues.get(name, 0) + nbElements
 | 
| 
 | 
   217 				else:
 | 
| 
 | 
   218 					for name in self.names:
 | 
| 
 | 
   219 						value = float(transcript.tags.get(name, 1))
 | 
| 
 | 
   220 						self.bins[chromosome][name][strand][bin] = self.bins[chromosome][name][strand].get(bin, 0) + value
 | 
| 
 | 
   221 						self.nbValues[name] = self.nbValues.get(name, 0) + value
 | 
| 
 | 
   222 			progress.done()
 | 
| 
36
 | 
   223 
 | 
| 
46
 | 
   224 	def _normalizeFactors(self):
 | 
| 
36
 | 
   225 		for chromosome in self.bins:
 | 
| 
 | 
   226 			for name in self.bins[chromosome]:
 | 
| 
 | 
   227 				for strand in self.bins[chromosome][name]:
 | 
| 
 | 
   228 					for bin in self.bins[chromosome][name][strand]:
 | 
| 
46
 | 
   229 						self.bins[chromosome][name][strand][bin] *= self.factors[name]
 | 
| 
 | 
   230 
 | 
| 
 | 
   231 	def _normalize(self):
 | 
| 
 | 
   232 		average      = float(sum(self.nbValues.values())) / len(self.nbValues.keys())
 | 
| 
 | 
   233 		self.factors = dict([name, float(average) / self.nbValues[name]] for name in self.nbValues)
 | 
| 
 | 
   234 		self._normalizeFactors()
 | 
| 
36
 | 
   235 
 | 
| 
 | 
   236 	def _computeAverage(self):
 | 
| 
 | 
   237 		for chromosome in self.bins:
 | 
| 
 | 
   238 			for name in self.bins[chromosome]:
 | 
| 
 | 
   239 				for strand in self.bins[chromosome][name]:
 | 
| 
 | 
   240 					for bin in self.bins[chromosome][name][strand]:
 | 
| 
 | 
   241 						self.bins[chromosome][name][strand][bin] = float(self.bins[chromosome][name][strand][bin]) / self.sliceSize
 | 
| 
 | 
   242 
 | 
| 
 | 
   243 	def _getPlotter(self, chromosome):
 | 
| 
 | 
   244 		plot = RPlotter("%s_%s.png" % (os.path.splitext(self.outputFileName)[0], chromosome), self.verbosity)
 | 
| 
 | 
   245 		plot.setImageSize(self.width, self.height)
 | 
| 
46
 | 
   246 		plot.setLineWidth(self.thickness)
 | 
| 
 | 
   247 		plot.setSizePoliceLegend(self.sizePoliceLegend)
 | 
| 
 | 
   248 		if self.dots:
 | 
| 
 | 
   249 			plot.setPoints(True)
 | 
| 
36
 | 
   250 		if self.sizes[chromosome] <= 1000:
 | 
| 
 | 
   251 			unit  = "nt."
 | 
| 
 | 
   252 			ratio = 1.0
 | 
| 
 | 
   253 		elif self.sizes[chromosome] <= 1000000:
 | 
| 
 | 
   254 			unit  = "kb"
 | 
| 
 | 
   255 			ratio = 1000.0
 | 
| 
 | 
   256 		else:
 | 
| 
 | 
   257 			unit  = "Mb"
 | 
| 
 | 
   258 			ratio = 1000000.0
 | 
| 
 | 
   259 		if self.yMin != None:
 | 
| 
 | 
   260 			plot.setMinimumY(self.yMin)
 | 
| 
 | 
   261 		if self.yMax != None:
 | 
| 
 | 
   262 			plot.setMaximumY(self.yMax)
 | 
| 
 | 
   263 		plot.setXLabel("Position on %s (in %s)" % (chromosome.replace("_", " "), unit))
 | 
| 
46
 | 
   264 		if len(self.names) > 1:
 | 
| 
 | 
   265 			plot.setLegend(True, True)
 | 
| 
36
 | 
   266 		for i, name in enumerate(self.bins[chromosome]):
 | 
| 
 | 
   267 			for strand in self.bins[chromosome][name]:
 | 
| 
46
 | 
   268 				#fullName = "%s %s" % (name.replace("_", " ")[:6], STRANDTOSTR[strand])
 | 
| 
 | 
   269 				fullName = name.replace("_", " ")[:6]
 | 
| 
36
 | 
   270 				factor = 1 if strand == 0 else strand
 | 
| 
 | 
   271 				correctedLine = dict([(key / ratio, value * factor) for key, value in self.bins[chromosome][name][strand].iteritems()])
 | 
| 
 | 
   272 				plot.addLine(correctedLine, fullName, self.colors[i] if self.colors else None)
 | 
| 
 | 
   273 		return plot
 | 
| 
 | 
   274 
 | 
| 
 | 
   275 	def _plot(self):
 | 
| 
 | 
   276 		if self.merge:
 | 
| 
 | 
   277 			multiplePlot = MultipleRPlotter(self.outputFileName, self.verbosity)
 | 
| 
 | 
   278 			multiplePlot.setImageSize(self.width, self.height * len(self.bins.keys()))
 | 
| 
 | 
   279 		progress = Progress(len(self.bins.keys()), "Plotting", options.verbosity)
 | 
| 
 | 
   280 		for chromosome in sorted(self.bins.keys()):
 | 
| 
 | 
   281 			plot = self._getPlotter(chromosome)
 | 
| 
 | 
   282 			if self.merge:
 | 
| 
 | 
   283 				multiplePlot.addPlot(plot)
 | 
| 
 | 
   284 			else:
 | 
| 
 | 
   285 				plot.plot()
 | 
| 
 | 
   286 			progress.inc()
 | 
| 
 | 
   287 		if self.merge:
 | 
| 
 | 
   288 			multiplePlot.plot()
 | 
| 
 | 
   289 		progress.done()
 | 
| 
 | 
   290 
 | 
| 
 | 
   291 	def _writeCsv(self):
 | 
| 
 | 
   292 		if self.verbosity > 1:
 | 
| 
 | 
   293 			print "Writing CSV file..."
 | 
| 
 | 
   294 		csvHandle = open(self.csvFileName, "w")
 | 
| 
 | 
   295 		csvHandle.write("chromosome;tag;strand")
 | 
| 
 | 
   296 		if self.nbBins != 0:
 | 
| 
 | 
   297 			xValues = range(self.start / self.sliceSize, max(self.sizes.values()) / self.sliceSize + 1)
 | 
| 
 | 
   298 			for value in xValues:
 | 
| 
 | 
   299 				csvHandle.write(";%d-%d" % (value * self.sliceSize + 1, (value+1) * self.sliceSize))
 | 
| 
 | 
   300 			csvHandle.write("\n")
 | 
| 
 | 
   301 		else:
 | 
| 
 | 
   302 			xValues = []
 | 
| 
 | 
   303 			for chromosome in self.bins:
 | 
| 
 | 
   304 				for name in self.bins[chromosome]:
 | 
| 
 | 
   305 					for strand in self.bins[chromosome][name]:
 | 
| 
 | 
   306 						for bin in self.bins[chromosome][name][strand]:
 | 
| 
 | 
   307 							xValues.extend(self.bins[chromosome][name][strand].keys())
 | 
| 
 | 
   308 			xValues = sorted(list(set(xValues)))
 | 
| 
 | 
   309 			for value in xValues:
 | 
| 
 | 
   310 				csvHandle.write(";%d" % (value))
 | 
| 
 | 
   311 			csvHandle.write("\n")
 | 
| 
 | 
   312 		for chromosome in self.bins:
 | 
| 
 | 
   313 			csvHandle.write("%s" % (chromosome))
 | 
| 
 | 
   314 			for name in self.bins[chromosome]:
 | 
| 
 | 
   315 				csvHandle.write(";%s" % (name))
 | 
| 
 | 
   316 				for strand in self.bins[chromosome][name]:
 | 
| 
 | 
   317 					csvHandle.write(";%s" % (STRANDTOSTR[strand]))
 | 
| 
 | 
   318 					for bin in xValues:
 | 
| 
 | 
   319 						csvHandle.write(";%.2f" % (self.bins[chromosome][name][strand].get(bin, 0)))
 | 
| 
 | 
   320 					csvHandle.write("\n")
 | 
| 
 | 
   321 				csvHandle.write(";")
 | 
| 
 | 
   322 			csvHandle.write(";")
 | 
| 
 | 
   323 		csvHandle.close()
 | 
| 
 | 
   324 		if self.verbosity > 1:
 | 
| 
 | 
   325 			print "...done"
 | 
| 
 | 
   326 		
 | 
| 
 | 
   327 	def _writeGff(self):
 | 
| 
 | 
   328 		if self.verbosity > 1:
 | 
| 
 | 
   329 			print "Writing GFF file..."
 | 
| 
 | 
   330 		writer = Gff3Writer(self.gffFileName, self.verbosity)
 | 
| 
 | 
   331 		cpt    = 1
 | 
| 
 | 
   332 		for chromosome in self.bins:
 | 
| 
 | 
   333 			for name in self.bins[chromosome]:
 | 
| 
 | 
   334 				for strand in self.bins[chromosome][name]:
 | 
| 
 | 
   335 					for bin in self.bins[chromosome][name][strand]:
 | 
| 
 | 
   336 						transcript = Transcript()
 | 
| 
 | 
   337 						transcript.setChromosome(chromosome)
 | 
| 
 | 
   338 						transcript.setStart(bin)
 | 
| 
 | 
   339 						if self.nbBins > 0:
 | 
| 
 | 
   340 							transcript.setEnd(bin + self.sliceSize)
 | 
| 
 | 
   341 						else:
 | 
| 
 | 
   342 							transcript.setEnd(self.start)
 | 
| 
 | 
   343 						transcript.setDirection(1 if strand == 0 else strand)
 | 
| 
 | 
   344 						transcript.setTagValue("ID", "region%d" % (cpt))
 | 
| 
 | 
   345 						cpt += 1
 | 
| 
 | 
   346 		writer.write()
 | 
| 
 | 
   347 		if self.verbosity > 1:
 | 
| 
 | 
   348 			print "...done"
 | 
| 
 | 
   349 
 | 
| 
 | 
   350 	def run(self):
 | 
| 
 | 
   351 		if self.sizes == None:
 | 
| 
 | 
   352 			self._estimateSizes()
 | 
| 
46
 | 
   353 		if self.sliceSize == None:
 | 
| 
 | 
   354 			self._computeSliceSize()
 | 
| 
36
 | 
   355 		self._initBins()
 | 
| 
 | 
   356 		self._populateBins()
 | 
| 
 | 
   357 		if self.normalization:
 | 
| 
 | 
   358 			self._normalize()
 | 
| 
46
 | 
   359 		if self.factors != None:
 | 
| 
 | 
   360 			self._normalizeFactors()
 | 
| 
36
 | 
   361 		if self.average:
 | 
| 
 | 
   362 			self._computeAverage()
 | 
| 
 | 
   363 		self._plot()
 | 
| 
 | 
   364 		if self.csvFileName != None:
 | 
| 
 | 
   365 			self._writeCsv()
 | 
| 
 | 
   366 		if self.gffFileName != None:
 | 
| 
 | 
   367 			self._writeGff()
 | 
| 
 | 
   368 
 | 
| 
 | 
   369 
 | 
| 
 | 
   370 if __name__ == "__main__":
 | 
| 
 | 
   371 
 | 
| 
 | 
   372 	description = "Get Distribution v1.0.2: Get the distribution of the genomic coordinates on a genome. [Category: Visualization]"
 | 
| 
 | 
   373 
 | 
| 
 | 
   374 	parser = OptionParser(description = description)
 | 
| 
46
 | 
   375 	parser.add_option("-i", "--input",        dest="inputFileNames",      action="store",                            type="string", help="input files separated by commas [compulsory] [format: string]")
 | 
| 
 | 
   376 	parser.add_option("-f", "--format",       dest="format",              action="store",                            type="string", help="format of the input file [compulsory] [format: transcript file format]")
 | 
| 
 | 
   377 	parser.add_option("-o", "--output",       dest="outputFileName",      action="store",                            type="string", help="output file [compulsory] [format: output file in GFF3 format]")
 | 
| 
 | 
   378 	parser.add_option("-r", "--reference",    dest="referenceFileName",   action="store",      default=None,         type="string", help="file containing the genome [format: file in FASTA format]")
 | 
| 
 | 
   379 	parser.add_option("-b", "--nbBins",       dest="nbBins",              action="store",      default=1000,         type="int",    help="number of bins [default: 1000] [format: int]")
 | 
| 
 | 
   380 	parser.add_option("-B", "--binSize",      dest="binSize",             action="store",      default=None,         type="int",    help="bin size [default: None] [format: int]")
 | 
| 
 | 
   381 	parser.add_option("-2", "--bothStrands",  dest="bothStrands",         action="store_true", default=False,                       help="plot one curve per strand [format: bool] [default: false]")
 | 
| 
 | 
   382 	parser.add_option("-c", "--chromosome",   dest="chromosome",          action="store",      default=None,         type="string", help="plot only a chromosome [format: string]")
 | 
| 
 | 
   383 	parser.add_option("-s", "--start",        dest="start",               action="store",      default=None,         type="int",    help="start from a given region [format: int]")
 | 
| 
 | 
   384 	parser.add_option("-e", "--end",          dest="end",                 action="store",      default=None,         type="int",    help="end from a given region [format: int]")
 | 
| 
 | 
   385 	parser.add_option("-y", "--yMin",         dest="yMin",                action="store",      default=None,         type="int",    help="minimum value on the y-axis to plot [format: int]")
 | 
| 
 | 
   386 	parser.add_option("-Y", "--yMax",         dest="yMax",                action="store",      default=None,         type="int",    help="maximum value on the y-axis to plot [format: int]")
 | 
| 
 | 
   387 	parser.add_option("-x", "--csv",          dest="csv",                 action="store",      default=None,                        help="write a .csv file [format: output file in CSV format] [default: None]")
 | 
| 
 | 
   388 	parser.add_option("-g", "--gff",          dest="gff",                 action="store",      default=None,                        help="also write GFF3 file [format: output file in GFF format] [default: None]")
 | 
| 
 | 
   389 	parser.add_option("-H", "--height",       dest="height",              action="store",      default=500,          type="int",    help="height of the graphics [format: int] [default: 300]")
 | 
| 
 | 
   390 	parser.add_option("-W", "--width",        dest="width",               action="store",      default=800,          type="int",    help="width of the graphics [format: int] [default: 1000]")
 | 
| 
 | 
   391 	parser.add_option("-t", "--thickness", 	  dest="lineThickness", 	  action="store",      default=1,            type="int",    help="thickness of the lines [format : int] [default : 1]")
 | 
| 
 | 
   392 	parser.add_option("-d", "--policeLegend", dest="sizePoliceLegend",    action="store",      default=1.5,          type="float",  help="size of the police of the legend  [format : float] [default : 1.5]")
 | 
| 
 | 
   393 	parser.add_option("-D", "--dots",         dest="dots",                action="store_true", default=False,                       help="plot dots instead of lines  [format : bool] [default : false]")
 | 
| 
 | 
   394 	parser.add_option("-a", "--average",      dest="average",             action="store_true", default=False,                       help="plot average (instead of sum) [default: false] [format: boolean]")
 | 
| 
 | 
   395 	parser.add_option("-n", "--names",        dest="names",               action="store",      default="nbElements", type="string", help="name for the tags (separated by commas and no space) [default: None] [format: string]")
 | 
| 
 | 
   396 	parser.add_option("-l", "--color",        dest="colors",              action="store",      default=None,         type="string", help="color of the lines (separated by commas and no space) [format: string]")
 | 
| 
 | 
   397 	parser.add_option("-z", "--normalize",    dest="normalize",           action="store_true", default=False,                       help="normalize data (when panels are different) [format: bool] [default: false]")
 | 
| 
 | 
   398 	parser.add_option("-Z", "--normalizeFac", dest="normalizeFactors",    action="store",      default=None,                        help="normalize data with given factors (when panels are different) [format: string]")
 | 
| 
 | 
   399 	parser.add_option("-m", "--merge",        dest="mergePlots",          action="store_true", default=False,                       help="merge all plots in one figure [format: bool] [default: false]")
 | 
| 
 | 
   400 	parser.add_option("-v", "--verbosity",    dest="verbosity",           action="store",      default=1,            type="int",    help="trace level [default: 1] [format: int]")
 | 
| 
36
 | 
   401 	(options, args) = parser.parse_args()
 | 
| 
 | 
   402 
 | 
| 
 | 
   403 	gt = GetDistribution(options.verbosity)
 | 
| 
46
 | 
   404 	gt.setInputFiles(options.inputFileNames.split(","), options.format)
 | 
| 
36
 | 
   405 	gt.setOutputFile(options.outputFileName)
 | 
| 
 | 
   406 	gt.setReferenceFile(options.referenceFileName)
 | 
| 
46
 | 
   407 	gt.setNbBins(options.nbBins)
 | 
| 
 | 
   408 	gt.setBinSize(options.binSize)
 | 
| 
36
 | 
   409 	gt.set2Strands(options.bothStrands)
 | 
| 
 | 
   410 	gt.setRegion(options.chromosome, options.start, options.end)
 | 
| 
 | 
   411 	gt.setNormalization(options.normalize)
 | 
| 
 | 
   412 	gt.setAverage(options.average)
 | 
| 
 | 
   413 	gt.setYLimits(options.yMin, options.yMax)
 | 
| 
 | 
   414 	gt.writeCsv(options.csv)
 | 
| 
 | 
   415 	gt.writeGff(options.gff)
 | 
| 
 | 
   416 	gt.setImageSize(options.height, options.width)
 | 
| 
 | 
   417 	gt.setNames(options.names.split(","))
 | 
| 
46
 | 
   418 	gt.setThicknessCurve(options.lineThickness)
 | 
| 
 | 
   419 	gt.setSizePoliceLegend(options.sizePoliceLegend)
 | 
| 
36
 | 
   420 	gt.setColors(None if options.colors == None else options.colors.split(","))
 | 
| 
46
 | 
   421 	gt.setDots(options.dots)
 | 
| 
36
 | 
   422 	gt.setNormalization(options.normalize)
 | 
| 
46
 | 
   423 	gt.setNormalizationFactors(None if options.normalizeFactors == None else [float(factor) for factor in options.normalizeFactors.split(",")])
 | 
| 
36
 | 
   424 	gt.mergePlots(options.mergePlots)
 | 
| 
 | 
   425 	gt.run()
 | 
| 
 | 
   426 
 |