Mercurial > repos > yufei-luo > s_mart
view SMART/Java/Python/ClusterizeByTags.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line source
#! /usr/bin/env python # # Copyright INRA-URGI 2009-2011 # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. 
#
import random
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.structure.Interval import Interval
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter

# Supported ways of comparing the tag values of two consecutive elements.
OPERATIONS = ("diff", "div")
# Maps the "one strand" flag to the strand values iterated over in run():
# a single pass (no direction filter) when True, one pass per direction
# (-1 and 1) when False.
BOOLTOSTRANDS = {True: [0], False: [-1, 1]}


class ClusterizeByTags(object):
    """Merge consecutive transcripts whose tag values evolve consistently.

    The input transcripts are loaded into MySQL tables (one entry per
    chromosome in ``self.transcriptTables``), read back ordered by start and
    end, and consecutive elements are merged while the "trend" (difference or
    ratio of the tag value with respect to the previous element) stays within
    ``threshold`` of the previous trend.  Each resulting cluster is written
    with the sum of the tag values of its members.
    """

    def __init__(self, verbosity):
        """Create the clusterizer and open the MySQL connection.

        verbosity: trace level; the connection is created with verbosity - 1.
        """
        self.verbosity = verbosity
        self.connection = MySqlConnection(self.verbosity-1)
        # The original code initialised a misspelled 'defautValue' attribute,
        # leaving 'defaultValue' undefined until setTag() was called.
        self.defaultValue = None
        self.maxDistance = None
        self.oneStrand = False

    def setInputFile(self, fileName, format):
        """Parse the input file and store its transcripts in MySQL tables.

        fileName: path of the input file.
        format:   name of the transcript format, handed to ParserChooser.
        """
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        parser = chooser.getParser(fileName)
        writer = MySqlTranscriptWriter(self.connection, None, self.verbosity)
        writer.addTranscriptList(parser)
        writer.write()
        # Used in run() as a mapping from chromosome to table object.
        self.transcriptTables = writer.getTables()

    def setOutputFile(self, fileName):
        """Create the GFF3 writer that receives the clusters."""
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def setTag(self, tagName, defaultValue):
        """Set the tag to cluster on and the value used when it is missing."""
        self.tagName = tagName
        self.defaultValue = defaultValue

    def setThreshold(self, threshold):
        """Set the maximum allowed change between two consecutive trends."""
        self.threshold = threshold

    def setOperation(self, operation):
        """Select how two consecutive tag values are compared.

        Raises Exception if the operation is not one of OPERATIONS.
        """
        self.operation = operation
        if self.operation not in OPERATIONS:
            raise Exception("Operation '%s' unsupported: choose among %s" % (self.operation, ", ".join(OPERATIONS)))

    def setMaxDistance(self, distance):
        """Set the maximum distance between two merged elements (None: no limit)."""
        self.maxDistance = distance

    def setOneStrand(self, oneStrand):
        """Choose whether elements of both strands are clustered together."""
        self.oneStrand = oneStrand

    def _getValue(self, transcript):
        """Return the tag value of the transcript, or the default if absent."""
        if self.tagName in transcript.getTagNames():
            return transcript.getTagValue(self.tagName)
        return self.defaultValue

    def _getTrend(self, value, previousValue):
        """Return the difference or ratio to the previous value (None if none)."""
        if previousValue is None:
            return None
        if self.operation == "diff":
            return value - previousValue
        # NOTE(review): a previous value of 0 raises ZeroDivisionError here,
        # and integer operands use floor division under Python 2 -- confirm
        # that this is the intended behaviour.
        return value / previousValue

    def _canMerge(self, previousTranscript, transcript, trend, previousTrend):
        """Tell whether the transcript may be appended to the current cluster."""
        if previousTrend is not None and abs(trend - previousTrend) > self.threshold:
            return False
        if self.maxDistance is not None and previousTranscript.getDistance(transcript) > self.maxDistance:
            return False
        # Elements on different strands may only be merged in one-strand mode.
        # The original test was 'or not self.oneStrand', which forbade exactly
        # that case and made the reverse() branch in run() unreachable.
        return previousTranscript.getDirection() == transcript.getDirection() or self.oneStrand

    def _writeCluster(self, cluster, sumValue):
        """Store the summed tag value on the cluster and send it to the writer."""
        cluster.setTagValue(self.tagName, sumValue)
        self.writer.addTranscript(cluster)

    def _buildQuery(self, chromosome, strand):
        """Build the SELECT statement listing the elements to scan, in order."""
        command = "SELECT * FROM %s" % (self.transcriptTables[chromosome].getName())
        if not self.oneStrand:
            command += " WHERE direction = %d" % (strand)
        command += " ORDER BY start, end"
        return command

    def run(self):
        """Cluster the elements of every chromosome and write the result.

        Elements are scanned chromosome by chromosome (and strand by strand
        unless one-strand mode is set).  The output writer is closed at the
        end.
        """
        for chromosome in sorted(self.transcriptTables.keys()):
            table = self.transcriptTables[chromosome]
            progress = Progress(table.getNbElements(), "Analyzing %s" % (chromosome), self.verbosity)
            for strand in BOOLTOSTRANDS[self.oneStrand]:
                previousValue = None
                previousTrend = None
                previousTranscript = None
                sumValue = 0
                command = self._buildQuery(chromosome, strand)
                for index, transcript in table.selectTranscripts(command):
                    value = self._getValue(transcript)
                    trend = self._getTrend(value, previousValue)
                    if previousTranscript is None:
                        # First element of this pass: it opens the first cluster.
                        sumValue = value
                    elif self._canMerge(previousTranscript, transcript, trend, previousTrend):
                        if previousTranscript.getDirection() != transcript.getDirection():
                            transcript.reverse()
                        previousTranscript.merge(transcript)
                        # The growing cluster stays the reference element.
                        transcript = previousTranscript
                        sumValue += value
                        previousTrend = trend
                    else:
                        # Trend broken: emit the cluster and start a new one.
                        self._writeCluster(previousTranscript, sumValue)
                        sumValue = value
                        previousTrend = None
                    previousValue = value
                    previousTranscript = transcript
                    progress.inc()
                # Emit the last cluster of this pass, if any.
                if previousTranscript is not None:
                    self._writeCluster(previousTranscript, sumValue)
            progress.done()
        self.writer.close()


if __name__ == "__main__":

    description = "Clusterize By Tags v1.0.1: Clusterize a set of element using their tag values. [Category: Merge]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-t", "--tag", dest="tagName", action="store", type="string", help="name of the tag [format: string] [compulsory]")
    parser.add_option("-e", "--default", dest="defaultValue", action="store", default=None, type="int", help="default value for the tag [format: string]")
    parser.add_option("-r", "--threshold", dest="threshold", action="store", type="int", help="threshold between two consecutive tags [format: int] [compulsory]")
    parser.add_option("-p", "--operation", dest="operation", action="store", type="string", help="operation to apply between 2 different clusters to compare them [format: choice (diff, div)] [compulsory]")
    parser.add_option("-d", "--distance", dest="maxDistance", action="store", default=None, type="int", help="maximum distance for 2 clusters to be merged [format: int] [default: None]")
    parser.add_option("-1", "--oneStrand", dest="oneStrand", action="store_true", default=False, help="also cluster the elements which are on different strands [format: bool] [default: False]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    cbt = ClusterizeByTags(options.verbosity)
    cbt.setInputFile(options.inputFileName, options.format)
    cbt.setOutputFile(options.outputFileName)
    # The parsed values live in 'options'; the original read them from the
    # undefined names 'option' and 'operation', raising NameError.
    cbt.setTag(options.tagName, options.defaultValue)
    cbt.setThreshold(options.threshold)
    cbt.setOperation(options.operation)
    cbt.setMaxDistance(options.maxDistance)
    cbt.setOneStrand(options.oneStrand)
    cbt.run()