Mercurial > repos > yufei-luo > s_mart
diff SMART/Java/Python/structure/TranscriptList.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/Java/Python/structure/TranscriptList.py Fri Jan 18 04:54:14 2013 -0500 @@ -0,0 +1,172 @@ +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. +# +from SMART.Java.Python.structure.Transcript import Transcript +from SMART.Java.Python.mySql.MySqlTable import MySqlTable +from SMART.Java.Python.structure.Interval import Interval +from SMART.Java.Python.misc.Progress import Progress + + +class TranscriptList(object): + """A class that codes for a list of transcript""" + + def __init__(self, verbosity = 0): + self.transcripts = dict() + self.longestTranscript = 0 + self.verbosity = verbosity + + + def getTranscript(self, chromosome, index): + return self.transcripts[chromosome][index] + + + def getChromosomes(self): + return self.transcripts.keys() + + + def getTranscriptsOnChromosome(self, chromosome): + if chromosome not in self.transcripts: + return [] + return self.transcripts[chromosome] + + + def addTranscript(self, transcript): + if transcript.getChromosome() in self.transcripts: + self.transcripts[transcript.getChromosome()].append(transcript) + else: + self.transcripts[transcript.getChromosome()] = [transcript] + self.longestTranscript = max(self.longestTranscript, transcript.getEnd() - transcript.getStart()) + + + def removeTranscript(self, chromosome, i): + del self.transcripts[chromosome][i] + + + def removeAll(self): + self.transcripts = {} + + + def getNbTranscripts(self): + nbTranscripts = 0 + for chromosome in self.transcripts: + nbTranscripts += len(self.transcripts[chromosome]) + return nbTranscripts + + + def getSize(self): + size = 0 + for chromosome in self.transcripts: + for transcript in self.transcripts[chromosome]: + size += transcript.getSize() + return size + + + def sort(self): + for chromosome in self.transcripts: + self.transcripts[chromosome].sort(lambda x, y: x.getStart() - y.getStart()) + + + def removeOverlapWith(self, transcriptList): + transcriptList.sort() + for chromosome in self.transcripts: + progress = Progress(len(self.transcripts[chromosome]), "Handling chromosome %s" % (chromosome), self.verbosity) + for thisTranscriptId in range(len(self.transcripts[chromosome])): + progress.inc() + for thatTranscriptId in range(len(transcriptList.transcripts[chromosome])): + if self.transcripts[chromosome][thisTranscriptId].overlapWith(transcriptList.transcripts[chromosome][thatTranscriptId]): + self.transcripts[chromosome][thisTranscriptId] = None + break + if self.transcripts[chromosome][thisTranscriptId].getEnd() > transcriptList.transcripts[chromosome][thatTranscriptId]: + break + self.transcripts[chromosome] = [transcript for transcript in self.transcripts[chromosome] if transcript != None] + progress.done() + + + def removeOverlapWithExon(self, transcriptList): + transcriptList.sort() + for chromosome in self.transcripts: + progress = Progress(len(self.transcripts[chromosome]), "Handling chromosome %s" % (chromosome), self.verbosity) + for thisTranscriptId in range(len(self.transcripts[chromosome])): + progress.inc() + for thatTranscriptId in range(len(transcriptList.transcripts[chromosome])): + if self.transcripts[chromosome][thisTranscriptId].overlapWithExon(transcriptList.transcripts[chromosome][thatTranscriptId]): + self.transcripts[chromosome][thisTranscriptId] = None + break + if self.transcripts[chromosome][thisTranscriptId].getEnd() > transcriptList.transcripts[chromosome][thatTranscriptId]: + break + self.transcripts[chromosome] = [transcript for transcript in self.transcripts[chromosome] if transcript != None] + progress.done() + + + def setDefaultTagValue(self, name, value): + for transcript in self.getIterator(): + transcript.setTag(name, value) + + + def storeDatabase(self, mySqlConnection): + transcriptsTable = MySqlTable("TmpTranscriptsTable", mySqlConnection) + transcriptsTable.create(Transcript.getSqlVariables(), Transcript.getSqlTypes()) + intervalsVariables = Interval.getSqlVariables() + intervalsVariables.append("idTranscript") + intervalsTypes = Interval.getSqlTypes() + intervalsTypes["idTranscript"] = "int" + intervalsTable = MySqlTable("TmpIntervalsTable", mySqlConnection) + intervalsTable.create(intervalsVariables, intervalsTypes) + for chromosome in self.transcripts: + for transcript in self.transcripts[chromosome]: + idTranscript = transcriptsTable.addLine(transcript.getSqlValues()) + for exon in transcript.getExons(): + intervalValues = exon.getSqlValues() + intervalValues["idTranscript"] = idTranscript + intervalsTable.addLine(intervalValues) + + + def getIterator(self): + chromosomes = self.transcripts.keys() + currentChromosome = 0 + currentTranscript = 0 + while True: + if currentChromosome >= len(chromosomes): + return + elif currentTranscript >= len(self.transcripts[chromosomes[currentChromosome]]): + currentTranscript = 0 + currentChromosome += 1 + elif self.transcripts[chromosomes[currentChromosome]][currentTranscript] == None: + currentTranscript += 1 + else: + yield self.transcripts[chromosomes[currentChromosome]][currentTranscript] + currentTranscript += 1 + + + def __str__(self): + string = "" + for transcript in self.getIterator(): + string += str(transcript) + return string +