diff SMART/Java/Python/structure/TranscriptList.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/TranscriptList.py	Fri Jan 18 04:54:14 2013 -0500
@@ -0,0 +1,172 @@
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.mySql.MySqlTable import MySqlTable
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.misc.Progress import Progress
+
+
+class TranscriptList(object):
+    """A class that codes for a list of transcript"""
+
+    def __init__(self, verbosity = 0):
+        self.transcripts = dict()
+        self.longestTranscript = 0
+        self.verbosity = verbosity
+
+
+    def getTranscript(self, chromosome, index):
+        return self.transcripts[chromosome][index]
+        
+
+    def getChromosomes(self):
+        return self.transcripts.keys()
+
+
+    def getTranscriptsOnChromosome(self, chromosome):
+        if chromosome not in self.transcripts:
+            return []
+        return self.transcripts[chromosome]
+
+
+    def addTranscript(self, transcript):
+        if transcript.getChromosome() in self.transcripts:
+            self.transcripts[transcript.getChromosome()].append(transcript)
+        else:
+            self.transcripts[transcript.getChromosome()] = [transcript]
+        self.longestTranscript = max(self.longestTranscript, transcript.getEnd() - transcript.getStart())
+        
+
+    def removeTranscript(self, chromosome, i):
+        del self.transcripts[chromosome][i]
+
+
+    def removeAll(self):
+        self.transcripts = {}
+
+
+    def getNbTranscripts(self):
+        nbTranscripts = 0
+        for chromosome in self.transcripts:
+            nbTranscripts += len(self.transcripts[chromosome])
+        return nbTranscripts
+
+
+    def getSize(self):
+        size = 0
+        for chromosome in self.transcripts:
+            for transcript in self.transcripts[chromosome]:
+                size += transcript.getSize()
+        return size
+
+
+    def sort(self):
+        for chromosome in self.transcripts:
+            self.transcripts[chromosome].sort(lambda x, y: x.getStart() - y.getStart())
+
+
+    def removeOverlapWith(self, transcriptList):
+        transcriptList.sort()
+        for chromosome in self.transcripts:
+            progress = Progress(len(self.transcripts[chromosome]), "Handling chromosome %s" % (chromosome), self.verbosity)
+            for thisTranscriptId in range(len(self.transcripts[chromosome])):
+                progress.inc()
+                for thatTranscriptId in range(len(transcriptList.transcripts[chromosome])):
+                    if self.transcripts[chromosome][thisTranscriptId].overlapWith(transcriptList.transcripts[chromosome][thatTranscriptId]):
+                        self.transcripts[chromosome][thisTranscriptId] = None
+                        break
+                    if self.transcripts[chromosome][thisTranscriptId].getEnd() > transcriptList.transcripts[chromosome][thatTranscriptId]:
+                        break
+            self.transcripts[chromosome] = [transcript for transcript in self.transcripts[chromosome] if transcript != None]
+        progress.done()
+
+
+    def removeOverlapWithExon(self, transcriptList):
+        transcriptList.sort()
+        for chromosome in self.transcripts:
+            progress = Progress(len(self.transcripts[chromosome]), "Handling chromosome %s" % (chromosome), self.verbosity)
+            for thisTranscriptId in range(len(self.transcripts[chromosome])):
+                progress.inc()
+                for thatTranscriptId in range(len(transcriptList.transcripts[chromosome])):
+                    if self.transcripts[chromosome][thisTranscriptId].overlapWithExon(transcriptList.transcripts[chromosome][thatTranscriptId]):
+                        self.transcripts[chromosome][thisTranscriptId] = None
+                        break
+                    if self.transcripts[chromosome][thisTranscriptId].getEnd() > transcriptList.transcripts[chromosome][thatTranscriptId]:
+                        break
+            self.transcripts[chromosome] = [transcript for transcript in self.transcripts[chromosome] if transcript != None]
+        progress.done()
+
+
+    def setDefaultTagValue(self, name, value):
+        for transcript in self.getIterator():
+            transcript.setTag(name, value)
+
+
+    def storeDatabase(self, mySqlConnection):
+        transcriptsTable = MySqlTable("TmpTranscriptsTable", mySqlConnection)
+        transcriptsTable.create(Transcript.getSqlVariables(), Transcript.getSqlTypes())
+        intervalsVariables = Interval.getSqlVariables()
+        intervalsVariables.append("idTranscript")
+        intervalsTypes = Interval.getSqlTypes()
+        intervalsTypes["idTranscript"] = "int"
+        intervalsTable = MySqlTable("TmpIntervalsTable", mySqlConnection)
+        intervalsTable.create(intervalsVariables, intervalsTypes)
+        for chromosome in self.transcripts:
+            for transcript in self.transcripts[chromosome]:
+                idTranscript = transcriptsTable.addLine(transcript.getSqlValues())
+                for exon in transcript.getExons():
+                    intervalValues = exon.getSqlValues()
+                    intervalValues["idTranscript"] = idTranscript
+                    intervalsTable.addLine(intervalValues)
+                    
+    
+    def getIterator(self):
+        chromosomes = self.transcripts.keys()
+        currentChromosome = 0
+        currentTranscript = 0
+        while True:
+            if currentChromosome >= len(chromosomes):
+                return
+            elif currentTranscript >= len(self.transcripts[chromosomes[currentChromosome]]):
+                currentTranscript    = 0
+                currentChromosome += 1
+            elif self.transcripts[chromosomes[currentChromosome]][currentTranscript] == None:
+                currentTranscript += 1
+            else:
+                yield self.transcripts[chromosomes[currentChromosome]][currentTranscript]
+                currentTranscript += 1
+
+
+    def __str__(self):
+        string = ""
+        for transcript in self.getIterator():
+            string += str(transcript)
+        return string
+