diff smart_toolShed/commons/core/writer/MySqlTranscriptWriter.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/commons/core/writer/MySqlTranscriptWriter.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,214 @@
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+import random
+from SMART.Java.Python.mySql.MySqlTable import MySqlTable
+from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
+from SMART.Java.Python.misc.Progress import Progress
+
+class MySqlTranscriptWriter(object):
+    """
+    A class that writes a transcript list into a mySQL table
+    @ivar name:                      name of the tables 
+    @type name:                      string
+    @ivar tables:                    the tables
+    @type tables:                    dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
+    @ivar mySqlConnection:           connection to a MySQL database
+    @type mySqlConnection:           class L{MySqlConnection<MySqlConnection>}
+    @ivar tmpTranscriptFileHandles:  files where transcripts are temporary stored, before copy into database
+    @type tmpTranscriptFileHandles:  dict of file handles
+    @ivar nbTranscriptsByChromosome: number of transcripts written
+    @type nbTranscriptsByChromosome: dict of int (one for each chromosome)
+    @ivar randomNumber:              a random number, used for having a unique name for the tables
+    @type randomNumber:              int
+    @ivar toBeWritten:               there exists transcripts to be copied into database
+    @type toBeWritten:               bool                
+    @ivar verbosity:                 verbosity
+    @type verbosity:                 int        
+    """
+
+
+    def __init__(self, connection, name = None, verbosity = 0):
+        """
+        Constructor
+        @param name:      name of the file 
+        @type  name:      string
+        @param verbosity: verbosity
+        @type  verbosity: int
+        """
+        self.name                      = name
+        self.verbosity                 = verbosity
+        self.tables                    = {}
+        self.indices                   = {}
+        self.tmpTranscriptFileHandles  = {}
+        self.nbTranscriptsByChromosome = {}
+        self.toBeWritten               = False
+        self.randomNumber              = random.randint(0, 100000)
+        self.mySqlConnection           = connection
+        self.nbTmpFiles                = 100
+        self.transcriptValues          = {}
+        self.nbTranscriptValues        = 1000
+        if self.name != None:
+            pos = self.name.rfind(os.sep)
+            if pos != -1:
+                self.name = self.name[pos+1:]
+            
+
+    def __del__(self):
+        """
+        Destructor
+        Possibly write into into database the last transcripts
+        """
+        if self.toBeWritten:
+            self.write()
+
+
+    def addIndex(self, name, values):
+        """
+        Add an index to the tables
+        @param name:   name of the index
+        @type  name:   string
+        @param values: values to index
+        @type  values: list of strings
+        """
+        self.indices[name] = values
+
+
+    def createTable(self, chromosome):
+        """
+        Create a table for a chromosome
+        @param chromosome: a chromosome name
+        @type  chromosome: string
+        """
+        self.tables[chromosome] = MySqlTranscriptTable(self.mySqlConnection, self.name, chromosome, self.verbosity)
+        self.tables[chromosome].createTranscriptTable()
+        for name, values in self.indices.iteritems():
+            self.tables[chromosome].createIndex("%s_%s_%d" % (name, chromosome, self.randomNumber), values)
+
+        
+    
+    def addTranscript(self, transcript):
+        """
+        Add a transcript to the list of transcripts to be written
+        @param transcript: transcript to be written
+        @type  transcript: class L{Transcript<Transcript>}
+        """
+        chromosome = transcript.getChromosome()
+        if chromosome not in self.tables:
+            self.createTable(chromosome)
+            self.nbTranscriptsByChromosome[chromosome] = 1
+        if chromosome not in self.transcriptValues:
+            self.transcriptValues[chromosome] = []
+            
+        self.transcriptValues[chromosome].append(transcript.getSqlValues())
+
+        self.nbTranscriptsByChromosome[chromosome] += 1
+        self.toBeWritten                            = True
+        if sum([len(transcripts) for transcripts in self.transcriptValues.values()]) > self.nbTranscriptValues:
+            self.write() 
+
+
+    def addElement(self, element):
+        """
+        Same as "addTranscript"
+        @param element: transcript to be written
+        @type  element: class L{Transcript<Transcript>}
+        """
+        self.addTranscript(element)
+
+
+#   def addTranscriptList(self, transcriptListParser):
+#       """
+#       Add a list of transcripts to the transcripts to be written
+#       @param transcriptListParser: transcripts to be written
+#       @type  transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}
+#       """
+#       progress = Progress(transcriptListParser.getNbTranscripts(), "Storing %s into database" % (transcriptListParser.fileName), self.verbosity)
+#       for transcript in transcriptListParser.getIterator():
+#           self.addTranscript(transcript)
+#           progress.inc()
+#       progress.done()
+            
+            
+    def addTranscriptList(self, transcriptListParser):
+        """
+        Add a list of transcripts to the transcripts to be written
+        @param transcriptListParser: transcripts to be written
+        @type  transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}
+        """
+        self.transcriptListParser = transcriptListParser
+        self.mySqlConnection.executeManyQueriesIterator(self)
+            
+            
+    def getIterator(self):
+        """
+        Iterator to the SQL commands to insert the list
+        """
+        progress = Progress(self.transcriptListParser.getNbTranscripts(), "Storing %s into database" % (self.transcriptListParser.fileName), self.verbosity)
+        for transcript in self.transcriptListParser.getIterator():
+            chromosome = transcript.getChromosome()
+            if chromosome not in self.tables:
+                self.createTable(chromosome)
+            self.nbTranscriptsByChromosome[chromosome] = self.nbTranscriptsByChromosome.get(chromosome, 0) + 1
+            values = transcript.getSqlValues()
+            yield "INSERT INTO '%s' (%s) VALUES (%s)" % (self.tables[chromosome].name, ", ".join(self.tables[chromosome].variables), ", ".join([MySqlTable.formatSql(values[variable], self.tables[chromosome].types[variable], self.tables[chromosome].sizes[variable]) for variable in self.tables[chromosome].variables]))
+            progress.inc()
+        progress.done()
+            
+            
+    def write(self):
+        """
+        Copy the content of the files into the database
+        (May add transcripts to already created databases)
+        """
+        for chromosome in self.transcriptValues:
+            if chromosome in self.transcriptValues:
+                self.tables[chromosome].insertMany(self.transcriptValues[chromosome])
+        self.transcriptValues = {}
+        self.toBeWritten      = False
+            
+            
+    def getTables(self):
+        """
+        Get the tables
+        @return: the mySQL tables
+        """
+        if self.toBeWritten:
+            self.write()
+        return self.tables
+
+            
+            
+    def removeTables(self):
+        """
+        Drop the tables
+        """
+        for chromosome in self.tables:
+            self.tables[chromosome].remove()