| 
36
 | 
     1 #
 | 
| 
 | 
     2 # Copyright INRA-URGI 2009-2010
 | 
| 
 | 
     3 # 
 | 
| 
 | 
     4 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     5 # abiding by the rules of distribution of free software. You can use,
 | 
| 
 | 
     6 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
     7 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
     8 # "http://www.cecill.info".
 | 
| 
 | 
     9 # 
 | 
| 
 | 
    10 # As a counterpart to the access to the source code and rights to copy,
 | 
| 
 | 
    11 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    12 # with a limited warranty and the software's author, the holder of the
 | 
| 
 | 
    13 # economic rights, and the successive licensors have only limited
 | 
| 
 | 
    14 # liability.
 | 
| 
 | 
    15 # 
 | 
| 
 | 
    16 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    17 # with loading, using, modifying and/or developing or reproducing the
 | 
| 
 | 
    18 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    19 # that may mean that it is complicated to manipulate, and that also
 | 
| 
 | 
    20 # therefore means that it is reserved for developers and experienced
 | 
| 
 | 
    21 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    22 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    23 # requirements in conditions enabling the security of their systems and/or
 | 
| 
 | 
    24 # data to be ensured and, more generally, to use and operate it in the
 | 
| 
 | 
    25 # same conditions as regards security.
 | 
| 
 | 
    26 # 
 | 
| 
 | 
    27 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    28 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    29 #
 | 
| 
 | 
    30 import os
 | 
| 
 | 
    31 import random
 | 
| 
 | 
    32 from SMART.Java.Python.mySql.MySqlTable import MySqlTable
 | 
| 
 | 
    33 from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
 | 
| 
 | 
    34 from SMART.Java.Python.misc.Progress import Progress
 | 
| 
 | 
    35 
 | 
| 
 | 
    36 class MySqlTranscriptWriter(object):
 | 
| 
 | 
    37     """
 | 
| 
 | 
    38     A class that writes a transcript list into a mySQL table
 | 
| 
 | 
    39     @ivar name:                      name of the tables 
 | 
| 
 | 
    40     @type name:                      string
 | 
| 
 | 
    41     @ivar tables:                    the tables
 | 
| 
 | 
    42     @type tables:                    dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
 | 
| 
 | 
    43     @ivar mySqlConnection:           connection to a MySQL database
 | 
| 
 | 
    44     @type mySqlConnection:           class L{MySqlConnection<MySqlConnection>}
 | 
| 
 | 
    45     @ivar tmpTranscriptFileHandles:  files where transcripts are temporary stored, before copy into database
 | 
| 
 | 
    46     @type tmpTranscriptFileHandles:  dict of file handles
 | 
| 
 | 
    47     @ivar nbTranscriptsByChromosome: number of transcripts written
 | 
| 
 | 
    48     @type nbTranscriptsByChromosome: dict of int (one for each chromosome)
 | 
| 
 | 
    49     @ivar randomNumber:              a random number, used for having a unique name for the tables
 | 
| 
 | 
    50     @type randomNumber:              int
 | 
| 
 | 
    51     @ivar toBeWritten:               there exists transcripts to be copied into database
 | 
| 
 | 
    52     @type toBeWritten:               bool                
 | 
| 
 | 
    53     @ivar verbosity:                 verbosity
 | 
| 
 | 
    54     @type verbosity:                 int        
 | 
| 
 | 
    55     """
 | 
| 
 | 
    56 
 | 
| 
 | 
    57 
 | 
| 
 | 
    58     def __init__(self, connection, name = None, verbosity = 0):
 | 
| 
 | 
    59         """
 | 
| 
 | 
    60         Constructor
 | 
| 
 | 
    61         @param name:      name of the file 
 | 
| 
 | 
    62         @type  name:      string
 | 
| 
 | 
    63         @param verbosity: verbosity
 | 
| 
 | 
    64         @type  verbosity: int
 | 
| 
 | 
    65         """
 | 
| 
 | 
    66         self.name                      = name
 | 
| 
 | 
    67         self.verbosity                 = verbosity
 | 
| 
 | 
    68         self.tables                    = {}
 | 
| 
 | 
    69         self.indices                   = {}
 | 
| 
 | 
    70         self.tmpTranscriptFileHandles  = {}
 | 
| 
 | 
    71         self.nbTranscriptsByChromosome = {}
 | 
| 
 | 
    72         self.toBeWritten               = False
 | 
| 
 | 
    73         self.randomNumber              = random.randint(0, 100000)
 | 
| 
 | 
    74         self.mySqlConnection           = connection
 | 
| 
 | 
    75         self.nbTmpFiles                = 100
 | 
| 
 | 
    76         self.transcriptValues          = {}
 | 
| 
 | 
    77         self.nbTranscriptValues        = 1000
 | 
| 
 | 
    78         if self.name != None:
 | 
| 
 | 
    79             pos = self.name.rfind(os.sep)
 | 
| 
 | 
    80             if pos != -1:
 | 
| 
 | 
    81                 self.name = self.name[pos+1:]
 | 
| 
 | 
    82             
 | 
| 
 | 
    83 
 | 
| 
 | 
    84     def __del__(self):
 | 
| 
 | 
    85         """
 | 
| 
 | 
    86         Destructor
 | 
| 
 | 
    87         Possibly write into into database the last transcripts
 | 
| 
 | 
    88         """
 | 
| 
 | 
    89         if self.toBeWritten:
 | 
| 
 | 
    90             self.write()
 | 
| 
 | 
    91 
 | 
| 
 | 
    92 
 | 
| 
 | 
    93     def addIndex(self, name, values):
 | 
| 
 | 
    94         """
 | 
| 
 | 
    95         Add an index to the tables
 | 
| 
 | 
    96         @param name:   name of the index
 | 
| 
 | 
    97         @type  name:   string
 | 
| 
 | 
    98         @param values: values to index
 | 
| 
 | 
    99         @type  values: list of strings
 | 
| 
 | 
   100         """
 | 
| 
 | 
   101         self.indices[name] = values
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 
 | 
| 
 | 
   104     def createTable(self, chromosome):
 | 
| 
 | 
   105         """
 | 
| 
 | 
   106         Create a table for a chromosome
 | 
| 
 | 
   107         @param chromosome: a chromosome name
 | 
| 
 | 
   108         @type  chromosome: string
 | 
| 
 | 
   109         """
 | 
| 
 | 
   110         self.tables[chromosome] = MySqlTranscriptTable(self.mySqlConnection, self.name, chromosome, self.verbosity)
 | 
| 
 | 
   111         self.tables[chromosome].createTranscriptTable()
 | 
| 
 | 
   112         for name, values in self.indices.iteritems():
 | 
| 
 | 
   113             self.tables[chromosome].createIndex("%s_%s_%d" % (name, chromosome, self.randomNumber), values)
 | 
| 
 | 
   114 
 | 
| 
 | 
   115         
 | 
| 
 | 
   116     
 | 
| 
 | 
   117     def addTranscript(self, transcript):
 | 
| 
 | 
   118         """
 | 
| 
 | 
   119         Add a transcript to the list of transcripts to be written
 | 
| 
 | 
   120         @param transcript: transcript to be written
 | 
| 
 | 
   121         @type  transcript: class L{Transcript<Transcript>}
 | 
| 
 | 
   122         """
 | 
| 
 | 
   123         chromosome = transcript.getChromosome()
 | 
| 
 | 
   124         if chromosome not in self.tables:
 | 
| 
 | 
   125             self.createTable(chromosome)
 | 
| 
 | 
   126             self.nbTranscriptsByChromosome[chromosome] = 1
 | 
| 
 | 
   127         if chromosome not in self.transcriptValues:
 | 
| 
 | 
   128             self.transcriptValues[chromosome] = []
 | 
| 
 | 
   129             
 | 
| 
 | 
   130         self.transcriptValues[chromosome].append(transcript.getSqlValues())
 | 
| 
 | 
   131 
 | 
| 
 | 
   132         self.nbTranscriptsByChromosome[chromosome] += 1
 | 
| 
 | 
   133         self.toBeWritten                            = True
 | 
| 
 | 
   134         if sum([len(transcripts) for transcripts in self.transcriptValues.values()]) > self.nbTranscriptValues:
 | 
| 
 | 
   135             self.write() 
 | 
| 
 | 
   136 
 | 
| 
 | 
   137 
 | 
| 
 | 
   138     def addElement(self, element):
 | 
| 
 | 
   139         """
 | 
| 
 | 
   140         Same as "addTranscript"
 | 
| 
 | 
   141         @param element: transcript to be written
 | 
| 
 | 
   142         @type  element: class L{Transcript<Transcript>}
 | 
| 
 | 
   143         """
 | 
| 
 | 
   144         self.addTranscript(element)
 | 
| 
 | 
   145 
 | 
| 
 | 
   146 
 | 
| 
 | 
   147 #   def addTranscriptList(self, transcriptListParser):
 | 
| 
 | 
   148 #       """
 | 
| 
 | 
   149 #       Add a list of transcripts to the transcripts to be written
 | 
| 
 | 
   150 #       @param transcriptListParser: transcripts to be written
 | 
| 
 | 
   151 #       @type  transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}
 | 
| 
 | 
   152 #       """
 | 
| 
 | 
   153 #       progress = Progress(transcriptListParser.getNbTranscripts(), "Storing %s into database" % (transcriptListParser.fileName), self.verbosity)
 | 
| 
 | 
   154 #       for transcript in transcriptListParser.getIterator():
 | 
| 
 | 
   155 #           self.addTranscript(transcript)
 | 
| 
 | 
   156 #           progress.inc()
 | 
| 
 | 
   157 #       progress.done()
 | 
| 
 | 
   158             
 | 
| 
 | 
   159             
 | 
| 
 | 
   160     def addTranscriptList(self, transcriptListParser):
 | 
| 
 | 
   161         """
 | 
| 
 | 
   162         Add a list of transcripts to the transcripts to be written
 | 
| 
 | 
   163         @param transcriptListParser: transcripts to be written
 | 
| 
 | 
   164         @type  transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}
 | 
| 
 | 
   165         """
 | 
| 
 | 
   166         self.transcriptListParser = transcriptListParser
 | 
| 
46
 | 
   167         self.mySqlConnection.executeManyQueriesIterator(self)
 | 
| 
36
 | 
   168             
 | 
| 
 | 
   169             
 | 
| 
 | 
   170     def getIterator(self):
 | 
| 
 | 
   171         """
 | 
| 
 | 
   172         Iterator to the SQL commands to insert the list
 | 
| 
 | 
   173         """
 | 
| 
 | 
   174         progress = Progress(self.transcriptListParser.getNbTranscripts(), "Storing %s into database" % (self.transcriptListParser.fileName), self.verbosity)
 | 
| 
 | 
   175         for transcript in self.transcriptListParser.getIterator():
 | 
| 
 | 
   176             chromosome = transcript.getChromosome()
 | 
| 
 | 
   177             if chromosome not in self.tables:
 | 
| 
 | 
   178                 self.createTable(chromosome)
 | 
| 
 | 
   179             self.nbTranscriptsByChromosome[chromosome] = self.nbTranscriptsByChromosome.get(chromosome, 0) + 1
 | 
| 
 | 
   180             values = transcript.getSqlValues()
 | 
| 
46
 | 
   181             yield "INSERT INTO '%s' (%s) VALUES (%s)" % (self.tables[chromosome].name, ", ".join(self.tables[chromosome].variables), ", ".join([MySqlTable.formatSql(values[variable], self.tables[chromosome].types[variable], self.tables[chromosome].sizes[variable]) for variable in self.tables[chromosome].variables]))
 | 
| 
36
 | 
   182             progress.inc()
 | 
| 
 | 
   183         progress.done()
 | 
| 
 | 
   184             
 | 
| 
 | 
   185             
 | 
| 
 | 
   186     def write(self):
 | 
| 
 | 
   187         """
 | 
| 
 | 
   188         Copy the content of the files into the database
 | 
| 
 | 
   189         (May add transcripts to already created databases)
 | 
| 
 | 
   190         """
 | 
| 
 | 
   191         for chromosome in self.transcriptValues:
 | 
| 
 | 
   192             if chromosome in self.transcriptValues:
 | 
| 
46
 | 
   193                 self.tables[chromosome].insertMany(self.transcriptValues[chromosome])
 | 
| 
36
 | 
   194         self.transcriptValues = {}
 | 
| 
 | 
   195         self.toBeWritten      = False
 | 
| 
 | 
   196             
 | 
| 
 | 
   197             
 | 
| 
 | 
   198     def getTables(self):
 | 
| 
 | 
   199         """
 | 
| 
 | 
   200         Get the tables
 | 
| 
 | 
   201         @return: the mySQL tables
 | 
| 
 | 
   202         """
 | 
| 
 | 
   203         if self.toBeWritten:
 | 
| 
 | 
   204             self.write()
 | 
| 
 | 
   205         return self.tables
 | 
| 
 | 
   206 
 | 
| 
 | 
   207             
 | 
| 
 | 
   208             
 | 
| 
 | 
   209     def removeTables(self):
 | 
| 
 | 
   210         """
 | 
| 
 | 
   211         Drop the tables
 | 
| 
 | 
   212         """
 | 
| 
 | 
   213         for chromosome in self.tables:
 | 
| 
46
 | 
   214             self.tables[chromosome].remove()
 |