6
|
1 #
|
|
2 # Copyright INRA-URGI 2009-2010
|
|
3 #
|
|
4 # This software is governed by the CeCILL license under French law and
|
|
5 # abiding by the rules of distribution of free software. You can use,
|
|
6 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
7 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
8 # "http://www.cecill.info".
|
|
9 #
|
|
10 # As a counterpart to the access to the source code and rights to copy,
|
|
11 # modify and redistribute granted by the license, users are provided only
|
|
12 # with a limited warranty and the software's author, the holder of the
|
|
13 # economic rights, and the successive licensors have only limited
|
|
14 # liability.
|
|
15 #
|
|
16 # In this respect, the user's attention is drawn to the risks associated
|
|
17 # with loading, using, modifying and/or developing or reproducing the
|
|
18 # software by the user in light of its specific status of free software,
|
|
19 # that may mean that it is complicated to manipulate, and that also
|
|
20 # therefore means that it is reserved for developers and experienced
|
|
21 # professionals having in-depth computer knowledge. Users are therefore
|
|
22 # encouraged to load and test the software's suitability as regards their
|
|
23 # requirements in conditions enabling the security of their systems and/or
|
|
24 # data to be ensured and, more generally, to use and operate it in the
|
|
25 # same conditions as regards security.
|
|
26 #
|
|
27 # The fact that you are presently reading this means that you have had
|
|
28 # knowledge of the CeCILL license and that you accept its terms.
|
|
29 #
|
|
30 import re
|
|
31 import sys
|
|
32 from commons.core.parsing.ParserChooser import ParserChooser
|
|
33 from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
|
|
34 from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
|
|
35
|
|
class TranscriptContainer(object):
    """
    An interface class that contains a list of transcripts, handle different formats
    @ivar container:            container of the data
    @type container:            string
    @ivar format:               format of the data
    @type format:               string
    @ivar transcriptListParser: possibly contains a parser to a list of transcripts
    @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None
    @ivar mappingListParser:    possibly contains a parser to a list of mappings
    @type mappingListParser:    L{MapperParser<MapperParser>} or None
    @ivar transcriptTables:     possibly contains the mySQL tables, indexed by chromosome
    @type transcriptTables:     dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}
    @ivar mySqlConnection:      connection to a MySQL database (not set by this class)
    @type mySqlConnection:      class L{MySqlConnection<MySqlConnection>} or None
    @ivar foundData:            whether findData has already completed
    @type foundData:            boolean
    @ivar nbTranscripts:        cached number of transcripts (None until the data is loaded)
    @type nbTranscripts:        int or None
    @ivar nbNucleotides:        cached number of nucleotides (None until the data is loaded)
    @type nbNucleotides:        int or None
    @ivar chromosomes:          cached chromosomes (None until the data is loaded)
    @type chromosomes:          list or None
    @ivar type:                 type of the data ("transcript", "mapping" or "sql")
    @type type:                 string or None
    @ivar verbosity:            verbosity
    @type verbosity:            int
    """

    def __init__(self, container, format, verbosity = 0):
        """
        Constructor; exits the program if the container or the format is None
        @param container: container of the data
        @type  container: string
        @param format:    format of the data
        @type  format:    string
        @param verbosity: verbosity
        @type  verbosity: int
        """
        self.container            = container
        self.format               = format
        self.verbosity            = verbosity
        self.transcriptListParser = None
        self.mappingListParser    = None
        self.transcriptTables     = {}
        self.mySqlConnection      = None
        self.foundData            = False
        self.nbTranscripts        = None
        self.nbNucleotides        = None
        self.chromosomes          = None
        self.type                 = None
        if self.container is None:
            sys.exit("Error! Container input file name is empty!")
        if self.format is None:
            sys.exit("Error! Container input format is empty!")


    def findData(self):
        """
        Load data: discover the SQL tables or build the parser matching the
        format, then cache the number of transcripts, the number of
        nucleotides and the chromosomes. Exits the program if the format is
        missing or cannot be handled.
        """
        if self.format is None:
            sys.exit("Error! Format is not specified!")
        if self.format == "sql":
            self._loadSqlTables()
        # The type may already be set (e.g. to "sql" just above); only
        # look for a parser when nothing has been decided yet.
        if self.type is None:
            self._loadParser()
        self._loadParserStatistics()
        self.foundData = True


    def _loadSqlTables(self):
        """
        Find the per-chromosome transcript tables of the container and count
        their transcripts and nucleotides
        """
        self.transcriptTables = {}
        self.chromosomes      = []
        self.nbTranscripts    = 0
        self.nbNucleotides    = 0
        self.type             = "sql"
        # NOTE(review): the container name is interpolated directly into the
        # query, and "_" is a single-character wildcard in SQL LIKE; the
        # regular expression below re-checks the shape of each table name.
        query = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE 'table' AND name LIKE '%s_%%_transcripts'" % (self.container))
        prefixSize = len(self.container) + 1
        for line in query.getIterator():
            tableName = line[0]
            # Skip the "<container>_" prefix; what precedes "_transcripts" is the chromosome
            m = re.search(r"^(\S*)_transcripts$", tableName[prefixSize:])
            if m is None:
                sys.exit("Table '%s' has a strange name" % (tableName))
            chromosome = m.group(1)
            table      = MySqlTranscriptTable(self.mySqlConnection, self.container, chromosome, self.verbosity)
            self.transcriptTables[chromosome] = table
            self.chromosomes.append(chromosome)
            for transcript in table.getIterator():
                self.nbTranscripts += 1
                self.nbNucleotides += transcript.getSize()


    def _loadParser(self):
        """
        Ask the parser chooser for the type of the format, and build the
        corresponding transcript or mapping parser
        """
        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(self.format)
        self.type = parserChooser.getType()
        if self.type == "transcript":
            self.transcriptListParser = parserChooser.getParser(self.container)
        elif self.type == "mapping":
            self.mappingListParser = parserChooser.getParser(self.container)
        else:
            sys.exit("Error! Cannot handle format '%s'!" % (self.format))


    def _loadParserStatistics(self):
        """
        Cache the counts and the chromosomes given by the active parser, if any
        """
        if self.transcriptListParser is not None and self.type == "transcript":
            self.nbTranscripts = self.transcriptListParser.getNbTranscripts()
            self.nbNucleotides = self.transcriptListParser.getNbNucleotides()
            self.chromosomes   = self.transcriptListParser.getChromosomes()
        if self.mappingListParser is not None and self.type == "mapping":
            self.nbTranscripts = self.mappingListParser.getNbMappings()
            self.nbNucleotides = self.mappingListParser.getNbNucleotides()
            self.chromosomes   = self.mappingListParser.getChromosomes()


    def getNbTranscripts(self):
        """
        Get the number of transcripts (load the data first if needed)
        @return: the number of transcripts
        """
        if not self.foundData:
            self.findData()
        return self.nbTranscripts


    def getNbItems(self):
        """
        Same as getNbTranscripts
        @return: the number of transcripts
        """
        return self.getNbTranscripts()


    def getNbNucleotides(self):
        """
        Get the number of nucleotides (load the data first if needed)
        @return: the number of nucleotides
        """
        if not self.foundData:
            self.findData()
        return self.nbNucleotides


    def getChromosomes(self):
        """
        Get the chromosomes (load the data first if needed)
        @return: the chromosomes
        """
        if not self.foundData:
            self.findData()
        return self.chromosomes


    def getIterator(self):
        """
        An iterator
        This is a generator: the data is loaded, and the program possibly
        exits on an unknown type, only when the iteration actually starts.
        @return: an iterator to a list of transcripts
        """
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                for transcript in self.transcriptTables[chromosome].getIterator():
                    yield transcript
        elif self.type == "transcript":
            for transcript in self.transcriptListParser.getIterator():
                yield transcript
        elif self.type == "mapping":
            # Mappings are converted to transcripts on the fly
            for mapping in self.mappingListParser.getIterator():
                yield mapping.getTranscript()
        else:
            sys.exit("Error! No valid transcript container given!")


    def storeIntoDatabase(self, name = None):
        """
        Store the current transcript / mapping list into database
        Nothing is done if there is no parser, or if tables already exist.
        @param name: name given to the writer
        @type  name: string or None
        """
        if not self.foundData:
            self.findData()

        noParser = self.transcriptListParser is None and self.mappingListParser is None
        if noParser or len(self.transcriptTables) != 0:
            return

        # Test against None explicitly: a parser which evaluates to False
        # must not be replaced by the (missing) mapping parser.
        if self.transcriptListParser is not None:
            parser = self.transcriptListParser
        else:
            parser = self.mappingListParser

        mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)
        mySqlTranscriptWriter.addTranscriptList(parser)
        mySqlTranscriptWriter.write()
        self.transcriptTables = mySqlTranscriptWriter.getTables()
        # From now on, the data is read from the tables
        self.type = "sql"


    def getTables(self):
        """
        Accessor to the mySQL tables
        @return: the mySQL tables
        """
        return self.transcriptTables


    def setDefaultTagValue(self, name, value):
        """
        Set the given tag to the value for all transcripts
        @param name:  name of the tag
        @type  name:  string
        @param value: value of the tag
        @type  value: string
        """
        # Load the data first, like the other accessors do: before loading,
        # the type is None and the tag would silently be dropped.
        if not self.foundData:
            self.findData()
        if self.type == "sql":
            for chromosome in self.transcriptTables:
                self.transcriptTables[chromosome].setDefaultTagValue(name, value)
        elif self.type == "transcript":
            self.transcriptListParser.setDefaultTagValue(name, value)
        elif self.type == "mapping":
            self.mappingListParser.setDefaultTagValue(name, value)
|
|
236
|