Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/GameXmlMaker.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ##@file GameXmlMaker.py | |
4 | |
5 # Copyright INRA (Institut National de la Recherche Agronomique) | |
6 # http://www.inra.fr | |
7 # http://urgi.versailles.inra.fr | |
8 # | |
9 # This software is governed by the CeCILL license under French law and | |
10 # abiding by the rules of distribution of free software. You can use, | |
11 # modify and/ or redistribute the software under the terms of the CeCILL | |
12 # license as circulated by CEA, CNRS and INRIA at the following URL | |
13 # "http://www.cecill.info". | |
14 # | |
15 # As a counterpart to the access to the source code and rights to copy, | |
16 # modify and redistribute granted by the license, users are provided only | |
17 # with a limited warranty and the software's author, the holder of the | |
18 # economic rights, and the successive licensors have only limited | |
19 # liability. | |
20 # | |
21 # In this respect, the user's attention is drawn to the risks associated | |
22 # with loading, using, modifying and/or developing or reproducing the | |
23 # software by the user in light of its specific status of free software, | |
24 # that may mean that it is complicated to manipulate, and that also | |
25 # therefore means that it is reserved for developers and experienced | |
26 # professionals having in-depth computer knowledge. Users are therefore | |
27 # encouraged to load and test the software's suitability as regards their | |
28 # requirements in conditions enabling the security of their systems and/or | |
29 # data to be ensured and, more generally, to use and operate it in the | |
30 # same conditions as regards security. | |
31 # | |
32 # The fact that you are presently reading this means that you have had | |
33 # knowledge of the CeCILL license and that you accept its terms. | |
34 | |
35 import os | |
36 import glob | |
37 import sys | |
38 import xml.dom.minidom | |
39 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
40 from commons.core.utils.FileUtils import FileUtils | |
41 from commons.core.seq.BioseqDB import BioseqDB | |
42 from commons.core.sql.DbFactory import DbFactory | |
43 from commons.core.sql.TablePathAdaptator import TablePathAdaptator | |
44 from commons.core.sql.TableSetAdaptator import TableSetAdaptator | |
45 from commons.core.sql.TableMapAdaptator import TableMapAdaptator | |
46 | |
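47 ## GameXmlMaker creates one '.gamexml' file per input sequence and adds to these files the annotations read from 'path', 'set' or 'map' tables. | |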
48 # | |
class GameXmlMaker(object):
    """Create '.gamexml' files and fill them with annotations.

    The work is split in two independent steps, each one enabled by an
    attribute (or the matching command-line option):

      * step 1 (``inFastaName``, option -f): write one '<header>.gamexml'
        file per input sequence, holding only the sequence itself;
      * step 2 (``tablesFileName``, option -t): add to existing '.gamexml'
        files the annotations read from the tables listed in the tables file
        (one line per table: tier name, format, table name).
    """

    def __init__(self, inFastaName = "", tablesFileName = "", configFileName = "", verbose = 0):
        self._inFastaName = inFastaName
        self._tablesFileName = tablesFileName
        self._configFileName = configFileName
        self._verbose = verbose
        self._gameXMLFileName = ""

    def setAttributesFromCmdLine(self):
        """Parse the command line and store the option values in the instance."""
        # The original text said "create gff files", although this step writes
        # '.gamexml' files (see the help of option -f).
        description = "GameXmlMaker with -f option <=> step 1 : create gamexml files (write only the sequence and not the annotation. Only one sequence in each file)\n"
        description += "GameXmlMaker with -t option <=> step 2 : add annotations in each file\n"
        parser = RepetOptionParser(description = description)
        parser.add_option("-f", "--inseq", dest = "inFastaName", action = "store", type = "string", help = "'fasta' file or 'seq' table recording the input sequences (required to generate new '.gamexml' files)", default = "")
        parser.add_option("-t", "--tablesfile", dest = "tablesFileName", action = "store", type = "string", help = "tabulated file of table name to use to update the GameXML files (fields: tier name, format, table name)", default = "")
        parser.add_option("-g", "--gameXML", dest = "gameXML", action = "store", type = "string", help = "gameXML file to update (if not specified, update all gameXML files in directory", default = "")
        parser.add_option("-C", "--config", dest = "configFileName", action = "store", type = "string", help = "configuration file for database connection", default = "")
        parser.add_option("-v", "--verbose", dest = "verbose", action = "store", type = "int", help = "verbosity level (default=0, else 1 or 2)", default = 0)
        # Positional arguments are not used by this tool.
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy the parsed option values into the instance through the setters."""
        self.setInFastaName(options.inFastaName)
        self.setTablesFileName(options.tablesFileName)
        self.setGameXMLFileName(options.gameXML)
        self.setConfigFileName(options.configFileName)
        self.setVerbose(options.verbose)

    def setInFastaName(self, inFastaName):
        self._inFastaName = inFastaName

    def setTablesFileName(self, tablesFileName):
        self._tablesFileName = tablesFileName

    def setGameXMLFileName(self, gamexmlFileName):
        self._gameXMLFileName = gamexmlFileName

    def setConfigFileName(self, configFileName):
        self._configFileName = configFileName

    def setVerbose(self, verbose):
        self._verbose = verbose

    def checkOptions(self):
        """Validate the attributes before running.

        Raises Exception when neither an input sequence file nor a tables
        file is set, or when a configuration file is given but missing.
        """
        if self._inFastaName == "" and self._tablesFileName == "":
            raise Exception("ERROR: options -f or -t required")

        if self._configFileName != "" and not FileUtils.isRessourceExists(self._configFileName):
            raise Exception("ERROR: configuration file does not exist!")

    def _log(self, message):
        """Print 'message' and flush stdout when verbosity is enabled."""
        if self._verbose > 0:
            # Single-argument form: same output under Python 2 and Python 3.
            print(message)
            sys.stdout.flush()

    def run(self):
        """Run step 1 and/or step 2 according to the attributes that are set."""
        self.checkOptions()
        self._log("START GameXmlMaker")

        if self._inFastaName != "":
            self._createGameXMLFiles()

        if self._tablesFileName != "":
            if self._gameXMLFileName == "":
                # No explicit file: update every '.gamexml' file of the
                # current directory.
                lXMLNewFile = glob.glob("*.gamexml")
            else:
                lXMLNewFile = [self._gameXMLFileName]

            for newGamexmlFile in lXMLNewFile:
                self._updateGameXMLFileFromlTablesFile(newGamexmlFile)

        # The original end message named another tool ("GFF3Maker").
        self._log("END GameXmlMaker")

    def _createGameXMLFiles(self):
        """Create as many XML files as sequences given in the fasta file."""
        self._log("reading file %s" % self._inFastaName)

        iBioseqDB = BioseqDB(self._inFastaName)

        if self._verbose > 0:
            # Guarded here so getSize() is only called when it is printed.
            self._log("nb of sequences = %i" % iBioseqDB.getSize())

        for iBioseq in iBioseqDB.db:
            self._writeGameXMLFileFromBioseq(iBioseq)

    def _appendTextElement(self, docXML, parent, tagName, text):
        """Append <tagName>text</tagName> to 'parent' and return the new element."""
        element = docXML.createElement(tagName)
        element.appendChild(docXML.createTextNode(text))
        parent.appendChild(element)
        return element

    def _writeGameXMLFileFromBioseq(self, iBioseq):
        """
        write new '.gamexml' file with <game> and <seq> tags

        The file is named '<header>.gamexml' and is written in the current
        directory. It holds a <seq> element (name and residues) and a
        <map_position> element spanning 1..sequence length.
        """
        docXML = xml.dom.minidom.getDOMImplementation().createDocument(None, 'game', None)
        root = docXML.documentElement
        header = iBioseq.getHeader()

        seq = docXML.createElement('seq')
        seq.setAttribute('id', header)
        seq.setAttribute('focus', 'true')
        root.appendChild(seq)
        self._appendTextElement(docXML, seq, 'name', header)
        self._appendTextElement(docXML, seq, 'residues', iBioseq.getSequence())

        mapPos = docXML.createElement('map_position')
        root.appendChild(mapPos)
        self._appendTextElement(docXML, mapPos, 'arm', header)

        span = docXML.createElement('span')
        mapPos.appendChild(span)
        self._appendTextElement(docXML, span, 'start', '1')
        self._appendTextElement(docXML, span, 'end', str(iBioseq.getLength()))

        fileName = "%s.gamexml" % header
        # 'with' guarantees the handle is flushed and closed, even on error.
        with open(fileName, "w") as outFile:
            docXML.writexml(outFile)

        self._log("file '%s' written" % fileName)

    def _parseResultSpanInfo(self, element, type):
        """Extract the span description of an annotation object.

        'type' is "path", "set" or "map" and selects which accessors are
        called on 'element'. Fields that a format does not provide keep their
        default value (-1 for coordinates, "" for names, 0 for identity); an
        unknown 'type' returns only defaults.

        Returns the tuple
        (Qstart, Qend, Sstart, Send, query, subject, identity, id).
        """
        Qstart = -1
        Qend = -1
        Sstart = -1
        Send = -1
        query = ""
        subject = ""
        identity = 0
        identifier = ""

        if type == "path":
            Qstart = element.getQueryStart()
            Qend = element.getQueryEnd()
            Sstart = element.getSubjectStart()
            Send = element.getSubjectEnd()
            query = element.getQueryName()
            subject = element.getSubjectName()
            identity = element.getIdentity()
            identifier = element.getIdentifier()

        elif type == "set":
            Qstart = element.getStart()
            Qend = element.getEnd()
            query = element.getName()
            subject = element.getName()
            identifier = element.getId()

        elif type == "map":
            Qstart = element.getStart()
            Qend = element.getEnd()
            query = element.getSeqname()
            subject = element.getName()
            identifier = "-1"

        return (Qstart, Qend, Sstart, Send, query, subject, identity, identifier)

    def _appendSeqRelationship(self, docXML, parent, relType, seqName, start, end):
        """Append a <seq_relationship> holding one <span> (start, end) to 'parent'."""
        relship = docXML.createElement('seq_relationship')
        relship.setAttribute('type', relType)
        relship.setAttribute('seq', seqName)
        parent.appendChild(relship)

        span = docXML.createElement('span')
        relship.appendChild(span)
        self._appendTextElement(docXML, span, 'start', str(start))
        self._appendTextElement(docXML, span, 'end', str(end))
        return relship

    def _addPathSpan(self, docXML, spanInfo, parent):
        """Append a <result_span> built from 'spanInfo' to 'parent'.

        'spanInfo' is the tuple returned by _parseResultSpanInfo(). The
        <result_span> holds, in this order, the query relationship, the
        subject relationship (named '<subject>::<id>') and the score (the
        identity).
        """
        Qstart, Qend, Sstart, Send, query, subject, identity, identifier = spanInfo

        resultSpan = docXML.createElement('result_span')
        parent.appendChild(resultSpan)

        self._appendSeqRelationship(docXML, resultSpan, 'query', query, Qstart, Qend)
        self._appendSeqRelationship(docXML, resultSpan, 'subject', '%s::%s' % (subject, str(identifier)), Sstart, Send)
        self._appendTextElement(docXML, resultSpan, 'score', str(identity))

    def _getProgramName(self, computationalAnalysisTag):
        """Return the text of the first <program> child, or None if there is none."""
        lPrograms = computationalAnalysisTag.getElementsByTagName("program")
        if not lPrograms:
            return None
        # The name is stored in the text node(s) below <program>; the
        # nodeValue of the element itself is always None.
        lTexts = [node.data for node in lPrograms[0].childNodes if node.nodeType == node.TEXT_NODE]
        return "".join(lTexts).strip()

    def _addComputationalAnalysisTags(self, docXML, programName):
        """Return the <computational_analysis> element of 'programName'.

        An existing element whose <program> text equals 'programName' is
        reused; otherwise a new one (with its <program> and <database>
        children) is appended to the document root.
        """
        for computationalAnalysisTag in docXML.getElementsByTagName('computational_analysis'):
            if self._getProgramName(computationalAnalysisTag) == programName:
                return computationalAnalysisTag

        computationalAnalysis = docXML.createElement('computational_analysis')
        docXML.documentElement.appendChild(computationalAnalysis)
        self._appendTextElement(docXML, computationalAnalysis, 'program', programName)
        self._appendTextElement(docXML, computationalAnalysis, 'database', 'db')
        return computationalAnalysis

    def _addResultSet(self, docXML, parent, identifier, name):
        """Append <result_set id=identifier> named '<name>::<identifier>' to 'parent'."""
        resultSet = docXML.createElement('result_set')
        resultSet.setAttribute('id', str(identifier))
        parent.appendChild(resultSet)
        self._appendTextElement(docXML, resultSet, 'name', "%s::%s" % (name, str(identifier)))
        return resultSet

    def _addResultSetFromPath(self, docXML, iPath, parent):
        """Append the <result_set> of a path (subject name and path identifier)."""
        return self._addResultSet(docXML, parent, iPath.getIdentifier(), iPath.getSubjectName())

    def _addResultSetFromSet(self, docXML, iSet, parent):
        """Append the <result_set> of a set (set name and set id)."""
        return self._addResultSet(docXML, parent, iSet.getId(), iSet.getName())

    def _addResultSetFromMap(self, docXML, iMap, parent):
        """Append the <result_set> of a map; maps have no identifier, "-1" is used."""
        return self._addResultSet(docXML, parent, "-1", iMap.getName())

    def _updateGameXMLFileFromlTablesFile(self, gameXMLFile):
        """Add to 'gameXMLFile' the annotations of every table of the tables file.

        Each non-comment, non-blank line of the tables file gives a tier
        (program) name, a format ("path", "set" or "map") and a table name.
        The annotations are looked up with the file name without extension
        used as sequence name, and the file is rewritten in place.
        A line with fewer than three fields raises IndexError.
        """
        docXML = xml.dom.minidom.parse(gameXMLFile)
        seqName = os.path.splitext(gameXMLFile)[0]

        with open(self._tablesFileName, "r") as tablesFile:
            for line in tablesFile:
                if line.startswith("#"):
                    continue
                lFields = line.split()
                if not lFields:
                    # Blank lines (e.g. a trailing newline) carry no table.
                    continue
                programName = lFields[0]
                tableFormat = lFields[1]
                tableName = lFields[2]

                computationalAnalysis = self._addComputationalAnalysisTags(docXML, programName)

                if tableFormat == "path":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTpa = TablePathAdaptator(iDB, tableName)
                    # All the paths sharing an identifier go in one result set.
                    dResultSets = {}
                    for iPath in iTpa.getPathListFromQuery(seqName):
                        pathId = iPath.getIdentifier()
                        resultSet = dResultSets.get(pathId)
                        if resultSet is None:
                            resultSet = self._addResultSetFromPath(docXML, iPath, computationalAnalysis)
                            dResultSets[pathId] = resultSet
                        spanInfo = self._parseResultSpanInfo(iPath, "path")
                        self._addPathSpan(docXML, spanInfo, resultSet)

                elif tableFormat == "set":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTsa = TableSetAdaptator(iDB, tableName)
                    # All the sets sharing an id go in one result set.
                    dResultSets = {}
                    for iSet in iTsa.getSetListFromSeqName(seqName):
                        setId = iSet.getId()
                        resultSet = dResultSets.get(setId)
                        if resultSet is None:
                            resultSet = self._addResultSetFromSet(docXML, iSet, computationalAnalysis)
                            dResultSets[setId] = resultSet
                        spanInfo = self._parseResultSpanInfo(iSet, "set")
                        self._addPathSpan(docXML, spanInfo, resultSet)

                elif tableFormat == "map":
                    iDB = DbFactory.createInstance(self._configFileName)
                    iTma = TableMapAdaptator(iDB, tableName)
                    # Maps are not grouped: one result set per map.
                    for iMap in iTma.getMapListFromChr(seqName):
                        resultSet = self._addResultSetFromMap(docXML, iMap, computationalAnalysis)
                        spanInfo = self._parseResultSpanInfo(iMap, "map")
                        self._addPathSpan(docXML, spanInfo, resultSet)

        with open(gameXMLFile, "w") as outputGameXMLFile:
            outputGameXMLFile.write(docXML.toxml())
379 | |
if __name__ == "__main__":
    # Script entry point: read the options from the command line, then run.
    maker = GameXmlMaker()
    maker.setAttributesFromCmdLine()
    maker.run()
384 |