comparison commons/tools/GameXmlMaker.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 #!/usr/bin/env python
2
3 ##@file GameXmlMaker.py
4
5 # Copyright INRA (Institut National de la Recherche Agronomique)
6 # http://www.inra.fr
7 # http://urgi.versailles.inra.fr
8 #
9 # This software is governed by the CeCILL license under French law and
10 # abiding by the rules of distribution of free software. You can use,
11 # modify and/ or redistribute the software under the terms of the CeCILL
12 # license as circulated by CEA, CNRS and INRIA at the following URL
13 # "http://www.cecill.info".
14 #
15 # As a counterpart to the access to the source code and rights to copy,
16 # modify and redistribute granted by the license, users are provided only
17 # with a limited warranty and the software's author, the holder of the
18 # economic rights, and the successive licensors have only limited
19 # liability.
20 #
21 # In this respect, the user's attention is drawn to the risks associated
22 # with loading, using, modifying and/or developing or reproducing the
23 # software by the user in light of its specific status of free software,
24 # that may mean that it is complicated to manipulate, and that also
25 # therefore means that it is reserved for developers and experienced
26 # professionals having in-depth computer knowledge. Users are therefore
27 # encouraged to load and test the software's suitability as regards their
28 # requirements in conditions enabling the security of their systems and/or
29 # data to be ensured and, more generally, to use and operate it in the
30 # same conditions as regards security.
31 #
32 # The fact that you are presently reading this means that you have had
33 # knowledge of the CeCILL license and that you accept its terms.
34
35 import os
36 import glob
37 import sys
38 import xml.dom.minidom
39 from commons.core.utils.RepetOptionParser import RepetOptionParser
40 from commons.core.utils.FileUtils import FileUtils
41 from commons.core.seq.BioseqDB import BioseqDB
42 from commons.core.sql.DbFactory import DbFactory
43 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
44 from commons.core.sql.TableSetAdaptator import TableSetAdaptator
45 from commons.core.sql.TableMapAdaptator import TableMapAdaptator
46
## GameXmlMaker writes one '.gamexml' file per input sequence and adds
#  annotations read from 'path', 'set' and 'map' tables to '.gamexml' files.
#
class GameXmlMaker(object):
    """Create '.gamexml' files and fill them with annotations.

    Two independent steps are driven by the attributes:
      - if an input sequence name is set (-f), one '<header>.gamexml' file is
        written per sequence, holding only the sequence itself;
      - if a tables file is set (-t), annotations listed in that file are
        appended to one given '.gamexml' file (-g) or to every '*.gamexml'
        file of the current directory.
    """

    def __init__(self, inFastaName = "", tablesFileName = "", configFileName = "", verbose = 0):
        """Store the settings; nothing is read or written here.

        @param inFastaName: input passed to BioseqDB to load the sequences
        @param tablesFileName: file listing 'program format table' triplets
        @param configFileName: file handed to DbFactory.createInstance()
        @param verbose: messages are printed when greater than 0
        """
        self._inFastaName = inFastaName
        self._tablesFileName = tablesFileName
        self._configFileName = configFileName
        self._verbose = verbose
        self._gameXMLFileName = ""

    def setAttributesFromCmdLine(self):
        """Parse the command line and copy the options into the attributes."""
        description = "GameXmlMaker with -f option <=> step 1 : create gff files (write only the sequence and not the annotation. Only one sequence in each file)\n"
        description += "GameXmlMaker with -t option <=> step 2 : add annotations in each file\n"
        parser = RepetOptionParser(description = description)
        parser.add_option("-f", "--inseq", dest = "inFastaName", action = "store", type = "string",
                          help = "'fasta' file or 'seq' table recording the input sequences (required to generate new '.gamexml' files)",
                          default = "")
        parser.add_option("-t", "--tablesfile", dest = "tablesFileName", action = "store", type = "string",
                          help = "tabulated file of table name to use to update the GameXML files (fields: tier name, format, table name)",
                          default = "")
        parser.add_option("-g", "--gameXML", dest = "gameXML", action = "store", type = "string",
                          help = "gameXML file to update (if not specified, update all gameXML files in directory",
                          default = "")
        parser.add_option("-C", "--config", dest = "configFileName", action = "store", type = "string",
                          help = "configuration file for database connection",
                          default = "")
        parser.add_option("-v", "--verbose", dest = "verbose", action = "store", type = "int",
                          help = "verbosity level (default=0, else 1 or 2)",
                          default = 0)
        # Positional arguments are not used by this tool.
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy every parsed option into the matching attribute."""
        self.setInFastaName(options.inFastaName)
        self.setTablesFileName(options.tablesFileName)
        self.setGameXMLFileName(options.gameXML)
        self.setConfigFileName(options.configFileName)
        self.setVerbose(options.verbose)

    def setInFastaName(self, inFastaName):
        """Set the name of the input holding the sequences."""
        self._inFastaName = inFastaName

    def setTablesFileName(self, tablesFileName):
        """Set the name of the file listing the tables to export."""
        self._tablesFileName = tablesFileName

    def setGameXMLFileName(self, gamexmlFileName):
        """Set the single '.gamexml' file to update ("" means all of them)."""
        self._gameXMLFileName = gamexmlFileName

    def setConfigFileName(self, configFileName):
        """Set the name of the configuration file."""
        self._configFileName = configFileName

    def setVerbose(self, verbose):
        """Set the verbosity level."""
        self._verbose = verbose

    def checkOptions(self):
        """Validate the attributes.

        @raise Exception: if neither the input sequences nor the tables file
        is set, or if a configuration file is given but does not exist.
        """
        if self._inFastaName == "" and self._tablesFileName == "":
            raise Exception("ERROR: options -f or -t required")

        if self._configFileName != "" and not FileUtils.isRessourceExists(self._configFileName):
            raise Exception("ERROR: configuration file does not exist!")

    def run(self):
        """Check the options, then run the creation and/or update step."""
        self.checkOptions()
        if self._verbose > 0:
            print("START GameXmlMaker")
            sys.stdout.flush()

        if self._inFastaName != "":
            self._createGameXMLFiles()

        if self._tablesFileName != "":
            # Without an explicit file, every '.gamexml' file found in the
            # current directory is updated.
            if self._gameXMLFileName == "":
                lGameXMLFiles = glob.glob("*.gamexml")
            else:
                lGameXMLFiles = [self._gameXMLFileName]

            for gameXMLFile in lGameXMLFiles:
                self._updateGameXMLFileFromlTablesFile(gameXMLFile)

        if self._verbose > 0:
            # The closing message used to name another tool ("GFF3Maker").
            print("END GameXmlMaker")
            sys.stdout.flush()

    ## Create as many XML files as sequences given in fasta file.
    #
    def _createGameXMLFiles(self):
        if self._verbose > 0:
            print("reading file %s" % self._inFastaName)
            sys.stdout.flush()

        iBioseqDB = BioseqDB(self._inFastaName)

        if self._verbose > 0:
            print("nb of sequences = %i" % iBioseqDB.getSize())
            sys.stdout.flush()

        for iBioseq in iBioseqDB.db:
            self._writeGameXMLFileFromBioseq(iBioseq)

    @staticmethod
    def _appendTextElement(docXML, parent, tagName, text):
        """Append <tagName>text</tagName> to 'parent' and return the new element."""
        element = docXML.createElement(tagName)
        element.appendChild(docXML.createTextNode(text))
        parent.appendChild(element)
        return element

    def _writeGameXMLFileFromBioseq(self, iBioseq):
        """
        write new '.gamexml' file with <game> and <seq> tags

        The file is named '<header>.gamexml' and holds a <seq> element (name
        and residues) followed by a <map_position> element spanning the
        sequence from 1 to its length.
        """
        header = iBioseq.getHeader()
        docXML = xml.dom.minidom.getDOMImplementation().createDocument(None, 'game', None)
        root = docXML.documentElement

        seq = docXML.createElement('seq')
        seq.setAttribute('id', header)
        seq.setAttribute('focus', 'true')
        root.appendChild(seq)
        self._appendTextElement(docXML, seq, 'name', header)
        self._appendTextElement(docXML, seq, 'residues', iBioseq.getSequence())

        mapPos = docXML.createElement('map_position')
        root.appendChild(mapPos)
        self._appendTextElement(docXML, mapPos, 'arm', header)

        span = docXML.createElement('span')
        mapPos.appendChild(span)
        self._appendTextElement(docXML, span, 'start', '1')
        self._appendTextElement(docXML, span, 'end', str(iBioseq.getLength()))

        fileName = "%s.gamexml" % header
        # The context manager guarantees the handle is flushed and closed.
        with open(fileName, "w") as gameXMLHandle:
            docXML.writexml(gameXMLHandle)

        if self._verbose > 0:
            print("file '%s' written" % fileName)
            sys.stdout.flush()

    def _parseResultSpanInfo(self, element, type):
        """Extract the span description of an annotation.

        @param element: annotation object, queried through its getters
        @param type: "path", "set" or "map"; selects which getters are used
        @return: tuple (Qstart, Qend, Sstart, Send, query, subject, identity, id).
        Fields the given type does not provide keep their default value
        (-1 for coordinates, "" for names and id, 0 for identity); an
        unknown type yields only defaults.
        """
        Qstart = -1
        Qend = -1
        Sstart = -1
        Send = -1
        query = ""
        subject = ""
        identity = 0
        identifier = ""

        if type == "path":
            Qstart = element.getQueryStart()
            Qend = element.getQueryEnd()
            Sstart = element.getSubjectStart()
            Send = element.getSubjectEnd()
            query = element.getQueryName()
            subject = element.getSubjectName()
            identity = element.getIdentity()
            identifier = element.getIdentifier()

        elif type == "set":
            Qstart = element.getStart()
            Qend = element.getEnd()
            # A set has a single name, used for both sides.
            query = element.getName()
            subject = element.getName()
            identifier = element.getId()

        elif type == "map":
            Qstart = element.getStart()
            Qend = element.getEnd()
            query = element.getSeqname()
            subject = element.getName()
            # Maps carry no identifier: "-1" is used instead.
            identifier = "-1"

        return (Qstart, Qend, Sstart, Send, query, subject, identity, identifier)

    def _createSeqRelationship(self, docXML, relType, seqName, start, end):
        """Build <seq_relationship type= seq=><span><start/><end/></span></seq_relationship>."""
        relationship = docXML.createElement('seq_relationship')
        relationship.setAttribute('type', relType)
        relationship.setAttribute('seq', seqName)
        span = docXML.createElement('span')
        relationship.appendChild(span)
        self._appendTextElement(docXML, span, 'start', start)
        self._appendTextElement(docXML, span, 'end', end)
        return relationship

    def _addPathSpan(self, docXML, spanInfo, parent):
        """Append a <result_span> built from 'spanInfo' to 'parent'.

        @param docXML: document used to create the nodes
        @param spanInfo: tuple as returned by _parseResultSpanInfo()
        @param parent: element receiving the new <result_span>

        The <result_span> holds, in this order, the query relationship, the
        subject relationship (named 'subject::id') and the <score>, whose
        text is the identity.
        """
        Qstart, Qend, Sstart, Send, query, subject, identity, identifier = spanInfo

        resultSpan = docXML.createElement('result_span')
        parent.appendChild(resultSpan)

        resultSpan.appendChild(
            self._createSeqRelationship(docXML, 'query', query, str(Qstart), str(Qend)))
        resultSpan.appendChild(
            self._createSeqRelationship(docXML, 'subject', '%s::%s' % (subject, str(identifier)),
                                        str(Sstart), str(Send)))
        self._appendTextElement(docXML, resultSpan, 'score', str(identity))

    @staticmethod
    def _getNodeText(node):
        """Return the stripped concatenation of the text children of 'node'."""
        lTextTypes = (xml.dom.Node.TEXT_NODE, xml.dom.Node.CDATA_SECTION_NODE)
        return "".join([child.data for child in node.childNodes
                        if child.nodeType in lTextTypes]).strip()

    def _addComputationalAnalysisTags(self, docXML, programName):
        """Return the <computational_analysis> element of a program.

        An existing element whose first <program> child contains
        'programName' is reused; otherwise a new one, with its <program> and
        <database> children, is appended to the document root.

        @param docXML: document to search and, if needed, extend
        @param programName: text identifying the analysis
        @return: the matching or newly created element
        """
        for analysis in docXML.getElementsByTagName('computational_analysis'):
            lPrograms = analysis.getElementsByTagName("program")
            # An element's nodeValue is always None: the name has to be read
            # from the text node(s) below <program>.
            if lPrograms and self._getNodeText(lPrograms[0]) == programName:
                return analysis

        computationalAnalysis = docXML.createElement('computational_analysis')
        docXML.documentElement.appendChild(computationalAnalysis)
        self._appendTextElement(docXML, computationalAnalysis, 'program', programName)
        self._appendTextElement(docXML, computationalAnalysis, 'database', 'db')
        return computationalAnalysis

    def _addResultSet(self, docXML, parent, identifier, name):
        """Append <result_set id=identifier><name>name::identifier</name></result_set> to 'parent'."""
        resultSet = docXML.createElement('result_set')
        resultSet.setAttribute('id', identifier)
        parent.appendChild(resultSet)
        self._appendTextElement(docXML, resultSet, 'name', "%s::%s" % (name, identifier))
        return resultSet

    def _addResultSetFromPath(self, docXML, iPath, parent):
        """Append to 'parent' the <result_set> of a path (subject name, identifier) and return it."""
        return self._addResultSet(docXML, parent, str(iPath.getIdentifier()), iPath.getSubjectName())

    def _addResultSetFromSet(self, docXML, iSet, parent):
        """Append to 'parent' the <result_set> of a set (name, id) and return it."""
        return self._addResultSet(docXML, parent, str(iSet.getId()), iSet.getName())

    def _addResultSetFromMap(self, docXML, iMap, parent):
        """Append to 'parent' the <result_set> of a map (name, id "-1") and return it."""
        return self._addResultSet(docXML, parent, "-1", iMap.getName())

    def _addGroupedAnnotations(self, docXML, lElements, elementType, getKey, addResultSet, parent):
        """Add one <result_span> per element, grouped in one <result_set> per key.

        @param lElements: annotations to export
        @param elementType: type given to _parseResultSpanInfo()
        @param getKey: callable returning the grouping key of an element
        @param addResultSet: callable(docXML, element, parent) creating the
        <result_set> of the first element met for a key
        @param parent: element receiving the <result_set> elements
        """
        dResultSets = {}
        for element in lElements:
            key = getKey(element)
            resultSet = dResultSets.get(key)
            if resultSet is None:
                resultSet = addResultSet(docXML, element, parent)
                dResultSets[key] = resultSet
            spanInfo = self._parseResultSpanInfo(element, elementType)
            self._addPathSpan(docXML, spanInfo, resultSet)

    def _addAnnotationsFromTable(self, docXML, tableFormat, table, seqName, parent):
        """Export the annotations of 'seqName' stored in 'table' below 'parent'.

        Formats other than "path", "set" and "map" are silently ignored.
        """
        if tableFormat not in ("path", "set", "map"):
            return

        # NOTE(review): one connection is created per table and is not closed
        # here, as before -- confirm whether DbFactory instances need closing.
        iDB = DbFactory.createInstance(self._configFileName)

        if tableFormat == "path":
            lPaths = TablePathAdaptator(iDB, table).getPathListFromQuery(seqName)
            self._addGroupedAnnotations(docXML, lPaths, "path",
                                        lambda iPath: iPath.getIdentifier(),
                                        self._addResultSetFromPath, parent)
        elif tableFormat == "set":
            lSets = TableSetAdaptator(iDB, table).getSetListFromSeqName(seqName)
            self._addGroupedAnnotations(docXML, lSets, "set",
                                        lambda iSet: iSet.getId(),
                                        self._addResultSetFromSet, parent)
        else:
            # Maps are not grouped: each one gets its own <result_set>.
            for iMap in TableMapAdaptator(iDB, table).getMapListFromChr(seqName):
                resultSet = self._addResultSetFromMap(docXML, iMap, parent)
                spanInfo = self._parseResultSpanInfo(iMap, "map")
                self._addPathSpan(docXML, spanInfo, resultSet)

    def _updateGameXMLFileFromlTablesFile(self, gameXMLFile):
        """Add to 'gameXMLFile' the annotations of every table of the tables file.

        Each line of the tables file that does not start with '#' must hold
        at least three whitespace-separated fields: program name, format and
        table name. Blank lines are skipped. The file name without its
        extension is the sequence name looked up in the tables. The file is
        rewritten in place.

        @param gameXMLFile: name of the '.gamexml' file to update
        @raise IndexError: if a non-blank line holds fewer than three fields
        """
        docXML = xml.dom.minidom.parse(gameXMLFile)
        seqName = os.path.splitext(gameXMLFile)[0]

        with open(self._tablesFileName, "r") as tablesFile:
            for line in tablesFile:
                if line.startswith("#"):
                    continue
                lFields = line.split()
                if not lFields:
                    # Blank line (e.g. trailing newline at the end of the file).
                    continue
                programName = lFields[0]
                tableFormat = lFields[1]
                table = lFields[2]

                computationalAnalysis = self._addComputationalAnalysisTags(docXML, programName)
                self._addAnnotationsFromTable(docXML, tableFormat, table, seqName,
                                              computationalAnalysis)

        # Serialise before opening, so that a failure does not leave the
        # original file truncated.
        xmlstr = docXML.toxml()
        with open(gameXMLFile, "w") as outputGameXMLFile:
            outputGameXMLFile.write(xmlstr)
379
if __name__ == "__main__":
    # Script entry point: configure the maker from the command line, then run it.
    maker = GameXmlMaker()
    maker.setAttributesFromCmdLine()
    maker.run()
384