13
|
1 import re
|
|
2 import os
|
|
3 from collections import OrderedDict
|
|
4
|
|
# Three-letter codes keyed by class name or order name.
# Classif._decisionRuleForWickerCode looks the order up first, then the class.
DWICKERCODE = {
    "ClassI": "RXX",
    "ClassII": "DXX",
    "LTR": "RLX",
    "DIRS": "RYX",
    "PLE": "RPX",
    "LINE": "RIX",
    "SINE": "RSX",
    "TIR": "DTX",
    "Crypton": "DYX",
    "Helitron": "DHX",
    "Maverick": "DMX",

    # Prefixed order names (the ones produced by Classif.renameLARDTRIMAndMITE)
    # share the code of the order named in their prefix.
    "TIR-MITE": "DTX",
    "LTR-LARD": "RLX",
    "LTR-TRIM": "RLX",
}
|
|
22
|
|
class Classif(object):
    """Hold the classification data of one consensus (one line of a classif file).

    Besides plain accessors, the class renders its evidence dictionary as
    "coding=", "struct=" and "other=" strings, derives a code from the class
    and order through DWICKERCODE, and builds shortened consensus names.

    The code is written so that it runs on both Python 2 and Python 3: no
    ``dict.has_key`` and no indexing or list comparison of ``dict.keys()``.
    """

    def __init__(self, consensusName="", code="NA", outConfuseness="", outCompleteness="",
                 projectName="", isShorten=False, consensusLength="NA", consensusStrand="NA",
                 consensusClass="NA", consensusOrder="NA", consensusSuperFam="NA",
                 consensusCI="NA"):
        """Store the given values; evidence starts empty and the cached
        coding/struct/other strings start as empty strings."""
        self._consensusName = consensusName
        self._confusness = outConfuseness
        self._completeness = outCompleteness
        self._projectName = projectName
        self._isShorten = isShorten
        self._consensusLength = consensusLength
        self._consensusStrand = consensusStrand
        self._consensusClass = consensusClass
        self._consensusOrder = consensusOrder
        self._consensusSuperFam = consensusSuperFam
        self._consensusCI = consensusCI
        self._consensusCoding = ""
        self._consensusStruct = ""
        self._consensusOther = ""
        self._isNoChim = ""
        # Set to True as a side effect of the format*/write* methods below.
        self._hasCodingPart = False
        self._hasStructPart = False
        self._hasOtherPart = False
        self._code = code
        self._evidence = {}

    def __eq__(self, o):
        """Two instances are equal when name, code, confusness and completeness match."""
        if type(o) is type(self):
            return self._consensusName == o._consensusName and self._code == o._code \
                and self._confusness == o._confusness and self._completeness == o._completeness
        return False

    def __ne__(self, o):
        return not self.__eq__(o)

    # ------------------------------------------------------------------
    # Getters
    # ------------------------------------------------------------------

    def getConsensusName(self):
        return self._consensusName

    def getCode(self):
        return self._code

    def getconfusness(self):
        return self._confusness

    def getcompleteness(self):
        return self._completeness

    def getprojectName(self):
        return self._projectName

    def getConsensusLength(self):
        return self._consensusLength

    def getConsensusStrand(self):
        return self._consensusStrand

    def getConsensusClass(self):
        return self._consensusClass

    def getConsensusOrder(self):
        return self._consensusOrder

    def getConsensusSuperFamily(self):
        return self._consensusSuperFam

    def getConsensusCI(self):
        """Return the CI value converted to a string."""
        return str(self._consensusCI)

    def getInfoEvidence(self):
        return self._evidence

    def _joinFeaturesPerOrder(self, writeLine, skipOtherOnly):
        """Render the evidence with ``writeLine``.

        When confusness is 'ok' the evidence dictionary is rendered directly.
        Otherwise the evidence is expected to be keyed by the "|"-separated
        names of the consensus order; each entry is rendered and the results
        are joined with "|". With ``skipOtherOnly`` set, entries after the
        first whose only key is 'other' are left out.
        """
        if self._confusness == 'ok':
            return writeLine(self._evidence)
        lOrder = self.getConsensusOrder().split("|")
        lParts = [writeLine(self._evidence[lOrder[0]])]
        for order in lOrder[1:]:
            # list(...) is required: on Python 3 a keys view never equals a list.
            if skipOtherOnly and list(self._evidence[order].keys()) == ['other']:
                continue
            lParts.append(writeLine(self._evidence[order]))
        return "|".join(lParts)

    def getConsensusCoding(self):
        """Return the "coding=..." string computed from the stored evidence."""
        return "coding=" + self._joinFeaturesPerOrder(self.writeCodingFeaturesLine, True)

    def getConsensusStructure(self):
        """Return the "struct=..." string computed from the stored evidence."""
        return "struct=" + self._joinFeaturesPerOrder(self.writeStructFeaturesLine, True)

    def getConsensusOther(self):
        """Return the "other=..." string computed from the stored evidence.

        Unlike the coding and struct variants, every order contributes a part.
        """
        return "other=" + self._joinFeaturesPerOrder(self.writeOtherFeaturesLine, False)

    # ------------------------------------------------------------------
    # Setters
    # ------------------------------------------------------------------

    def setConsensusName(self, consensusName):
        self._consensusName = consensusName

    def setInfoEvidence(self, evidence):
        self._evidence = evidence

    def setCode(self):
        """Recompute the code from the current class and order."""
        self._code = self._decisionRuleForWickerCode(self.getConsensusClass(), self.getConsensusOrder())

    def setConfusness(self, Confusness):
        self._confusness = Confusness

    def setCompleteness(self, completeness):
        self._completeness = completeness

    def setProjectName(self, projectName):
        self._projectName = projectName

    def setConsensusLength(self, cLength):
        self._consensusLength = cLength

    def setConsensusStrand(self, cStrand):
        self._consensusStrand = cStrand

    def setConsensusClass(self, cClass):
        self._consensusClass = cClass

    def setConsensusOrder(self, cOrder):
        self._consensusOrder = cOrder

    def setConsensusSuperFamily(self, cSuperFamily):
        # Must write the attribute read by getConsensusSuperFamily() and set
        # by __init__; a differently named attribute would never be visible.
        self._consensusSuperFam = cSuperFamily

    def setConsensusCI(self, CI):
        self._consensusCI = CI

    def setConsensusCoding(self, coding):
        self._consensusCoding = coding

    def setConsensusStructure(self, structure):
        self._consensusStruct = structure

    def setConsensusOther(self, other):
        self._consensusOther = other

    def setCodStrOthFromMessage(self, dico):
        """Fill the cached coding/struct/other strings from a flat evidence dict."""
        self._consensusCoding = "coding=" + self.writeCodingFeaturesLine(dico)
        self._consensusStruct = "struct=" + self.writeStructFeaturesLine(dico)
        self._consensusOther = "other=" + self.writeOtherFeaturesLine(dico)

    def setCodStrOthFromMessage2(self, dico, cOrder):
        """Fill the cached coding/struct/other strings from ``dico``.

        If the first name of ``cOrder`` ("|"-separated, 'rDNA' spelled 'RDNA')
        is not a key of ``dico``, ``dico`` is treated as a flat evidence dict.
        Otherwise ``dico`` is treated as one evidence dict per key: the parts
        are produced in the iteration order of ``dico`` and joined with "|".
        Entries after the first whose only key is 'other' contribute to the
        "other=" string only.
        """
        cOrder = cOrder.replace('rDNA', 'RDNA')
        lOrder = cOrder.split("|")
        # Materialise the keys: a Python 3 keys view cannot be indexed or sliced.
        lDicoKeys = list(dico.keys())
        if lOrder[0] not in lDicoKeys:
            self._consensusCoding = "coding=" + self.writeCodingFeaturesLine(dico)
            self._consensusStruct = "struct=" + self.writeStructFeaturesLine(dico)
            self._consensusOther = "other=" + self.writeOtherFeaturesLine(dico)
            return

        dFirst = dico[lDicoKeys[0]]
        lCoding = [self.writeCodingFeaturesLine(dFirst)]
        lStruct = [self.writeStructFeaturesLine(dFirst)]
        lOther = [self.writeOtherFeaturesLine(dFirst)]
        for order in lDicoKeys[1:]:
            dCurrent = dico[order]
            if list(dCurrent.keys()) != ['other']:
                lCoding.append(self.writeCodingFeaturesLine(dCurrent))
                lStruct.append(self.writeStructFeaturesLine(dCurrent))
            lOther.append(self.writeOtherFeaturesLine(dCurrent))
        self._consensusCoding = "coding=" + "|".join(lCoding)
        self._consensusStruct = "struct=" + "|".join(lStruct)
        self._consensusOther = "other=" + "|".join(lOther)

    # ------------------------------------------------------------------
    # Consensus naming
    # ------------------------------------------------------------------

    def createNewConsensusName(self):
        """Return "<code>[-<completeness>][-<confusness>]_<name>".

        The name part is shortened only when shortening was requested, the
        name matches "<project>_x_y_z..." and it does not contain
        "<project>_RS_"; otherwise the consensus name is used unchanged.
        """
        pastecClassif = "%s" % self._code
        if self._completeness != "":
            pastecClassif += "-%s" % self._completeness
        if self._confusness != "":
            pastecClassif += "-%s" % self._confusness

        name = self._consensusName
        if self._isShorten:
            # The project name is literal text, not a regular expression.
            pattern = "%s_[a-zA-Z0-9]+_[a-zA-Z0-9]+_[a-zA-Z0-9_]+" % re.escape(self._projectName)
            if re.match(pattern, name) and ("%s_RS_" % self._projectName) not in name:
                name = self.shortenConsensusName()
        return "%s_%s" % (pastecClassif, name)

    def shortenConsensusName(self):
        """Return a compact form of the consensus name.

        The text following the project name is split on "_"; fields 1 and 2
        are reduced to their first letter, field 3 is kept, and the remaining
        fields are concatenated (only the first three are used when there are
        more than two, the third being preceded by "_").
        """
        desc = self._consensusName.split(self._projectName)[1]
        lFields = desc.split("_")
        palignMeth = lFields[1]
        clustMeth = lFields[2]
        clustID = lFields[3]
        lmalignMeth = lFields[4:]
        if len(lmalignMeth) > 2:
            malignMeth = "%s%s_%s" % (lmalignMeth[0], lmalignMeth[1], lmalignMeth[2])
        else:
            malignMeth = "".join(lmalignMeth)
        return "%s-%s-%s%s-%s" % (self._projectName, palignMeth[0], clustMeth[0], clustID, malignMeth)

    def renameHeaderInConsensusFastaFile(self, fileName=""):
        """Rewrite a fasta file so that every header holds the shortened name.

        Each line containing ">" is stored as the consensus name of this
        instance (side effect kept from the historical behaviour) and replaced
        by ">" + shortenConsensusName(). The result is written to
        "<base>New.fa" and then moved onto "<base>.fa", where <base> is
        ``fileName`` without its extension.
        """
        # splitext keeps dots located in directory names or in the stem,
        # which a plain split(".")[0] would truncate.
        base = os.path.splitext(fileName)[0]
        newFileName = base + "New.fa"

        with open(fileName, "r") as oldFile:
            with open(newFileName, "w") as newFile:
                for inputLine in oldFile:
                    if ">" in inputLine:
                        self.setConsensusName(inputLine)
                        # The line terminator may or may not survive the
                        # shortening, so strip it and always write exactly one.
                        newFile.write(">%s\n" % self.shortenConsensusName().rstrip("\r\n"))
                    else:
                        newFile.write(inputLine)

        # os.replace (Python >= 3.3) overwrites on every platform; os.rename
        # is the fallback and overwrites on POSIX.
        moveOnto = getattr(os, "replace", os.rename)
        moveOnto(newFileName, base + ".fa")

    # ------------------------------------------------------------------
    # Evidence rendering
    # ------------------------------------------------------------------

    def writeOtherFeaturesLine(self, dEvidence):
        """Render ``dEvidence['other']`` (coding then struct features) as "(a; b)".

        Returns "(NA)" when the key is missing or nothing can be rendered.
        """
        other = "(NA)"
        if 'other' in dEvidence:
            dOtherResults = dEvidence['other']
            lResultsWithCoding = self.formatCodingFeatures(dOtherResults, [])
            lResultsFilled = self.formatStructFeatures(dOtherResults, lResultsWithCoding)
            if lResultsFilled:
                other = '(%s)' % "; ".join(lResultsFilled)
                self._hasOtherPart = True
        return other

    def writeCodingFeaturesLine(self, dEvidence):
        """Render the coding features of ``dEvidence`` as "(a; b)", or "(NA)"."""
        lResultsFilled = self.formatCodingFeatures(dEvidence, [])
        if not lResultsFilled:
            return "(NA)"
        return '(%s)' % "; ".join(lResultsFilled)

    def writeStructFeaturesLine(self, dEvidence):
        """Render the structural features of ``dEvidence`` as "(a; b)", or "(NA)"."""
        lResultsFilled = self.formatStructFeatures(dEvidence, [])
        if not lResultsFilled:
            return "(NA)"
        return '(%s)' % "; ".join(lResultsFilled)

    def formatCodingFeatures(self, dEvidence, lResults):
        """Append one string per coding feature found in ``dEvidence``.

        ``lResults`` is extended in place and returned. ``_hasCodingPart`` is
        set when the list is non-empty on exit.
        """
        if 'Repbase_tbx' in dEvidence and dEvidence['Repbase_tbx'] != []:
            lResults.append("TE_BLRtx: %s" % ", ".join(map(str, dEvidence['Repbase_tbx'])))

        if 'Repbase_bx' in dEvidence and dEvidence['Repbase_bx'] != []:
            lResults.append("TE_BLRx: %s" % ", ".join(map(str, dEvidence['Repbase_bx'])))

        if dEvidence.get('te_hmmer') is not None:
            lResults.append('profiles: %s' % self.formatProfilesResults(dEvidence['te_hmmer']))

        # Same None guard as 'te_hmmer': formatProfilesResults needs a dict.
        if dEvidence.get('Other_profiles') is not None:
            lResults.append('Other_profiles: %s' % self.formatProfilesResults(dEvidence['Other_profiles']))

        if dEvidence.get("rDNA") is not None:
            lResults.append("rDNA_BLRn: %s" % dEvidence["rDNA"])

        if dEvidence.get("HG") is not None:
            lResults.append("HG_BLRn: %s" % dEvidence["HG"])

        if lResults:
            self._hasCodingPart = True
        return lResults

    def formatProfilesResults(self, dProfilesResults):
        """Return "name: xx.xx%, ..." for every entry, or "" for an empty dict.

        Each value must provide ``getCoverageOnSubject()`` returning a number.
        """
        lResults = []
        for key in dProfilesResults.keys():
            cov = "%.2f%%" % dProfilesResults[key].getCoverageOnSubject()
            lResults.append('%s: %s' % (key, cov))
        return ", ".join(lResults)

    def formatStructFeatures(self, dEvidence, lResults):
        """Append one string per structural feature found in ``dEvidence``.

        ``lResults`` is extended in place and returned. ``_hasStructPart`` is
        set when the list is non-empty on exit, including when it already
        held entries on entry.
        """
        if dEvidence.get('length') is not None:
            lResults.append('TElength: %s' % dEvidence['length'])

        if dEvidence.get('TR') is not None:
            lResults.append('TermRepeats: %s' % ", ".join(map(str, dEvidence['TR'])))

        if dEvidence.get('ORF') is not None:
            lResults.append('ORF: %s' % ", ".join(dEvidence['ORF']))

        if dEvidence.get('SSR') is not None:
            lResults.append('SSR: %s' % ", ".join(dEvidence['SSR']))

        if dEvidence.get('SSRCoverage') is not None:
            lResults.append('SSRCoverage=%s' % dEvidence['SSRCoverage'])

        # Presence of the key is enough; its value is not looked at.
        if 'polyAtail' in dEvidence:
            lResults.append('polyAtail')

        if dEvidence.get('helitronExtremities') is not None:
            lResults.append('helitronExtremities: %s' % ", ".join(map(str, dEvidence['helitronExtremities'])))

        if lResults:
            self._hasStructPart = True
        return lResults

    # ------------------------------------------------------------------
    # Code and order handling
    # ------------------------------------------------------------------

    def _decisionRuleForWickerCode(self, teClass, order):
        """Return the code for a class/order pair.

        Lookup order: the order in DWICKERCODE, then the class in DWICKERCODE.
        Several orders ("|" in ``order``) give "XXX" when the class is
        "Unclassified" or when the "|"-separated classes differ; when they are
        all the same, the code of that class is used. Anything else, including
        a class missing from DWICKERCODE, gives "NA".
        """
        code = 'NA'
        if order in DWICKERCODE:
            code = DWICKERCODE[order]
        elif teClass in DWICKERCODE:
            code = DWICKERCODE[teClass]
        elif order == "Unclassified" and teClass == "Unclassified":
            code = "NA"
        elif "|" in order and teClass == "Unclassified":
            code = "XXX"
        elif "|" in order and "|" in teClass:
            lClass = teClass.split("|")
            if any(iC != lClass[0] for iC in lClass[1:]):
                code = "XXX"
            else:
                # Fall back to the default instead of raising KeyError on a
                # repeated class name that has no entry.
                code = DWICKERCODE.get(lClass[0], code)
        return code

    def renameLARDTRIMAndMITE(self):
        """Prefix MITE, LARD and TRIM with their parent order name.

        Applies to the consensus order string and to the matching keys of the
        evidence dictionary. Names that already carry the prefix are left
        alone, so calling the method twice is harmless.
        """
        order = self.getConsensusOrder()
        # The look-behind skips occurrences that are already prefixed.
        order = re.sub(r"(?<!TIR-)MITE", "TIR-MITE", order)
        order = re.sub(r"(?<!LTR-)LARD", "LTR-LARD", order)
        order = re.sub(r"(?<!LTR-)TRIM", "LTR-TRIM", order)
        self.setConsensusOrder(order)

        dEvidence = self.getInfoEvidence()
        for oldKey, newKey in (("LARD", "LTR-LARD"), ("TRIM", "LTR-TRIM"), ("MITE", "TIR-MITE")):
            if oldKey in dEvidence:
                dEvidence[newKey] = dEvidence.pop(oldKey)
        self.setInfoEvidence(dEvidence)
|
|
382
|
|
383
|
|
384
|
|
385 |