Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.0/commons/core/utils/Classif.py @ 6:20ec0d14798e draft
Uploaded
author | urgi-team |
---|---|
date | Wed, 20 Jul 2016 05:00:24 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:4093a2fb58be | 6:20ec0d14798e |
---|---|
1 import re | |
2 import os | |
3 from collections import OrderedDict | |
4 | |
# Lookup table used by Classif._decisionRuleForWickerCode: maps a TE class
# name or a TE order name to its three-letter code.
DWICKERCODE = {
    # classes
    "ClassI": "RXX",
    "ClassII": "DXX",
    # orders
    "LTR": "RLX",
    "DIRS": "RYX",
    "PLE": "RPX",
    "LINE": "RIX",
    "SINE": "RSX",
    "TIR": "DTX",
    "Crypton": "DYX",
    "Helitron": "DHX",
    "Maverick": "DMX",

    # prefixed order names (as produced by Classif.renameLARDTRIMAndMITE);
    # they share the code of the order named in their prefix
    "TIR-MITE": "DTX",
    "LTR-LARD": "RLX",
    "LTR-TRIM": "RLX",
}
22 | |
class Classif(object):
    """One record (line) of a classif file for a consensus sequence.

    The object stores the consensus name, its code, its confusedness and
    completeness tags, class/order/superfamily labels and an evidence
    dictionary. It can format that evidence as ``coding=(...)``,
    ``struct=(...)`` and ``other=(...)`` strings and build a new header
    name for the consensus.
    """

    def __init__(self, consensusName = "", code = "NA", outConfuseness = "",
                 outCompleteness = "", projectName = "", isShorten = False,
                 consensusLength = "NA", consensusStrand = "NA",
                 consensusClass = "NA", consensusOrder = "NA",
                 consensusSuperFam = "NA", consensusCI = "NA"):
        """Store the given fields; derived fields start empty/False."""
        self._consensusName = consensusName
        self._confusness = outConfuseness
        self._completeness = outCompleteness
        self._projectName = projectName
        self._isShorten = isShorten
        self._consensusLength = consensusLength
        self._consensusStrand = consensusStrand
        self._consensusClass = consensusClass
        self._consensusOrder = consensusOrder
        self._consensusSuperFam = consensusSuperFam
        self._consensusCI = consensusCI
        self._consensusCoding = ""
        self._consensusStruct = ""
        self._consensusOther = ""
        self._isNoChim = ""
        # Set to True by the format*/write* methods as soon as they
        # produce at least one feature.
        self._hasCodingPart = False
        self._hasStructPart = False
        self._hasOtherPart = False
        self._code = code
        self._evidence = {}

    def __eq__(self, o):
        """Equal when same type and same name, code, confusedness and completeness."""
        if type(o) is not type(self):
            return False
        return (self._consensusName == o._consensusName
                and self._code == o._code
                and self._confusness == o._confusness
                and self._completeness == o._completeness)

    def __ne__(self, o):
        return not self.__eq__(o)

    # ------------------------------------------------------------------
    # Plain accessors
    # ------------------------------------------------------------------

    def getConsensusName(self):
        return self._consensusName

    def getCode(self):
        return self._code

    def getconfusness(self):
        return self._confusness

    def getcompleteness(self):
        return self._completeness

    def getprojectName(self):
        return self._projectName

    def getConsensusLength(self):
        return self._consensusLength

    def getConsensusStrand(self):
        return self._consensusStrand

    def getConsensusClass(self):
        return self._consensusClass

    def getConsensusOrder(self):
        return self._consensusOrder

    def getConsensusSuperFamily(self):
        return self._consensusSuperFam

    def getConsensusCI(self):
        """Return the CI value converted to ``str``."""
        return str(self._consensusCI)

    def getInfoEvidence(self):
        return self._evidence

    # ------------------------------------------------------------------
    # Feature strings computed from the evidence dictionary
    # ------------------------------------------------------------------

    def _joinFeaturesPerOrder(self, writeLine, skipOtherOnly):
        """Format the evidence with ``writeLine``, one part per order.

        When the confusedness is ``'ok'`` the evidence dictionary is
        formatted directly. Otherwise it is read as a dictionary keyed
        by the names found in the ``|``-separated consensus order, and
        the per-order results are joined with ``|``.

        With ``skipOtherOnly`` set, any order after the first whose
        evidence holds nothing but an ``'other'`` entry is left out.

        Raises KeyError if an order name is missing from the evidence.
        """
        if self._confusness == 'ok':
            return writeLine(self._evidence)
        lOrder = self.getConsensusOrder().split("|")
        lParts = [writeLine(self._evidence[lOrder[0]])]
        for order in lOrder[1:]:
            dOrderEvidence = self._evidence[order]
            # list(...) keeps the comparison meaningful on Python 3,
            # where keys() is a view that never equals a list.
            if skipOtherOnly and list(dOrderEvidence.keys()) == ['other']:
                continue
            lParts.append(writeLine(dOrderEvidence))
        return "|".join(lParts)

    def getConsensusCoding(self):
        """Return the ``coding=...`` string built from the evidence."""
        return "coding=" + self._joinFeaturesPerOrder(self.writeCodingFeaturesLine, True)

    def getConsensusStructure(self):
        """Return the ``struct=...`` string built from the evidence."""
        return "struct=" + self._joinFeaturesPerOrder(self.writeStructFeaturesLine, True)

    def getConsensusOther(self):
        """Return the ``other=...`` string built from the evidence."""
        return "other=" + self._joinFeaturesPerOrder(self.writeOtherFeaturesLine, False)

    # ------------------------------------------------------------------
    # Plain mutators
    # ------------------------------------------------------------------

    def setConsensusName(self, consensusName):
        self._consensusName = consensusName

    def setInfoEvidence(self, evidence):
        self._evidence = evidence

    def setCode(self):
        """Recompute the code from the current class and order."""
        self._code = self._decisionRuleForWickerCode(self.getConsensusClass(), self.getConsensusOrder())

    def setConfusness(self, Confusness):
        self._confusness = Confusness

    def setCompleteness(self, completeness):
        self._completeness = completeness

    def setProjectName(self, projectName):
        self._projectName = projectName

    def setConsensusLength(self, cLength):
        self._consensusLength = cLength

    def setConsensusStrand(self, cStrand):
        self._consensusStrand = cStrand

    def setConsensusClass(self, cClass):
        self._consensusClass = cClass

    def setConsensusOrder(self, cOrder):
        self._consensusOrder = cOrder

    def setConsensusSuperFamily(self, cSuperFamily):
        # Must write the attribute that __init__ creates and that
        # getConsensusSuperFamily() reads.
        self._consensusSuperFam = cSuperFamily

    def setConsensusCI(self, CI):
        self._consensusCI = CI

    def setConsensusCoding(self, coding):
        self._consensusCoding = coding

    def setConsensusStructure(self, structure):
        self._consensusStruct = structure

    def setConsensusOther(self, other):
        self._consensusOther = other

    def setCodStrOthFromMessage(self, dico):
        """Fill the stored coding/struct/other strings from a flat evidence dict."""
        self._consensusCoding = "coding=" + self.writeCodingFeaturesLine(dico)
        self._consensusStruct = "struct=" + self.writeStructFeaturesLine(dico)
        self._consensusOther = "other=" + self.writeOtherFeaturesLine(dico)

    def setCodStrOthFromMessage2(self, dico, cOrder):
        """Fill the stored coding/struct/other strings from ``dico``.

        ``cOrder`` is a ``|``-separated order string in which ``rDNA``
        is read as ``RDNA``. If its first name is not a key of ``dico``,
        ``dico`` is formatted as a flat evidence dict. Otherwise ``dico``
        is treated as one evidence dict per key; the keys are processed
        in ``dico``'s own iteration order and the results joined with
        ``|``. A non-first entry that holds only ``'other'`` contributes
        to the ``other=`` string alone.
        """
        lOrder = cOrder.replace('rDNA', 'RDNA').split("|")
        # Materialise the keys: indexing/slicing a keys view fails on Python 3.
        lDicoKeys = list(dico.keys())
        if lOrder[0] not in lDicoKeys:
            self.setCodStrOthFromMessage(dico)
            return

        dFirst = dico[lDicoKeys[0]]
        lCoding = [self.writeCodingFeaturesLine(dFirst)]
        lStruct = [self.writeStructFeaturesLine(dFirst)]
        lOther = [self.writeOtherFeaturesLine(dFirst)]
        for order in lDicoKeys[1:]:
            dOrderEvidence = dico[order]
            if list(dOrderEvidence.keys()) != ['other']:
                lCoding.append(self.writeCodingFeaturesLine(dOrderEvidence))
                lStruct.append(self.writeStructFeaturesLine(dOrderEvidence))
            lOther.append(self.writeOtherFeaturesLine(dOrderEvidence))

        self._consensusCoding = "coding=" + "|".join(lCoding)
        self._consensusStruct = "struct=" + "|".join(lStruct)
        self._consensusOther = "other=" + "|".join(lOther)

    # ------------------------------------------------------------------
    # Consensus naming
    # ------------------------------------------------------------------

    def createNewConsensusName(self):
        """Return ``<code>[-<completeness>][-<confusedness>]_<name>``.

        ``<name>`` is the shortened consensus name when shortening is
        enabled, the current name looks like
        ``<project>_<x>_<y>_<z...>`` and it does not contain
        ``<project>_RS_``; otherwise it is the consensus name unchanged.
        """
        pastecClassif = "%s" % self._code
        if self._completeness != "":
            pastecClassif += "-%s" % self._completeness
        if self._confusness != "":
            pastecClassif += "-%s" % self._confusness

        name = self._consensusName
        if self._isShorten:
            pattern = "%s_[a-zA-Z0-9]+_[a-zA-Z0-9]+_[a-zA-Z0-9_]+" % self._projectName
            isExcluded = "%s_RS_" % self._projectName in self._consensusName
            if re.match(pattern, self._consensusName) and not isExcluded:
                name = self.shortenConsensusName()
        return "%s_%s" % (pastecClassif, name)

    def shortenConsensusName(self):
        """Return a compact form of the consensus name.

        The text following the project name is split on ``_``. Fields
        1 and 2 are reduced to their first character, field 3 is kept,
        and the remaining fields are concatenated (with a ``_`` before
        the third one when there are more than two; anything after the
        third is dropped).

        Raises IndexError when the name does not contain the project
        name or has fewer than three ``_``-separated fields after it.
        """
        desc = self._consensusName.split(self._projectName)[1]
        lFields = desc.split("_")
        palignMeth = lFields[1]
        clustMeth = lFields[2]
        clustID = lFields[3]
        lmalignMeth = lFields[4:]
        if len(lmalignMeth) > 2:
            malignMeth = "%s%s_%s" % (lmalignMeth[0], lmalignMeth[1], lmalignMeth[2])
        else:
            malignMeth = "".join(lmalignMeth)
        return "%s-%s-%s%s-%s" % (self._projectName, palignMeth[0], clustMeth[0], clustID, malignMeth)

    def renameHeaderInConsensusFastaFile(self, fileName = ""):
        """Rewrite a fasta file with shortened header names.

        Every line containing ``>`` is replaced by ``>`` followed by its
        shortened name; other lines are copied as they are. The result
        is written to ``<base>New.fa`` and then moved to ``<base>.fa``,
        where ``<base>`` is ``fileName`` up to its first ``.``.

        Side effect: the consensus name of this object is overwritten
        with each header line read.
        """
        baseName = fileName.split(".")[0]
        newFileName = baseName + "New.fa"
        finalFileName = baseName + ".fa"

        # Context managers close both files even if a header is malformed.
        with open(fileName, "r") as oldFile:
            with open(newFileName, "w") as newFile:
                for inputLine in oldFile:
                    if ">" not in inputLine:
                        newFile.write(inputLine)
                        continue
                    self.setConsensusName(inputLine)
                    # The shortened name may or may not still carry the
                    # line ending of the header, so strip it and put the
                    # original ending back explicitly.
                    lineEnd = inputLine[len(inputLine.rstrip("\r\n")):]
                    shortName = self.shortenConsensusName().rstrip("\r\n")
                    newFile.write(">%s%s" % (shortName, lineEnd))

        # Replace <base>.fa without going through a shell, so that file
        # names containing spaces or shell metacharacters are safe.
        if os.path.exists(finalFileName):
            os.remove(finalFileName)
        os.rename(newFileName, finalFileName)

    # ------------------------------------------------------------------
    # Evidence formatting
    # ------------------------------------------------------------------

    def writeOtherFeaturesLine(self, dEvidence):
        """Format ``dEvidence['other']`` as ``(a; b; ...)`` or ``(NA)``.

        Coding features are listed before structural ones. ``(NA)`` is
        returned when there is no ``'other'`` entry or it yields nothing.
        """
        if 'other' not in dEvidence:
            return "(NA)"
        dOtherResults = dEvidence['other']
        lResults = self.formatCodingFeatures(dOtherResults, [])
        lResults = self.formatStructFeatures(dOtherResults, lResults)
        if len(lResults) == 0:
            return "(NA)"
        self._hasOtherPart = True
        return '(%s)' % "; ".join(lResults)

    def writeCodingFeaturesLine(self, dEvidence):
        """Format the coding features of ``dEvidence`` as ``(a; b; ...)`` or ``(NA)``."""
        lResults = self.formatCodingFeatures(dEvidence, [])
        if len(lResults) == 0:
            return "(NA)"
        return '(%s)' % "; ".join(lResults)

    def writeStructFeaturesLine(self, dEvidence):
        """Format the structural features of ``dEvidence`` as ``(a; b; ...)`` or ``(NA)``."""
        lResults = self.formatStructFeatures(dEvidence, [])
        if len(lResults) == 0:
            return "(NA)"
        return '(%s)' % "; ".join(lResults)

    def formatCodingFeatures(self, dEvidence, lResults):
        """Append one string per coding feature of ``dEvidence`` to ``lResults``.

        ``lResults`` is modified in place and also returned. If it is
        non-empty on return, the object is flagged as having a coding part.
        """
        if 'Repbase_tbx' in dEvidence and dEvidence['Repbase_tbx'] != []:
            lResults.append("TE_BLRtx: %s" % ", ".join(map(str, dEvidence['Repbase_tbx'])))

        if 'Repbase_bx' in dEvidence and dEvidence['Repbase_bx'] != []:
            lResults.append("TE_BLRx: %s" % ", ".join(map(str, dEvidence['Repbase_bx'])))

        if 'te_hmmer' in dEvidence and dEvidence['te_hmmer'] is not None:
            lResults.append('profiles: %s' % self.formatProfilesResults(dEvidence['te_hmmer']))

        if 'Other_profiles' in dEvidence:
            lResults.append('Other_profiles: %s' % self.formatProfilesResults(dEvidence['Other_profiles']))

        if "rDNA" in dEvidence and dEvidence["rDNA"] is not None:
            lResults.append("rDNA_BLRn: %s" % dEvidence["rDNA"])

        if "HG" in dEvidence and dEvidence["HG"] is not None:
            lResults.append("HG_BLRn: %s" % dEvidence["HG"])

        if len(lResults) != 0:
            self._hasCodingPart = True
        return lResults

    def formatProfilesResults(self, dProfilesResults):
        """Return ``"<key>: <coverage>%"`` for each profile, joined by ``", "``.

        Each value must provide ``getCoverageOnSubject()``; its result
        is printed with two decimals. An empty dict gives ``""``.
        """
        lResults = []
        for key in dProfilesResults.keys():
            cov = "%.2f%%" % dProfilesResults[key].getCoverageOnSubject()
            lResults.append('%s: %s' % (key, cov))
        return ", ".join(lResults)

    def formatStructFeatures(self, dEvidence, lResults):
        """Append one string per structural feature of ``dEvidence`` to ``lResults``.

        ``lResults`` is modified in place and also returned. If it is
        non-empty on return (including entries it already held), the
        object is flagged as having a structural part.
        """
        if 'length' in dEvidence and dEvidence['length'] is not None:
            lResults.append('TElength: %s' % dEvidence['length'])

        if 'TR' in dEvidence and dEvidence['TR'] is not None:
            lResults.append('TermRepeats: %s' % ", ".join(map(str, dEvidence['TR'])))

        if 'ORF' in dEvidence and dEvidence['ORF'] is not None:
            lResults.append('ORF: %s' % ", ".join(dEvidence['ORF']))

        if 'SSR' in dEvidence and dEvidence['SSR'] is not None:
            lResults.append('SSR: %s' % ", ".join(dEvidence['SSR']))

        if 'SSRCoverage' in dEvidence and dEvidence['SSRCoverage'] is not None:
            lResults.append('SSRCoverage=%s' % dEvidence['SSRCoverage'])

        # Only the presence of the key matters here, not its value.
        if 'polyAtail' in dEvidence:
            lResults.append('polyAtail')

        if 'helitronExtremities' in dEvidence and dEvidence['helitronExtremities'] is not None:
            lResults.append('helitronExtremities: %s' % ", ".join(map(str, dEvidence['helitronExtremities'])))

        if len(lResults) != 0:
            self._hasStructPart = True
        return lResults

    # ------------------------------------------------------------------
    # Code decision and order renaming
    # ------------------------------------------------------------------

    def _decisionRuleForWickerCode(self, teClass, order):
        """Return the code for a class/order pair, looked up in DWICKERCODE.

        The order is looked up first, then the class. When neither is a
        known key:
          * several orders (``|``) with class ``Unclassified`` give ``XXX``;
          * several orders with several classes give ``XXX`` if the
            classes differ, else the code of that single class;
          * anything else gives ``NA``.
        """
        if order in DWICKERCODE:
            return DWICKERCODE[order]
        if teClass in DWICKERCODE:
            return DWICKERCODE[teClass]
        if order == "Unclassified" and teClass == "Unclassified":
            return "NA"

        if "|" in order:
            if teClass == "Unclassified":
                return "XXX"
            if "|" in teClass:
                lClass = teClass.split("|")
                for iC in lClass[1:]:
                    if iC != lClass[0]:
                        return "XXX"
                # A repeated class that is not in the table gets the same
                # "NA" as every other unrecognised input.
                return DWICKERCODE.get(lClass[0], "NA")
        return "NA"

    def renameLARDTRIMAndMITE(self):
        """Prefix MITE/LARD/TRIM with their order name, in order and evidence.

        ``MITE`` becomes ``TIR-MITE``, ``LARD`` becomes ``LTR-LARD`` and
        ``TRIM`` becomes ``LTR-TRIM``, both in the consensus order string
        and as top-level keys of the evidence dictionary. Names that
        already carry their prefix are left alone, so calling this
        method twice is harmless.
        """
        order = self.getConsensusOrder()
        # The lookbehind skips occurrences that are already prefixed.
        order = re.sub(r"(?<!TIR-)MITE", "TIR-MITE", order)
        order = re.sub(r"(?<!LTR-)LARD", "LTR-LARD", order)
        order = re.sub(r"(?<!LTR-)TRIM", "LTR-TRIM", order)
        self.setConsensusOrder(order)

        dEvidence = self.getInfoEvidence()
        for oldKey, newKey in (("LARD", "LTR-LARD"), ("TRIM", "LTR-TRIM"), ("MITE", "TIR-MITE")):
            if oldKey in dEvidence:
                dEvidence[newKey] = dEvidence.pop(oldKey)
        self.setInfoEvidence(dEvidence)
382 | |
383 | |
384 | |
385 |