comparison commons/pyRepetUnit/align/transformAACoordIntoNtCoord/TransformAACoordIntoNtCoordInAlignFormat.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 import os
2 import sys
3 from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils
4 from commons.core.seq.BioseqUtils import BioseqUtils
5 from commons.core.checker.RepetException import RepetException
6
7 ### Transform amino acid query coord in an align format to nucleotide coord
8 ### according to the frame specified at the end of seqName
9 #
class TransformAACoordIntoNtCoordInAlignFormat( object ):

    def __init__(self):
        # name of the align file to read
        self._inFileName = None
        # when True, run() removes the input file once the output is written
        self._clean = False
        # name of the align file to write
        self._outFileName = None
        # name of the fasta file holding the nucleotide (consensus) sequences
        self._consensusFileName = None
        # when True, run() calls filterOnAMinimalScore(..., 0) before writing
        self._IsFiltered = True

    ## read input file, transform it and write the output file
    #
    # The input file is removed afterwards if cleaning was requested
    # with setIsClean(True).
    #
    def run(self):
        alignUtils = AlignListUtils()
        listAlignInstance = alignUtils.read(self._inFileName)
        self.transformQueryCoord(listAlignInstance)
        if self._IsFiltered:
            # NOTE(review): presumably discards alignments whose score is
            # below 0 -- confirm against AlignListUtils
            alignUtils.filterOnAMinimalScore(listAlignInstance, 0)
        alignUtils.write(listAlignInstance, self._outFileName)
        if self._clean:
            self.clean()

    ## Transform the amino acid query coord into nucleotides and switch subject coord if the strand is reversed
    #
    # The frame is the last character of the query name; the last two
    # characters are then stripped to recover the original query name.
    # Frames below 4 are handled as positive frames, the others as negative
    # frames (these need the length of the matching consensus sequence).
    #
    # @param listAlignInstance list of align object instance
    # @throw RepetException if the frame cannot be read from a query name
    #
    def transformQueryCoord(self, listAlignInstance):
        bioseqList = BioseqUtils.extractBioseqListFromFastaFile( self._consensusFileName )
        for alignInstance in listAlignInstance.getList():
            # any RepetException raised here propagates unchanged to the caller
            frame = self.extractFrameFromSeqName(alignInstance)
            # keep the amino acid coordinates: both are needed after the
            # query range has started to be overwritten
            previousEnd = alignInstance.range_query.end
            previousStart = alignInstance.range_query.start
            alignInstance.range_query.seqname = self._getOriginalQueryNameForAlignInstance(alignInstance)
            if frame < 4:
                self._changeStartInAAIntoNtInPositiveFrame(alignInstance, frame, previousStart)
                self._changeEndInAAIntoNtInPositiveFrame(alignInstance, frame, previousEnd)
            else:
                self._checkIfSeqNameIsInDNASeqFile(bioseqList, alignInstance.range_query.seqname)
                consensusLength = BioseqUtils.getSeqLengthWithSeqName(bioseqList, alignInstance.range_query.seqname)
                # on a negative frame the nucleotide start derives from the
                # amino acid end, and the nucleotide end from the amino acid start
                self._changeStartInAAIntoNtInNegativeFrame(alignInstance, frame, consensusLength, previousEnd)
                self._changeEndInAAIntoNtInNegativeFrame(alignInstance, frame, consensusLength, previousStart)
                self._invertedSubjectCoord(alignInstance)

    ## remove the input file
    #
    def clean(self):
        os.remove(self._inFileName)

    ## set input file name
    #
    # @param fileName string name of file
    #
    def setInFileName(self, fileName):
        self._inFileName = fileName

    ## set output file name
    #
    # @param fileName string name of file
    #
    def setOutFileName(self, fileName):
        self._outFileName = fileName

    ## set consensus file name
    #
    # @param fileName string name of file
    #
    def setConsensusFileName(self, fileName):
        self._consensusFileName = fileName

    ## set whether the input file is removed at the end of run()
    #
    # @param clean boolean clean
    #
    def setIsClean(self, clean):
        self._clean = clean

    ## get input file name
    #
    # @return string name of file
    #
    def getInFileName(self):
        return self._inFileName

    ## set whether the negative score filter is applied in run()
    #
    # @param isFiltered boolean isFiltered
    #
    def setIsFiltered(self, isFiltered):
        self._IsFiltered = isFiltered

    ## get the query name without its last two characters
    #
    # @param alignInstance align object instance
    # @return string query name without its two-character suffix
    #
    def _getOriginalQueryNameForAlignInstance(self, alignInstance):
        return alignInstance.range_query.seqname[:-2]

    ## reverse the subject range of the alignment
    #
    # @param alignInstance align object instance
    # @return whatever range_subject.reverse() returns
    #
    def _invertedSubjectCoord(self, alignInstance):
        return alignInstance.range_subject.reverse()

    ## set the query end in nucleotides, positive frame
    #
    # @param alignInstance align object instance
    # @param frame integer frame read from the query name
    # @param previousEnd integer query end in amino acids
    #
    def _changeEndInAAIntoNtInPositiveFrame(self, alignInstance, frame, previousEnd):
        alignInstance.range_query.end = 3 * previousEnd + frame - 1

    ## set the query start in nucleotides, positive frame
    #
    # @param alignInstance align object instance
    # @param frame integer frame read from the query name
    # @param previousStart integer query start in amino acids
    #
    def _changeStartInAAIntoNtInPositiveFrame(self, alignInstance, frame, previousStart):
        alignInstance.range_query.start = 3 * (previousStart - 1) + frame

    ## set the query end in nucleotides, negative frame
    #
    # @param alignInstance align object instance
    # @param frame integer frame read from the query name
    # @param consensusLength integer length of the nucleotide sequence
    # @param previousStart integer query start in amino acids
    #
    def _changeEndInAAIntoNtInNegativeFrame(self, alignInstance, frame, consensusLength, previousStart):
        alignInstance.range_query.end = consensusLength - 3 * (previousStart - 1) - frame + 4

    ## set the query start in nucleotides, negative frame
    #
    # @param alignInstance align object instance
    # @param frame integer frame read from the query name
    # @param consensusLength integer length of the nucleotide sequence
    # @param previousEnd integer query end in amino acids
    #
    def _changeStartInAAIntoNtInNegativeFrame(self, alignInstance, frame, consensusLength, previousEnd):
        alignInstance.range_query.start = consensusLength - 3 * (previousEnd - 1) - frame + 2

    ## read the frame from the last character of the query name
    #
    # @param alignInstance align object instance
    # @return integer frame
    # @throw RepetException if the name is empty or does not end with a digit
    #
    def extractFrameFromSeqName(self, alignInstance):
        try:
            return int(alignInstance.range_query.seqname[-1])
        except (ValueError, IndexError):
            # IndexError: empty name; ValueError: last character is not a digit
            raise RepetException("Unable to extract frame from sequence name")

    ## exit with status 1 if no sequence of the list has the given header
    #
    # @param bioseqList list of bioseq object instance
    # @param seqName string header to look for
    #
    def _checkIfSeqNameIsInDNASeqFile(self, bioseqList, seqName):
        # any() stops scanning at the first matching header
        if not any(seqName == bioseq.header for bioseq in bioseqList):
            sys.stderr.write("seqName : " + seqName + " is not in the consensus file " + self._consensusFileName + "\n")
            sys.exit(1)
133