Mercurial > repos > yufei-luo > s_mart
comparison commons/pyRepetUnit/align/transformAACoordIntoNtCoord/TransformAACoordIntoNtCoordInAlignFormat.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
1 import os | |
2 import sys | |
3 from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils | |
4 from commons.core.seq.BioseqUtils import BioseqUtils | |
5 from commons.core.checker.RepetException import RepetException | |
6 | |
7 ### Transform amino acid query coord in an align format to nucleotide coord | |
8 ### according to the frame specified at the end of seqName | |
9 # | |
class TransformAACoordIntoNtCoordInAlignFormat(object):
    """Convert the query coordinates of an align file from amino acids to nucleotides.

    For every alignment, the frame is read from the last character of the
    query sequence name, the last two characters are then dropped from that
    name, and the query start/end are recomputed in nucleotides. For frames
    4 and above the consensus length is needed as well, and the subject
    range is reversed.
    """

    def __init__(self):
        self._inFileName = None
        self._clean = False
        self._outFileName = None
        self._consensusFileName = None
        self._IsFiltered = True

    ## read input file, transform it and write the output file
    #
    # The alignments are optionally filtered with a minimal score of 0
    # (see setIsFiltered) and the input file is optionally removed
    # afterwards (see setIsClean).
    #
    def run(self):
        alignUtils = AlignListUtils()
        listAlignInstance = alignUtils.read(self._inFileName)
        self.transformQueryCoord(listAlignInstance)
        if self._IsFiltered:
            alignUtils.filterOnAMinimalScore(listAlignInstance, 0)
        alignUtils.write(listAlignInstance, self._outFileName)
        if self._clean:
            self.clean()

    ## Transform the amino acid query coord into nucleotides and switch subject coord if the strand is reversed
    #
    # @param listAlignInstance list of align object instance
    # @exception RepetException if a frame cannot be read from a query name
    #
    def transformQueryCoord(self, listAlignInstance):
        bioseqList = BioseqUtils.extractBioseqListFromFastaFile(self._consensusFileName)
        for alignInstance in listAlignInstance.getList():
            query = alignInstance.range_query
            # The frame must be read before the name is shortened below.
            # A RepetException raised here simply propagates to the caller.
            frame = self.extractFrameFromSeqName(alignInstance)
            # Keep the amino acid coordinates: both are needed after the
            # first of them has been overwritten.
            previousStart = query.start
            previousEnd = query.end
            query.seqname = self._getOriginalQueryNameForAlignInstance(alignInstance)
            if frame < 4:
                self._changeStartInAAIntoNtInPositiveFrame(alignInstance, frame, previousStart)
                self._changeEndInAAIntoNtInPositiveFrame(alignInstance, frame, previousEnd)
            else:
                # Reverse frames are counted from the end of the consensus,
                # so its length is required (looked up by the shortened name).
                self._checkIfSeqNameIsInDNASeqFile(bioseqList, query.seqname)
                consensusLength = BioseqUtils.getSeqLengthWithSeqName(bioseqList, query.seqname)
                # Start is derived from the previous end and vice versa.
                self._changeStartInAAIntoNtInNegativeFrame(alignInstance, frame, consensusLength, previousEnd)
                self._changeEndInAAIntoNtInNegativeFrame(alignInstance, frame, consensusLength, previousStart)
                self._invertedSubjectCoord(alignInstance)

    ## remove the input file
    #
    def clean(self):
        os.remove(self._inFileName)

    ## set input file name
    #
    # @param fileName string name of file
    #
    def setInFileName(self, fileName):
        self._inFileName = fileName

    ## set output file name
    #
    # @param fileName string name of file
    #
    def setOutFileName(self, fileName):
        self._outFileName = fileName

    ## set consensus file name
    #
    # @param fileName string name of file
    #
    def setConsensusFileName(self, fileName):
        self._consensusFileName = fileName

    ## set whether the input file is removed at the end of run()
    #
    # @param clean boolean clean
    #
    def setIsClean(self, clean):
        self._clean = clean

    ## get input file name
    #
    def getInFileName(self):
        return self._inFileName

    ## set whether the minimal-score (0) filter is applied in run()
    #
    # @param isFiltered boolean isFiltered
    #
    def setIsFiltered(self, isFiltered):
        self._IsFiltered = isFiltered

    ## get the query name without its last two characters
    #
    # @param alignInstance align object instance
    # @return string shortened query name
    #
    def _getOriginalQueryNameForAlignInstance(self, alignInstance):
        return alignInstance.range_query.seqname[:-2]

    ## reverse the subject range of the alignment
    #
    # @param alignInstance align object instance
    # @return whatever range_subject.reverse() returns
    #
    def _invertedSubjectCoord(self, alignInstance):
        return alignInstance.range_subject.reverse()

    ## set query end to 3 * previousEnd + frame - 1
    #
    def _changeEndInAAIntoNtInPositiveFrame(self, alignInstance, frame, previousEnd):
        alignInstance.range_query.end = 3 * previousEnd + frame - 1

    ## set query start to 3 * (previousStart - 1) + frame
    #
    def _changeStartInAAIntoNtInPositiveFrame(self, alignInstance, frame, previousStart):
        alignInstance.range_query.start = 3 * (previousStart - 1) + frame

    ## set query end to consensusLength - 3 * (previousStart - 1) - frame + 4
    #
    def _changeEndInAAIntoNtInNegativeFrame(self, alignInstance, frame, consensusLength, previousStart):
        alignInstance.range_query.end = consensusLength - 3 * (previousStart - 1) - frame + 4

    ## set query start to consensusLength - 3 * (previousEnd - 1) - frame + 2
    #
    def _changeStartInAAIntoNtInNegativeFrame(self, alignInstance, frame, consensusLength, previousEnd):
        alignInstance.range_query.start = consensusLength - 3 * (previousEnd - 1) - frame + 2

    ## read the frame from the last character of the query name
    #
    # @param alignInstance align object instance
    # @return int frame
    # @exception RepetException if the name is empty or its last character is not a digit
    #
    # NOTE(review): the value is not range-checked here; any digit is
    # returned as is, and transformQueryCoord only tests "frame < 4".
    #
    def extractFrameFromSeqName(self, alignInstance):
        seqName = alignInstance.range_query.seqname
        try:
            return int(seqName[-1])
        except (ValueError, IndexError):
            # IndexError: empty name; ValueError: last character is not a digit.
            raise RepetException("Unable to extract frame from sequence name")

    ## exit with status 1 if no sequence of the consensus file has this header
    #
    # @param bioseqList list of bioseq object instance
    # @param seqName string header to look for
    #
    def _checkIfSeqNameIsInDNASeqFile(self, bioseqList, seqName):
        if any(seqName == bioseq.header for bioseq in bioseqList):
            return
        sys.stderr.write("seqName : " + seqName + " is not in the consensus file " + self._consensusFileName + "\n")
        sys.exit(1)
133 |