18
|
1 import os
|
|
2 import unittest
|
|
3 from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils
|
|
4 from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat
|
|
5 from commons.core.utils.FileUtils import FileUtils
|
|
6 from commons.core.coord.Align import Align
|
|
7 from commons.core.coord.Range import Range
|
|
8 from commons.core.checker.RepetException import RepetException
|
|
9
|
|
class Test_TransformAACoordIntoNtCoordInAlignFormat(unittest.TestCase):
    """Unit tests for TransformAACoordIntoNtCoordInAlignFormat.

    Each test writes its fixtures (an align file and a consensus fasta file)
    into the current working directory; tearDown removes whatever is left.
    """

    # Tab-separated align fixture shared by most tests: query name, query
    # start/end, subject name, subject start/end, then three numeric columns.
    # The query names end with a "_<number>" suffix that the expected output
    # of test_run no longer carries.
    _ALIGN_LINES = (
        "blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n",
        "blumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n",
        "blumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n",
        "blumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2\t0.0\t0\n",
        "blumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n",
        "blumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n",
    )

    # Fasta header used by every test except test_run_header_without_frame.
    _CONSENSUS_HEADER = ">blumeria_Grouper_590_20:NoCat\n"

    # Nucleotide sequence written below the header. The last line deliberately
    # has no trailing newline, exactly like the hand-written fixture it replaces.
    _CONSENSUS_SEQUENCE = (
        "TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n",
        "AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n",
        "ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n",
        "ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n",
        "CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n",
        "TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n",
        "TCCATAATTTCAACACTNAAGAATATTTGTA",
    )

    def setUp(self):
        """Define the names of the files each test may create."""
        self.inputFileName = "alignFile.align"
        self.consensusFile = "consensus.fa"
        self.outputFileName = "outputFile.align"
        self._expFileName = "expFile.align"

    def tearDown(self):
        """Remove every fixture or result file that still exists."""
        for fileName in (self.inputFileName, self.consensusFile,
                         self.outputFileName, self._expFileName):
            if FileUtils.isRessourceExists(fileName):
                os.remove(fileName)

    def _writeFile(self, fileName, chunks):
        """Write the strings of `chunks` to `fileName`, closing it even on error."""
        with open(fileName, "w") as handle:
            handle.writelines(chunks)

    def _writeConsensusFile(self, header):
        """Write the consensus fasta file: `header` followed by the shared sequence."""
        self._writeFile(self.consensusFile, (header,) + self._CONSENSUS_SEQUENCE)

    def _writeDefaultFixtures(self):
        """Write the shared align file and its matching consensus file."""
        self._writeFile(self.inputFileName, self._ALIGN_LINES)
        self._writeConsensusFile(self._CONSENSUS_HEADER)

    def _createConfiguredTransformation(self):
        """Return a transformation wired to the input, output and consensus files."""
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setInFileName(self.inputFileName)
        alignTransformation.setOutFileName(self.outputFileName)
        alignTransformation.setConsensusFileName(self.consensusFile)
        return alignTransformation

    def test_transformQueryCoord(self):
        """transformQueryCoord rewrites the coordinates of every alignment in place."""
        self._writeDefaultFixtures()
        tableauAlignInstance = AlignListUtils().read(self.inputFileName)
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setConsensusFileName(self.consensusFile)

        alignTransformation.transformQueryCoord(tableauAlignInstance)

        # check query coord
        expectedQueryCoords = [(271, 324), (331, 357), (90, 113),
                               (165, 209), (119, 148), (16, 30)]
        for index, (expStart, expEnd) in enumerate(expectedQueryCoords):
            rangeQuery = tableauAlignInstance.get(index).range_query
            self.assertEqual(rangeQuery.start, expStart)
            self.assertEqual(rangeQuery.end, expEnd)

        # check subject (profiles) coord
        expectedSubjectCoords = [
            # positive frame : they don't change
            (5, 22), (1, 9), (1, 9), (341, 355),
            # negative frame : they must be inverted
            (182, 173), (280, 276),
        ]
        for index, (expStart, expEnd) in enumerate(expectedSubjectCoords):
            rangeSubject = tableauAlignInstance.get(index).range_subject
            self.assertEqual(rangeSubject.start, expStart)
            self.assertEqual(rangeSubject.end, expEnd)

    def test_transformQueryCoord_with_seqName_not_in_consensus_file(self):
        """A query name absent from the consensus file must raise SystemExit."""
        self._writeFile(self.inputFileName, (
            "dummy_Grouper_590_20:NoCat_4\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n",
            "dummy_Grouper_590_20:NoCat_4\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n",
        ))
        self._writeConsensusFile(self._CONSENSUS_HEADER)
        tableauAlignInstance = AlignListUtils().read(self.inputFileName)
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setConsensusFileName(self.consensusFile)

        self.assertRaises(SystemExit, alignTransformation.transformQueryCoord,
                          tableauAlignInstance)

    def test_run(self):
        """run() with default settings writes the expected align file."""
        self._writeDefaultFixtures()
        alignTransformation = self._createConfiguredTransformation()

        alignTransformation.run()

        # Only four of the six input alignments are expected in the output:
        # the HECT and POC4 entries are absent.
        self._writeFile(self._expFileName, (
            "blumeria_Grouper_590_20:NoCat\t271\t324\tDUF234\t5\t22\t1.5\t3\t0.000000\n",
            "blumeria_Grouper_590_20:NoCat\t331\t357\tDUF1414\t1\t9\t6.3\t2\t0.000000\n",
            "blumeria_Grouper_590_20:NoCat\t90\t113\tCPW_WPC\t1\t9\t7.7\t1\t0.000000\n",
            "blumeria_Grouper_590_20:NoCat\t119\t148\tDUF46\t182\t173\t0.11\t6\t0.000000\n",
        ))
        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName, self.outputFileName))

    # TODO: confirm that raising RepetException is the intended behaviour
    # for this input (question inherited from the original author).
    def test_run_header_without_frame(self):
        """run() raises RepetException for the frame-less query names used here."""
        seqName = "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp"
        self._writeFile(self.inputFileName, (
            seqName + "\t186\t218\tPF02022.11_Integrase_Zn_INT_13.5\t1\t40\t8.9\t-20.4\t0\n",
            seqName + "\t209\t266\tPF04236.7_Transp_Tc5_C_Tase_25.0\t1\t61\t8.5\t-38.8\t0\n",
            seqName + "\t256\t266\tPF04236.7_Transp_Tc5_C_Tase_30.0\t51\t61\t2.3\t-0.2\t0\n",
            seqName + "\t388\t395\tPF10576.1_EndIII_4Fe-2S_EN_17.5\t10\t17\t3.9\t-0.0\t0\n",
            seqName + "\t425\t433\tPF05410.5_Peptidase_C31_CYP_25.0\t98\t106\t1.9\t-2.0\t0\n",
            seqName + "\t487\t497\tPF09225.2_Endonuc-PvuII_EN_25.0\t127\t137\t4.9\t-3.5\t0\n",
            seqName + "\t574\t581\tPF02093.8_Gag_p30_GAG_10.5\t1\t8\t7.3\t-4.5\t0\n",
            seqName + "\t693\t706\tPF00910.14_RNA_helicase_HEL_20.0\t1\t14\t3.8\t-1.0\t0\n",
            seqName + "\t735\t743\tPF09569.2_RE_ScaI_EN_25.0\t1\t9\t7.1\t-3.3\t0\n",
            seqName + "\t737\t743\tPF03577.7_Peptidase_C69_CYP_25.0\t459\t465\t6.2\t-4.8\t0\n",
        ))
        self._writeConsensusFile(">" + seqName + "\n")
        alignTransformation = self._createConfiguredTransformation()

        self.assertRaises(RepetException, alignTransformation.run)

    def test_extractFrameFromSeqName(self):
        """extractFrameFromSeqName raises RepetException for this frame-less name."""
        rangeQuery = Range()
        rangeQuery.seqname = "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp"
        alignInstance = Align()
        alignInstance.range_query = rangeQuery
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()

        self.assertRaises(RepetException, alignTransformation.extractFrameFromSeqName, alignInstance)

    def test_run_no_filter(self):
        """With filtering disabled, run() keeps every alignment and the input file."""
        self._writeDefaultFixtures()
        alignTransformation = self._createConfiguredTransformation()
        alignTransformation.setIsFiltered(False)

        alignTransformation.run()

        # The previous assertTrue(count, 6) used 6 as the failure message and
        # never compared it; with six input alignments and no filtering the
        # output is expected to hold six lines.
        self.assertEqual(FileUtils.getNbLinesInSingleFile(self.outputFileName), 6)
        self.assertTrue(FileUtils.isRessourceExists(self.inputFileName))

    def test_run_no_filter_clean_option(self):
        """With the clean option set, run() removes the input file."""
        self._writeDefaultFixtures()
        alignTransformation = self._createConfiguredTransformation()
        alignTransformation.setIsFiltered(True)
        alignTransformation.setIsClean(True)

        alignTransformation.run()

        # NOTE(review): the original wrote assertTrue(count, 6), which only
        # checks that the output is non-empty (6 was taken as the message).
        # That check is kept as-is because filtering is enabled here and
        # test_run suggests the filtered output holds 4 lines, not 6 --
        # TODO confirm the intended count (and whether setIsFiltered(True)
        # matches the "no_filter" in this test's name).
        nbLines = FileUtils.getNbLinesInSingleFile(self.outputFileName)
        self.assertTrue(nbLines, "output file is empty")
        self.assertFalse(FileUtils.isRessourceExists(self.inputFileName))
|
|
235
|
|
# Run the whole test suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()