comparison commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_TransformAACoordIntoNtCoordInAlignFormat.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 import os
2 import unittest
3 from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils
4 from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat
5 from commons.core.utils.FileUtils import FileUtils
6 from commons.core.coord.Align import Align
7 from commons.core.coord.Range import Range
8 from commons.core.checker.RepetException import RepetException
9
class Test_TransformAACoordIntoNtCoordInAlignFormat(unittest.TestCase):
    """Unit tests for TransformAACoordIntoNtCoordInAlignFormat.

    Every test writes its fixtures (an align file and a consensus FASTA file)
    under fixed names in the current working directory; tearDown removes
    whichever of them still exist.
    """

    # Header (without the leading ">") of the consensus sequence that the
    # standard alignments refer to.
    _CONSENSUS_NAME = "blumeria_Grouper_590_20:NoCat"

    # Sequence body shared by every consensus fixture. The last chunk has no
    # trailing newline, exactly as the fixtures were originally written.
    _CONSENSUS_SEQUENCE = (
        "TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n"
        "AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n"
        "ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n"
        "ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n"
        "CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n"
        "TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n"
        "TCCATAATTTCAACACTNAAGAATATTTGTA"
    )

    # Six alignments in amino-acid coordinates; the query names end in
    # "_<frame>" (frames 1, 1, 3, 3, 4 and 5).
    _STANDARD_ALIGN_LINES = (
        "blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n",
        "blumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n",
        "blumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n",
        "blumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2\t0.0\t0\n",
        "blumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n",
        "blumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n",
    )

    def setUp(self):
        """Define the fixture file names used by every test."""
        self.inputFileName = "alignFile.align"
        self.consensusFile = "consensus.fa"
        self.outputFileName = "outputFile.align"
        self._expFileName = "expFile.align"

    def tearDown(self):
        """Remove every fixture file that is still present."""
        fixtureFiles = (
            self.inputFileName,
            self.consensusFile,
            self.outputFileName,
            self._expFileName,
        )
        for fileName in fixtureFiles:
            if FileUtils.isRessourceExists(fileName):
                os.remove(fileName)

    @staticmethod
    def _writeFile(fileName, content):
        """Write content to fileName, closing the handle even if the write fails."""
        with open(fileName, "w") as handle:
            handle.write(content)

    def _writeStandardAlignFile(self):
        """Write the six standard alignments to the input align file."""
        self._writeFile(self.inputFileName, "".join(self._STANDARD_ALIGN_LINES))

    def _writeConsensusFile(self, header=None):
        """Write the consensus FASTA file.

        header is the sequence name without the leading ">"; it defaults to
        _CONSENSUS_NAME.
        """
        if header is None:
            header = self._CONSENSUS_NAME
        self._writeFile(self.consensusFile, ">" + header + "\n" + self._CONSENSUS_SEQUENCE)

    def _newFileBasedTransformation(self):
        """Return a transformer configured with the input, output and consensus fixture files."""
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setInFileName(self.inputFileName)
        alignTransformation.setOutFileName(self.outputFileName)
        alignTransformation.setConsensusFileName(self.consensusFile)
        return alignTransformation

    def test_transformQueryCoord(self):
        """transformQueryCoord rewrites the query ranges in place and inverts subject ranges for frames 4 and 5."""
        self._writeStandardAlignFile()
        self._writeConsensusFile()
        tableauAlignInstance = AlignListUtils().read(self.inputFileName)
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setConsensusFileName(self.consensusFile)
        alignTransformation.transformQueryCoord(tableauAlignInstance)

        # check query coord
        expectedQueryRanges = (
            (271, 324),
            (331, 357),
            (90, 113),
            (165, 209),
            (119, 148),
            (16, 30),
        )
        for index, (expStart, expEnd) in enumerate(expectedQueryRanges):
            obsRange = tableauAlignInstance.get(index).range_query
            self.assertEqual(obsRange.start, expStart)
            self.assertEqual(obsRange.end, expEnd)

        # check subject (profiles) coord
        expectedSubjectRanges = (
            # positive frame: they don't change
            (5, 22),
            (1, 9),
            (1, 9),
            (341, 355),
            # negative frame: they must be inverted
            (182, 173),
            (280, 276),
        )
        for index, (expStart, expEnd) in enumerate(expectedSubjectRanges):
            obsRange = tableauAlignInstance.get(index).range_subject
            self.assertEqual(obsRange.start, expStart)
            self.assertEqual(obsRange.end, expEnd)

    def test_transformQueryCoord_with_seqName_not_in_consensus_file(self):
        """transformQueryCoord exits when the query sequence is missing from the consensus file."""
        self._writeFile(
            self.inputFileName,
            "dummy_Grouper_590_20:NoCat_4\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n"
            "dummy_Grouper_590_20:NoCat_4\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n",
        )
        self._writeConsensusFile()
        tableauAlignInstance = AlignListUtils().read(self.inputFileName)

        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setConsensusFileName(self.consensusFile)
        # Only the transformation itself is expected to exit; the same set-up
        # calls succeed in test_transformQueryCoord.
        self.assertRaises(SystemExit, alignTransformation.transformQueryCoord, tableauAlignInstance)

    def test_run(self):
        """run() with default settings writes the expected file, which holds four of the six input alignments."""
        self._writeStandardAlignFile()
        self._writeConsensusFile()
        alignTransformation = self._newFileBasedTransformation()
        alignTransformation.run()
        self._writeFile(
            self._expFileName,
            "blumeria_Grouper_590_20:NoCat\t271\t324\tDUF234\t5\t22\t1.5\t3\t0.000000\n"
            "blumeria_Grouper_590_20:NoCat\t331\t357\tDUF1414\t1\t9\t6.3\t2\t0.000000\n"
            "blumeria_Grouper_590_20:NoCat\t90\t113\tCPW_WPC\t1\t9\t7.7\t1\t0.000000\n"
            "blumeria_Grouper_590_20:NoCat\t119\t148\tDUF46\t182\t173\t0.11\t6\t0.000000\n",
        )
        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName, self.outputFileName))

    # TODO: confirm that raising RepetException is the intended behaviour when
    # the query name carries no frame suffix.
    def test_run_header_without_frame(self):
        """run() raises RepetException when the query names carry no frame suffix."""
        seqName = "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp"
        self._writeFile(
            self.inputFileName,
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t186\t218\tPF02022.11_Integrase_Zn_INT_13.5\t1\t40\t8.9\t-20.4\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t209\t266\tPF04236.7_Transp_Tc5_C_Tase_25.0\t1\t61\t8.5\t-38.8\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t256\t266\tPF04236.7_Transp_Tc5_C_Tase_30.0\t51\t61\t2.3\t-0.2\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t388\t395\tPF10576.1_EndIII_4Fe-2S_EN_17.5\t10\t17\t3.9\t-0.0\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t425\t433\tPF05410.5_Peptidase_C31_CYP_25.0\t98\t106\t1.9\t-2.0\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t487\t497\tPF09225.2_Endonuc-PvuII_EN_25.0\t127\t137\t4.9\t-3.5\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t574\t581\tPF02093.8_Gag_p30_GAG_10.5\t1\t8\t7.3\t-4.5\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t693\t706\tPF00910.14_RNA_helicase_HEL_20.0\t1\t14\t3.8\t-1.0\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t735\t743\tPF09569.2_RE_ScaI_EN_25.0\t1\t9\t7.1\t-3.3\t0\n"
            "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t737\t743\tPF03577.7_Peptidase_C69_CYP_25.0\t459\t465\t6.2\t-4.8\t0\n",
        )
        self._writeConsensusFile(seqName)
        alignTransformation = self._newFileBasedTransformation()
        self.assertRaises(RepetException, alignTransformation.run)

    def test_extractFrameFromSeqName(self):
        """extractFrameFromSeqName raises RepetException for a name without a frame suffix."""
        rangeQuery = Range()
        rangeQuery.seqname = "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp"
        alignInstance = Align()
        alignInstance.range_query = rangeQuery
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        self.assertRaises(RepetException, alignTransformation.extractFrameFromSeqName, alignInstance)

    def test_run_no_filter(self):
        """With filtering disabled, run() keeps all six alignments and leaves the input file in place."""
        self._writeStandardAlignFile()
        self._writeConsensusFile()
        alignTransformation = self._newFileBasedTransformation()
        alignTransformation.setIsFiltered(False)
        alignTransformation.run()
        # The original used assertTrue(count, 6), where 6 is only the failure
        # message, so the count was never compared.
        self.assertEqual(FileUtils.getNbLinesInSingleFile(self.outputFileName), 6)
        self.assertTrue(FileUtils.isRessourceExists(self.inputFileName))

    def test_run_no_filter_clean_option(self):
        """With the clean option set, run() removes the input file."""
        self._writeStandardAlignFile()
        self._writeConsensusFile()
        alignTransformation = self._newFileBasedTransformation()
        alignTransformation.setIsFiltered(True)
        alignTransformation.setIsClean(True)
        alignTransformation.run()
        # NOTE(review): the original passed 6 as assertTrue's msg argument, so
        # only a non-zero line count was ever checked. Filtering is enabled
        # here although the test name says "no_filter", and test_run expects 4
        # of these 6 lines under default settings. Confirm the intended count
        # before turning this into an exact comparison.
        nbLines = FileUtils.getNbLinesInSingleFile(self.outputFileName)
        self.assertTrue(nbLines, 6)
        self.assertFalse(FileUtils.isRessourceExists(self.inputFileName))
235
# Run the whole suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()