Mercurial > repos > yufei-luo > s_mart
diff commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_TransformAACoordIntoNtCoordInAlignFormat.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
"""Unit tests for TransformAACoordIntoNtCoordInAlignFormat.

Each test writes small fixture files (an ``.align`` file and a consensus
FASTA file) into the current working directory, runs the transformation and
checks the result.  ``tearDown`` removes every fixture file afterwards.
"""
import os
import unittest
from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils
from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat
from commons.core.utils.FileUtils import FileUtils
from commons.core.coord.Align import Align
from commons.core.coord.Range import Range
from commons.core.checker.RepetException import RepetException

# Sequence body shared by every consensus FASTA fixture.  The last line has
# no trailing newline.
_CONSENSUS_SEQUENCE = (
    "TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n"
    "AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n"
    "ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n"
    "ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n"
    "CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n"
    "TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n"
    "TCCATAATTTCAACACTNAAGAATATTTGTA"
)

_BLUMERIA_FASTA_HEADER = ">blumeria_Grouper_590_20:NoCat\n"

# Six alignments whose query names end with "_<n>".  The tests below treat
# the last two (suffixes 4 and 5) as negative-frame hits.
_BLUMERIA_ALIGN_LINES = (
    "blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n",
    "blumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n",
    "blumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n",
    "blumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2\t0.0\t0\n",
    "blumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n",
    "blumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n",
)

# Query name without any "_<n>" suffix, used by the "no frame" tests.
_HELITRON_NAME = "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp"

# Remaining tab-separated fields of each alignment on _HELITRON_NAME.
_HELITRON_ALIGN_FIELDS = (
    "\t186\t218\tPF02022.11_Integrase_Zn_INT_13.5\t1\t40\t8.9\t-20.4\t0\n",
    "\t209\t266\tPF04236.7_Transp_Tc5_C_Tase_25.0\t1\t61\t8.5\t-38.8\t0\n",
    "\t256\t266\tPF04236.7_Transp_Tc5_C_Tase_30.0\t51\t61\t2.3\t-0.2\t0\n",
    "\t388\t395\tPF10576.1_EndIII_4Fe-2S_EN_17.5\t10\t17\t3.9\t-0.0\t0\n",
    "\t425\t433\tPF05410.5_Peptidase_C31_CYP_25.0\t98\t106\t1.9\t-2.0\t0\n",
    "\t487\t497\tPF09225.2_Endonuc-PvuII_EN_25.0\t127\t137\t4.9\t-3.5\t0\n",
    "\t574\t581\tPF02093.8_Gag_p30_GAG_10.5\t1\t8\t7.3\t-4.5\t0\n",
    "\t693\t706\tPF00910.14_RNA_helicase_HEL_20.0\t1\t14\t3.8\t-1.0\t0\n",
    "\t735\t743\tPF09569.2_RE_ScaI_EN_25.0\t1\t9\t7.1\t-3.3\t0\n",
    "\t737\t743\tPF03577.7_Peptidase_C69_CYP_25.0\t459\t465\t6.2\t-4.8\t0\n",
)


def _writeFile(fileName, content):
    """Write ``content`` to ``fileName``, closing the handle even on error."""
    with open(fileName, "w") as handle:
        handle.write(content)


class Test_TransformAACoordIntoNtCoordInAlignFormat(unittest.TestCase):
    """Tests for transformQueryCoord, extractFrameFromSeqName and run."""

    def setUp(self):
        self.inputFileName = "alignFile.align"
        self.consensusFile = "consensus.fa"
        self.outputFileName = "outputFile.align"
        self._expFileName = "expFile.align"

    def tearDown(self):
        # Remove whichever fixture files the test actually created.
        fixtureFiles = (
            self.inputFileName,
            self.consensusFile,
            self.outputFileName,
            self._expFileName,
        )
        for fileName in fixtureFiles:
            if FileUtils.isRessourceExists(fileName):
                os.remove(fileName)

    def _writeBlumeriaConsensus(self):
        """Write the blumeria consensus FASTA fixture."""
        _writeFile(self.consensusFile, _BLUMERIA_FASTA_HEADER + _CONSENSUS_SEQUENCE)

    def _writeBlumeriaFixtures(self):
        """Write the six-line blumeria align file and its consensus FASTA."""
        _writeFile(self.inputFileName, "".join(_BLUMERIA_ALIGN_LINES))
        self._writeBlumeriaConsensus()

    def _newFileTransformer(self):
        """Return a transformer wired to the input, output and consensus files."""
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setInFileName(self.inputFileName)
        alignTransformation.setOutFileName(self.outputFileName)
        alignTransformation.setConsensusFileName(self.consensusFile)
        return alignTransformation

    def test_transformQueryCoord(self):
        self._writeBlumeriaFixtures()
        tableauAlignInstance = AlignListUtils().read(self.inputFileName)
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setConsensusFileName(self.consensusFile)
        alignTransformation.transformQueryCoord(tableauAlignInstance)

        expQueryCoords = [
            (271, 324),
            (331, 357),
            (90, 113),
            (165, 209),
            (119, 148),
            (16, 30),
        ]
        expSubjectCoords = [
            # positive frame: subject (profile) coordinates do not change
            (5, 22),
            (1, 9),
            (1, 9),
            (341, 355),
            # negative frame: subject coordinates must be inverted
            (182, 173),
            (280, 276),
        ]
        obsAligns = [tableauAlignInstance.get(i) for i in range(len(expQueryCoords))]
        obsQueryCoords = [(a.range_query.start, a.range_query.end) for a in obsAligns]
        obsSubjectCoords = [(a.range_subject.start, a.range_subject.end) for a in obsAligns]
        # Comparing whole lists makes a failure show which alignment differs.
        self.assertEqual(obsQueryCoords, expQueryCoords)
        self.assertEqual(obsSubjectCoords, expSubjectCoords)

    def test_transformQueryCoord_with_seqName_not_in_consensus_file(self):
        # The query names below do not match the only consensus header.
        _writeFile(
            self.inputFileName,
            "dummy_Grouper_590_20:NoCat_4\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n"
            "dummy_Grouper_590_20:NoCat_4\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n",
        )
        self._writeBlumeriaConsensus()
        tableauAlignInstance = AlignListUtils().read(self.inputFileName)
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        alignTransformation.setConsensusFileName(self.consensusFile)

        self.assertRaises(
            SystemExit,
            alignTransformation.transformQueryCoord,
            tableauAlignInstance,
        )

    def test_run(self):
        self._writeBlumeriaFixtures()
        self._newFileTransformer().run()

        _writeFile(
            self._expFileName,
            "blumeria_Grouper_590_20:NoCat\t271\t324\tDUF234\t5\t22\t1.5\t3\t0.000000\n"
            "blumeria_Grouper_590_20:NoCat\t331\t357\tDUF1414\t1\t9\t6.3\t2\t0.000000\n"
            "blumeria_Grouper_590_20:NoCat\t90\t113\tCPW_WPC\t1\t9\t7.7\t1\t0.000000\n"
            "blumeria_Grouper_590_20:NoCat\t119\t148\tDUF46\t182\t173\t0.11\t6\t0.000000\n",
        )
        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName, self.outputFileName))

    # TODO (from the original author): is it normal?
    def test_run_header_without_frame(self):
        _writeFile(
            self.inputFileName,
            "".join(_HELITRON_NAME + fields for fields in _HELITRON_ALIGN_FIELDS),
        )
        _writeFile(self.consensusFile, ">" + _HELITRON_NAME + "\n" + _CONSENSUS_SEQUENCE)
        alignTransformation = self._newFileTransformer()
        self.assertRaises(RepetException, alignTransformation.run)

    def test_extractFrameFromSeqName(self):
        alignInstance = Align()
        rangeQuery = Range()
        rangeQuery.seqname = _HELITRON_NAME
        alignInstance.range_query = rangeQuery
        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
        self.assertRaises(RepetException, alignTransformation.extractFrameFromSeqName, alignInstance)

    def test_run_no_filter(self):
        self._writeBlumeriaFixtures()
        alignTransformation = self._newFileTransformer()
        alignTransformation.setIsFiltered(False)
        alignTransformation.run()
        # assertEqual, not assertTrue: assertTrue(x, 6) treats 6 as the
        # failure message and never compares the line count.
        self.assertEqual(FileUtils.getNbLinesInSingleFile(self.outputFileName), 6)
        self.assertTrue(FileUtils.isRessourceExists(self.inputFileName))

    def test_run_no_filter_clean_option(self):
        self._writeBlumeriaFixtures()
        alignTransformation = self._newFileTransformer()
        alignTransformation.setIsFiltered(True)
        alignTransformation.setIsClean(True)
        alignTransformation.run()
        # NOTE(review): the original wrote assertTrue(nbLines, 6), which uses 6
        # as the failure message, so only a non-zero line count was ever
        # checked.  Filtering is enabled here although the test name says
        # "no_filter", so the intended exact count is unclear from this file;
        # confirm it before tightening this to assertEqual.
        self.assertTrue(FileUtils.getNbLinesInSingleFile(self.outputFileName))
        self.assertFalse(FileUtils.isRessourceExists(self.inputFileName))


if __name__ == "__main__":
    unittest.main()