diff commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_TransformAACoordIntoNtCoordInAlignFormat.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_TransformAACoordIntoNtCoordInAlignFormat.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,237 @@
+import os
+import unittest
+from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils
+from commons.pyRepetUnit.align.transformAACoordIntoNtCoord.TransformAACoordIntoNtCoordInAlignFormat import TransformAACoordIntoNtCoordInAlignFormat
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.coord.Align import Align
+from commons.core.coord.Range import Range
+from commons.core.checker.RepetException import RepetException
+
+class Test_TransformAACoordIntoNtCoordInAlignFormat(unittest.TestCase):
+    
+    def setUp(self):  # fixture names are relative paths, so the files live in the current working directory
+        self.inputFileName = "alignFile.align"  # align input, written by the tests that need it
+        self.consensusFile = "consensus.fa"  # FASTA consensus, written by the tests that need it
+        self.outputFileName = "outputFile.align"  # passed to setOutFileName() by the run() tests
+        self._expFileName = "expFile.align"  # expected output, compared with the output file in test_run
+        
+    def tearDown(self):
+        """Remove every fixture file that a test left behind."""
+        # Same order as before: input, consensus, output, expected.
+        fixtureFileNames = (self.inputFileName, self.consensusFile,
+                            self.outputFileName, self._expFileName)
+        for fileName in fixtureFileNames:
+            # Not every test creates every file, hence the existence check.
+            if FileUtils.isRessourceExists(fileName):
+                os.remove(fileName)
+    
+    def test_transformQueryCoord(self):
+        """Check query and subject coordinates of six aligns after transformQueryCoord()."""
+        with open(self.inputFileName, "w") as alignFile:
+            alignFile.write("blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2\t0.0\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+        with open(self.consensusFile, "w") as fastaFile:
+            fastaFile.write(">blumeria_Grouper_590_20:NoCat\n")
+            fastaFile.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n")
+            fastaFile.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n")
+            fastaFile.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n")
+            fastaFile.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n")
+            fastaFile.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n")
+            fastaFile.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n")
+            fastaFile.write("TCCATAATTTCAACACTNAAGAATATTTGTA")
+        # The return value of transformQueryCoord() is not used: the assertions read the same list back.
+        alignRead = AlignListUtils()
+        tableauAlignInstance = alignRead.read(self.inputFileName)
+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
+        alignTransformation.setConsensusFileName(self.consensusFile)
+        alignTransformation.transformQueryCoord(tableauAlignInstance)
+        #check query coord
+        self.assertEqual(tableauAlignInstance.get(0).range_query.start, 271)
+        self.assertEqual(tableauAlignInstance.get(0).range_query.end, 324)
+        self.assertEqual(tableauAlignInstance.get(1).range_query.start, 331)
+        self.assertEqual(tableauAlignInstance.get(1).range_query.end, 357)
+        self.assertEqual(tableauAlignInstance.get(2).range_query.start, 90)
+        self.assertEqual(tableauAlignInstance.get(2).range_query.end, 113)
+        self.assertEqual(tableauAlignInstance.get(3).range_query.start, 165)
+        self.assertEqual(tableauAlignInstance.get(3).range_query.end, 209)
+        self.assertEqual(tableauAlignInstance.get(4).range_query.start, 119)
+        self.assertEqual(tableauAlignInstance.get(4).range_query.end, 148)
+        self.assertEqual(tableauAlignInstance.get(5).range_query.start, 16)
+        self.assertEqual(tableauAlignInstance.get(5).range_query.end, 30)
+        #check subject (profiles) coord
+        #positive frame : they don't change
+        self.assertEqual(tableauAlignInstance.get(0).range_subject.start, 5)
+        self.assertEqual(tableauAlignInstance.get(0).range_subject.end, 22)
+        self.assertEqual(tableauAlignInstance.get(1).range_subject.start, 1)
+        self.assertEqual(tableauAlignInstance.get(1).range_subject.end, 9)
+        self.assertEqual(tableauAlignInstance.get(2).range_subject.start, 1)
+        self.assertEqual(tableauAlignInstance.get(2).range_subject.end, 9)
+        self.assertEqual(tableauAlignInstance.get(3).range_subject.start, 341)
+        self.assertEqual(tableauAlignInstance.get(3).range_subject.end, 355)
+        #negative frame : they must be inverted
+        self.assertEqual(tableauAlignInstance.get(4).range_subject.start, 182)
+        self.assertEqual(tableauAlignInstance.get(4).range_subject.end, 173)
+        self.assertEqual(tableauAlignInstance.get(5).range_subject.start, 280)
+        self.assertEqual(tableauAlignInstance.get(5).range_subject.end, 276)
+    
+    def test_transformQueryCoord_with_seqName_not_in_consensus_file(self):
+        """transformQueryCoord() must raise SystemExit for an unknown sequence.
+
+        The aligns are named "dummy_Grouper_590_20:NoCat_4" while the consensus
+        file only holds "blumeria_Grouper_590_20:NoCat".
+        """
+        with open(self.inputFileName, "w") as alignFile:
+            alignFile.write("dummy_Grouper_590_20:NoCat_4\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+            alignFile.write("dummy_Grouper_590_20:NoCat_4\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+        with open(self.consensusFile, "w") as fastaFile:
+            fastaFile.write(">blumeria_Grouper_590_20:NoCat\n")
+            fastaFile.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n")
+            fastaFile.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n")
+            fastaFile.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n")
+            fastaFile.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n")
+            fastaFile.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n")
+            fastaFile.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n")
+            fastaFile.write("TCCATAATTTCAACACTNAAGAATATTTGTA")
+        alignRead = AlignListUtils()
+        tableauAlignInstance = alignRead.read(self.inputFileName)
+        # Only the transformation call is handed to assertRaises: a SystemExit
+        # raised while building or configuring the transformer now surfaces as
+        # a test error instead of satisfying the assertion.
+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
+        alignTransformation.setConsensusFileName(self.consensusFile)
+        self.assertRaises(SystemExit, alignTransformation.transformQueryCoord, tableauAlignInstance)
+    
+    def test_run(self):
+        """run() with default settings must write exactly the expected align file."""
+        with open(self.inputFileName, "w") as alignFile:
+            alignFile.write("blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2\t0.0\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+        with open(self.consensusFile, "w") as fastaFile:
+            fastaFile.write(">blumeria_Grouper_590_20:NoCat\n")
+            fastaFile.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n")
+            fastaFile.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n")
+            fastaFile.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n")
+            fastaFile.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n")
+            fastaFile.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n")
+            fastaFile.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n")
+            fastaFile.write("TCCATAATTTCAACACTNAAGAATATTTGTA")
+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
+        alignTransformation.setInFileName(self.inputFileName)
+        alignTransformation.setOutFileName(self.outputFileName)
+        alignTransformation.setConsensusFileName(self.consensusFile)
+        alignTransformation.run()
+        # Expected rows: the "_<frame>" suffix is gone from the query name and the HECT and POC4 rows are absent.
+        # The score column appears without its decimals (3.2 -> 3) and the last column as "0.000000".
+        with open(self._expFileName, "w") as expectedFile:
+            expectedFile.write("blumeria_Grouper_590_20:NoCat\t271\t324\tDUF234\t5\t22\t1.5\t3\t0.000000\n")
+            expectedFile.write("blumeria_Grouper_590_20:NoCat\t331\t357\tDUF1414\t1\t9\t6.3\t2\t0.000000\n")
+            expectedFile.write("blumeria_Grouper_590_20:NoCat\t90\t113\tCPW_WPC\t1\t9\t7.7\t1\t0.000000\n")
+            expectedFile.write("blumeria_Grouper_590_20:NoCat\t119\t148\tDUF46\t182\t173\t0.11\t6\t0.000000\n")
+        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expFileName, self.outputFileName))
+  
+    # TODO: confirm that raising RepetException is the intended behaviour for a header without a frame suffix.
+    def test_run_header_without_frame(self):
+        """run() must raise RepetException for these aligns, whose query names do not end in "_<digit>"."""
+        with open(self.inputFileName, "w") as alignFile:
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t186\t218\tPF02022.11_Integrase_Zn_INT_13.5\t1\t40\t8.9\t-20.4\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t209\t266\tPF04236.7_Transp_Tc5_C_Tase_25.0\t1\t61\t8.5\t-38.8\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t256\t266\tPF04236.7_Transp_Tc5_C_Tase_30.0\t51\t61\t2.3\t-0.2\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t388\t395\tPF10576.1_EndIII_4Fe-2S_EN_17.5\t10\t17\t3.9\t-0.0\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t425\t433\tPF05410.5_Peptidase_C31_CYP_25.0\t98\t106\t1.9\t-2.0\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t487\t497\tPF09225.2_Endonuc-PvuII_EN_25.0\t127\t137\t4.9\t-3.5\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t574\t581\tPF02093.8_Gag_p30_GAG_10.5\t1\t8\t7.3\t-4.5\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t693\t706\tPF00910.14_RNA_helicase_HEL_20.0\t1\t14\t3.8\t-1.0\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t735\t743\tPF09569.2_RE_ScaI_EN_25.0\t1\t9\t7.1\t-3.3\t0\n")
+            alignFile.write("BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\t737\t743\tPF03577.7_Peptidase_C69_CYP_25.0\t459\t465\t6.2\t-4.8\t0\n")
+        with open(self.consensusFile, "w") as fastaFile:
+            fastaFile.write(">BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp\n")
+            fastaFile.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n")
+            fastaFile.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n")
+            fastaFile.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n")
+            fastaFile.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n")
+            fastaFile.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n")
+            fastaFile.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n")
+            fastaFile.write("TCCATAATTTCAACACTNAAGAATATTTGTA")
+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
+        alignTransformation.setInFileName(self.inputFileName)
+        alignTransformation.setOutFileName(self.outputFileName)
+        alignTransformation.setConsensusFileName(self.consensusFile)
+        # run is passed uncalled so that assertRaises invokes it and catches the exception.
+        self.assertRaises(RepetException, alignTransformation.run)
+        
+    def test_extractFrameFromSeqName(self):
+        """extractFrameFromSeqName() must raise RepetException for this query name, which does not end in "_<digit>"."""
+        align = Align()
+        queryRange = Range()
+        queryRange.seqname = "BlastclustCluster251Mb1_Jr945-B-R1176-Map4_classII-Helitron-incomp"
+        align.range_query = queryRange
+        self.assertRaises(RepetException, TransformAACoordIntoNtCoordInAlignFormat().extractFrameFromSeqName, align)
+        
+    def test_run_no_filter(self):
+        """run() with setIsFiltered(False) must write six lines (one per input align) and keep the input file."""
+        with open(self.inputFileName, "w") as alignFile:
+            alignFile.write("blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2\t0.0\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+        with open(self.consensusFile, "w") as fastaFile:
+            fastaFile.write(">blumeria_Grouper_590_20:NoCat\n")
+            fastaFile.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n")
+            fastaFile.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n")
+            fastaFile.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n")
+            fastaFile.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n")
+            fastaFile.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n")
+            fastaFile.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n")
+            fastaFile.write("TCCATAATTTCAACACTNAAGAATATTTGTA")
+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
+        alignTransformation.setInFileName(self.inputFileName)
+        alignTransformation.setOutFileName(self.outputFileName)
+        alignTransformation.setConsensusFileName(self.consensusFile)
+        alignTransformation.setIsFiltered(False)
+        alignTransformation.run()
+        # assertTrue(count, 6) took 6 as the failure message and passed for any non-zero count.
+        self.assertEqual(FileUtils.getNbLinesInSingleFile(self.outputFileName), 6)
+        self.assertTrue(FileUtils.isRessourceExists(self.inputFileName))
+    
+    def test_run_no_filter_clean_option(self):
+        """run() with setIsClean(True) must leave no input file behind."""
+        with open(self.inputFileName, "w") as alignFile:
+            alignFile.write("blumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2\t0.0\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+            alignFile.write("blumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+        with open(self.consensusFile, "w") as fastaFile:
+            fastaFile.write(">blumeria_Grouper_590_20:NoCat\n")
+            fastaFile.write("TTTCGATCNTATTGAAATGTATAACCCACTACTTAGTTCGTGGACTTGTTGGTAGAGGGA\n")
+            fastaFile.write("AGCTTATGCAATAATGAAGGATAAAAGGATGTCAATTCGACTACTCTTCTAAACACAGAT\n")
+            fastaFile.write("ACCAGACTCCTTTTAATCCCAATAGATAGCCCGTGCGGGATCTCCTATTAACAGCAGTAA\n")
+            fastaFile.write("ACAACGCTAACACAGGGTATACGCAATCTCCGTTCGCCAAACACCAGTCTGTCGGGACTA\n")
+            fastaFile.write("CTAATTATCGAGCCTAGTAGGATCGACAATGTGTATCCCAACATAGAAATAATAGAAAGC\n")
+            fastaFile.write("TCAACACCCAAACCCCTCATCAATAACTGCCATTAATCATCACCTGACTTATCTCTGTAC\n")
+            fastaFile.write("TCCATAATTTCAACACTNAAGAATATTTGTA")
+        alignTransformation = TransformAACoordIntoNtCoordInAlignFormat()
+        alignTransformation.setInFileName(self.inputFileName)
+        alignTransformation.setOutFileName(self.outputFileName)
+        alignTransformation.setConsensusFileName(self.consensusFile)
+        alignTransformation.setIsFiltered(True)  # NOTE(review): the test name says "no_filter" but filtering is enabled - confirm
+        alignTransformation.setIsClean(True)
+        alignTransformation.run()
+        # NOTE(review): 6 is only assertTrue's failure message, so any non-zero count passes; confirm the expected count, then use assertEqual.
+        self.assertTrue(FileUtils.getNbLinesInSingleFile(self.outputFileName), 6)
+        self.assertFalse(FileUtils.isRessourceExists(self.inputFileName))
+            
+if __name__ == "__main__" :  # run the test case when this file is executed as a script
+    unittest.main() 
\ No newline at end of file