diff commons/core/parsing/test/Test_BlatToGffForBesPaired.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatToGffForBesPaired.py	Fri Jan 18 04:54:14 2013 -0500
@@ -0,0 +1,292 @@
+import unittest, os
+from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired
+
+
+class Test_BlatToGffForBesPaired(unittest.TestCase):
+
+
+    def test_convertBlatObjectToGffLine(self):
+        # With an empty method name, the expected GFF line carries the bare
+        # 'BES' label in its third column.
+        blatLine = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'
+        nbLine = 15
+        besFastaFileName = '%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']
+        self._writeBesSequences(besFastaFileName)
+        try:
+            iBlatToGffForBesPaired = BlatToGffForBesPaired()
+            iBlatToGffForBesPaired._methodName = ''
+            iBlatToGffForBesPaired._inputFileFasta = besFastaFileName
+            obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)
+        finally:
+            # Always delete the fixture, even if the conversion raises, so a
+            # failing run does not leave a FASTA file behind.
+            os.remove(besFastaFileName)
+        expGffLine = 'chr16\tBlatToGffForBesPaired\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
+        expBesName = 'MRRE1H001H13FM1'
+        # Concatenation of the two sequence lines written for that record by
+        # _writeBesSequences.
+        expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'
+        expBesType = 'FM'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expGffLine, obsGffLine)
+        self.assertEqual(expBesName, obsBesName)
+        self.assertEqual(expBesSeq, obsBesSeq)
+        self.assertEqual(expBesType, obsBesType)
+
+    def test_convertBlatObjectToGffLine_with_methodName(self):
+        # With the method name set to 'Test', the expected GFF line carries
+        # 'Test:BES' in its third column instead of the bare 'BES' label.
+        blatLine = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'
+        nbLine = 15
+        besFastaFileName = '%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']
+        self._writeBesSequences(besFastaFileName)
+        try:
+            iBlatToGffForBesPaired = BlatToGffForBesPaired()
+            iBlatToGffForBesPaired._methodName = 'Test'
+            iBlatToGffForBesPaired._inputFileFasta = besFastaFileName
+            obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)
+        finally:
+            # Always delete the fixture, even if the conversion raises, so a
+            # failing run does not leave a FASTA file behind.
+            os.remove(besFastaFileName)
+        expGffLine = 'chr16\tBlatToGffForBesPaired\tTest:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
+        expBesName = 'MRRE1H001H13FM1'
+        expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'
+        expBesType = 'FM'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expGffLine, obsGffLine)
+        self.assertEqual(expBesName, obsBesName)
+        self.assertEqual(expBesSeq, obsBesSeq)
+        self.assertEqual(expBesType, obsBesType)
+    
+    def test_getBesName(self):
+        # The name expected back is the value found after 'ID=' / 'Name=' in
+        # the attribute string.
+        col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n'
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        obsBesName = iBlatToGffForBesPaired.getBesName(col9)
+        expBesName = 'machin1'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBesName, obsBesName)
+                
+    def test_checkBesNames_OK(self):
+        # Both names share the prefix 'MRRE1H001H13' and differ only in their
+        # 'FM8' / 'RM2' suffixes: the check is expected to succeed.
+        firstName = 'MRRE1H001H13FM8'
+        secondName = 'MRRE1H001H13RM2'
+        lineNumber = 10
+        checker = BlatToGffForBesPaired()
+        namesAccepted = checker.checkBesNames(firstName, secondName, lineNumber)
+        self.assertTrue(namesAccepted)
+        
+    def test_checkBesNames_NOK(self):
+        # 'MRRE1H001H13FM1' and 'TOTORM2' have different leading parts: the
+        # check is expected to fail.
+        firstName = 'MRRE1H001H13FM1'
+        secondName = 'TOTORM2'
+        lineNumber = 10
+        checker = BlatToGffForBesPaired()
+        namesAccepted = checker.checkBesNames(firstName, secondName, lineNumber)
+        self.assertFalse(namesAccepted)
+        
+    def test_checkBesPositions_OK1(self):
+        # Same first field ('chr16') and disjoint ranges, with the first tuple
+        # holding the lower values: the check is expected to succeed.
+        lowerBes = ('chr16', 25, 150)
+        upperBes = ('chr16', 300, 350)
+        checker = BlatToGffForBesPaired()
+        positionsAccepted = checker.checkBesPositions(lowerBes, upperBes)
+        self.assertTrue(positionsAccepted)
+        
+    def test_checkBesPositions_OK2(self):
+        # Same first field ('chr16') and disjoint ranges, with the first tuple
+        # holding the higher values: the check is expected to succeed.
+        upperBes = ('chr16', 300, 350)
+        lowerBes = ('chr16', 3, 50)
+        checker = BlatToGffForBesPaired()
+        positionsAccepted = checker.checkBesPositions(upperBes, lowerBes)
+        self.assertTrue(positionsAccepted)
+    
+    def test_checkBesPositions_NOK1(self):
+        # The first fields differ ('chr16' vs 'chr14'): the check is expected
+        # to fail.
+        besOnChr16 = ('chr16', 25, 150)
+        besOnChr14 = ('chr14', 300, 350)
+        checker = BlatToGffForBesPaired()
+        positionsAccepted = checker.checkBesPositions(besOnChr16, besOnChr14)
+        self.assertFalse(positionsAccepted)
+    
+    def test_checkBesPositions_NOK2(self):
+        # Ranges 25-300 and 150-350 overlap, the second one starting inside the
+        # first: the check is expected to fail.
+        firstBes = ('chr16', 25, 300)
+        overlappingBes = ('chr16', 150, 350)
+        checker = BlatToGffForBesPaired()
+        positionsAccepted = checker.checkBesPositions(firstBes, overlappingBes)
+        self.assertFalse(positionsAccepted)
+    
+    def test_checkBesPositions_NOK3(self):
+        # Ranges 25-300 and 1-50 overlap, the second one ending inside the
+        # first: the check is expected to fail.
+        firstBes = ('chr16', 25, 300)
+        overlappingBes = ('chr16', 1, 50)
+        checker = BlatToGffForBesPaired()
+        positionsAccepted = checker.checkBesPositions(firstBes, overlappingBes)
+        self.assertFalse(positionsAccepted)
+        
+    def test_getBacName(self):
+        # The expected BAC name is the BES name without its trailing 'FM1'.
+        besName = 'MRRE1H001H13FM1'
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        obsBacName = iBlatToGffForBesPaired.getBacName(besName)
+        expBacName = 'MRRE1H001H13'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expBacName, obsBacName)
+        
+    def test_getBacPositions_case1(self):
+        # Overlapping ranges 25-300 and 1-50: the expected result is the
+        # smallest and largest of the four values.
+        tBes1 = ('chr16', 25, 300)
+        tBes2 = ('chr16', 1, 50)
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(tBes1, tBes2)
+        expStart = 1
+        expEnd = 300
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expStart, obsStart)
+        self.assertEqual(expEnd, obsEnd)
+        
+    def test_getBacPositions_case2(self):
+        # Disjoint ranges 1-300 and 1000-50000: the expected result runs from
+        # the smallest to the largest of the four values.
+        tBes1 = ('chr16', 1, 300)
+        tBes2 = ('chr16', 1000, 50000)
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(tBes1, tBes2)
+        expStart = 1
+        expEnd = 50000
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expStart, obsStart)
+        self.assertEqual(expEnd, obsEnd)
+        
+    def test_getBacPositions_case3(self):
+        # The first tuple lists its larger value first (300, 25); the expected
+        # result is still the smallest and largest of the four values.
+        tBes1 = ('chr16', 300, 25)
+        tBes2 = ('chr16', 1, 50)
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(tBes1, tBes2)
+        expStart = 1
+        expEnd = 300
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expStart, obsStart)
+        self.assertEqual(expEnd, obsEnd)
+        
+    def test_createGffLineForBac(self):
+        # Two BES GFF lines (10-1000 and 2000-3000) are combined with an empty
+        # method name; the expected BAC line spans 10-3000 and carries the bare
+        # 'BAC' label in its third column.
+        gffLine1 = 'chr16\tBlatToGffForBesPaired\tBES\t10\t1000\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=10;bes_end=1000;bes_size=991;muscadine_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG\n'
+        nameBes1 = 'MRRE1H001H13FM1'
+        seqBes1 = 'ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG'
+        typeBes1 = 'FM'
+        gffLine2 = 'chr16\tBlatToGffForBesPaired\tBES\t2000\t3000\t.\t+\t.\tID=MRRE1H001H13RM2;Name=MRRE1H001H13RM2;bes_start=2000;bes_end=3000;bes_size=1001;muscadine_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
+        nameBes2 = 'MRRE1H001H13RM2'
+        seqBes2 = 'CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT'
+        typeBes2 = 'RM'
+        line = 2
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        iBlatToGffForBesPaired._methodName = ''
+        obsGffBac = iBlatToGffForBesPaired.createGffLineForBac(gffLine1, nameBes1, seqBes1, typeBes1, gffLine2, nameBes2, seqBes2, typeBes2, line)
+        expGffBac = 'chr16\tBlatToGffForBesPaired\tBAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expGffBac, obsGffBac)
+        
+    def test_createGffLineForBac_with_methodName(self):
+        # Same inputs as the no-method-name case, but with the method name set
+        # to 'Test': the expected BAC line carries 'Test:BAC' in its third
+        # column.
+        gffLine1 = 'chr16\tBlatToGffForBesPaired\tBES\t10\t1000\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=10;bes_end=1000;bes_size=991;muscadine_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG\n'
+        nameBes1 = 'MRRE1H001H13FM1'
+        seqBes1 = 'ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG'
+        typeBes1 = 'FM'
+        gffLine2 = 'chr16\tBlatToGffForBesPaired\tBES\t2000\t3000\t.\t+\t.\tID=MRRE1H001H13RM2;Name=MRRE1H001H13RM2;bes_start=2000;bes_end=3000;bes_size=1001;muscadine_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
+        nameBes2 = 'MRRE1H001H13RM2'
+        seqBes2 = 'CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT'
+        typeBes2 = 'RM'
+        line = 2
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        iBlatToGffForBesPaired._methodName = 'Test'
+        obsGffBac = iBlatToGffForBesPaired.createGffLineForBac(gffLine1, nameBes1, seqBes1, typeBes1, gffLine2, nameBes2, seqBes2, typeBes2, line)
+        expGffBac = 'chr16\tBlatToGffForBesPaired\tTest:BAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expGffBac, obsGffBac)
+        
+    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInMultipleLines(self):
+        # 'seq1' is written over four lines; the expected result is those four
+        # lines joined into a single string.
+        fastaFileName = '%s/commons/core/parsing/test/sequence.fasta' % os.environ['REPET_PATH']
+        # The context manager closes the handle even if a write fails.
+        with open(fastaFileName, 'w') as fastaFile:
+            fastaFile.write('>seq1\n')
+            fastaFile.write('ATCGATCGATCGATCGATACGTCAGCGATCGAT\n')
+            fastaFile.write('TACGTACGTACGATCGATCGATCGATCGATCGG\n')
+            fastaFile.write('TACGTACGTACGATCGACGATCGATGCCGATCG\n')
+            fastaFile.write('ATCGAC\n')
+            fastaFile.write('>seq2\n')
+            fastaFile.write('GTCTAGCTAGCTATATCTGACTGACGCGACGGT\n')
+            fastaFile.write('CATGCTAGCTAGCACTGTACAGCTATCGATGCT\n')
+            fastaFile.write('ACTGACACTGTACGTAC\n')
+            fastaFile.write('>seq3\n')
+            fastaFile.write('ACTCGATCGATCG\n')
+
+        seqName = 'seq1'
+        try:
+            iBlatToGffForBesPaired = BlatToGffForBesPaired()
+            iBlatToGffForBesPaired._inputFileFasta = fastaFileName
+            obsSeq = iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)
+        finally:
+            # Always delete the fixture, even if the extraction raises.
+            os.remove(fastaFileName)
+        expSeq = 'ATCGATCGATCGATCGATACGTCAGCGATCGATTACGTACGTACGATCGATCGATCGATCGATCGGTACGTACGTACGATCGACGATCGATGCCGATCGATCGAC'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expSeq, obsSeq)
+        
+    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInUniqueLines(self):
+        # 'seq3' is the last record and fits on one line; the expected result
+        # is that line without its trailing newline.
+        fastaFileName = '%s/commons/core/parsing/test/sequence.fasta' % os.environ['REPET_PATH']
+        # The context manager closes the handle even if a write fails.
+        with open(fastaFileName, 'w') as fastaFile:
+            fastaFile.write('>seq1\n')
+            fastaFile.write('ATCGATCGATCGATCGATACGTCAGCGATCGAT\n')
+            fastaFile.write('TACGTACGTACGATCGATCGATCGATCGATCGG\n')
+            fastaFile.write('TACGTACGTACGATCGACGATCGATGCCGATCG\n')
+            fastaFile.write('ATCGAC\n')
+            fastaFile.write('>seq2\n')
+            fastaFile.write('GTCTAGCTAGCTATATCTGACTGACGCGACGGT\n')
+            fastaFile.write('CATGCTAGCTAGCACTGTACAGCTATCGATGCT\n')
+            fastaFile.write('ACTGACACTGTACGTAC\n')
+            fastaFile.write('>seq3\n')
+            fastaFile.write('ACTCGATCGATCG\n')
+
+        seqName = 'seq3'
+        try:
+            iBlatToGffForBesPaired = BlatToGffForBesPaired()
+            iBlatToGffForBesPaired._inputFileFasta = fastaFileName
+            obsSeq = iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)
+        finally:
+            # Always delete the fixture, even if the extraction raises.
+            os.remove(fastaFileName)
+        expSeq = 'ACTCGATCGATCG'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expSeq, obsSeq)
+        
+    def test_extractBesSequenceFromFastaFileToTmpFile_without_seqInThisFastaFile(self):
+        # 'seq4' is not in the FASTA file; the expected result is the
+        # placeholder string 'NA'.
+        fastaFileName = '%s/commons/core/parsing/test/sequence.fasta' % os.environ['REPET_PATH']
+        # The context manager closes the handle even if a write fails.
+        with open(fastaFileName, 'w') as fastaFile:
+            fastaFile.write('>seq1\n')
+            fastaFile.write('ATCGATCGATCGATCGATACGTCAGCGATCGAT\n')
+            fastaFile.write('TACGTACGTACGATCGATCGATCGATCGATCGG\n')
+            fastaFile.write('TACGTACGTACGATCGACGATCGATGCCGATCG\n')
+            fastaFile.write('ATCGAC\n')
+            fastaFile.write('>seq2\n')
+            fastaFile.write('GTCTAGCTAGCTATATCTGACTGACGCGACGGT\n')
+            fastaFile.write('CATGCTAGCTAGCACTGTACAGCTATCGATGCT\n')
+            fastaFile.write('ACTGACACTGTACGTAC\n')
+            fastaFile.write('>seq3\n')
+            fastaFile.write('ACTCGATCGATCG\n')
+
+        seqName = 'seq4'
+        try:
+            iBlatToGffForBesPaired = BlatToGffForBesPaired()
+            iBlatToGffForBesPaired._inputFileFasta = fastaFileName
+            obsSeq = iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)
+        finally:
+            # Always delete the fixture, even if the extraction raises.
+            os.remove(fastaFileName)
+        expSeq = 'NA'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expSeq, obsSeq)
+        
+    def test_getBesFmAndRmNamesAndSequences_case1(self):
+        # The 'FM' entry is passed first and the 'RM' entry second; the result
+        # is expected in (FM name, FM seq, RM name, RM seq) order, so nothing
+        # is swapped.
+        nameBes1 = 'MRRE1H0072T1FM1'
+        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
+        typeBes1 = 'FM'
+        nameBes2 = 'MRRE1H0072T1RM3'
+        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
+        typeBes2 = 'RM'
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)
+        expNameBesFM = 'MRRE1H0072T1FM1'
+        expNameBesRM = 'MRRE1H0072T1RM3'
+        expSeqBesFM = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
+        expSeqBesRM = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expNameBesFM, obsNameBesFM)
+        self.assertEqual(expNameBesRM, obsNameBesRM)
+        self.assertEqual(expSeqBesFM, obsSeqBesFM)
+        self.assertEqual(expSeqBesRM, obsSeqBesRM)
+        
+    def test_getBesFmAndRmNamesAndSequences_case2(self):
+        # The 'RM' entry is passed first and the 'FM' entry second; the result
+        # is still expected in (FM name, FM seq, RM name, RM seq) order, so the
+        # two inputs come back swapped.
+        nameBes1 = 'MRRE1H0072T1RM1'
+        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
+        typeBes1 = 'RM'
+        nameBes2 = 'MRRE1H0072T1FM3'
+        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
+        typeBes2 = 'FM'
+        iBlatToGffForBesPaired = BlatToGffForBesPaired()
+        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)
+        expNameBesFM = 'MRRE1H0072T1FM3'
+        expNameBesRM = 'MRRE1H0072T1RM1'
+        expSeqBesFM = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
+        expSeqBesRM = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expNameBesFM, obsNameBesFM)
+        self.assertEqual(expNameBesRM, obsNameBesRM)
+        self.assertEqual(expSeqBesFM, obsSeqBesFM)
+        self.assertEqual(expSeqBesRM, obsSeqBesRM)
+        
+    def _writeBesSequences(self, fileName):
+        # Write a three-record FASTA fixture (MRRE1H001H13RM1, MRRE1H001H13FM1,
+        # MRRE2H007A13FM3) to fileName, overwriting any existing file.
+        # The handle is named fastaFile to avoid shadowing the 'file' builtin,
+        # and the context manager closes it even if a write raises.
+        with open(fileName, 'w') as fastaFile:
+            fastaFile.write('>MRRE1H001H13RM1\n')
+            fastaFile.write('ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC\n')
+            fastaFile.write('TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA\n')
+            fastaFile.write('ATCGATCGATCGATCGACATCGTACG\n')
+            fastaFile.write('>MRRE1H001H13FM1\n')
+            fastaFile.write('AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC\n')
+            fastaFile.write('CTAGCTAGCTAGCTAGCTAGCTAGC\n')
+            fastaFile.write('>MRRE2H007A13FM3\n')
+            fastaFile.write('TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG\n')
+            fastaFile.write('TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG\n')
+            fastaFile.write('CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC\n')
+            fastaFile.write('TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC\n')
+        
+        
+# Run the test cases of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file