Mercurial > repos > yufei-luo > s_mart
diff commons/core/parsing/test/Test_BlatToGffForBesPaired.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
"""Unit tests for commons.core.parsing.BlatToGffForBesPaired."""

import os
import unittest

from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired


def _testFilePath(fileName):
    """Return the path of *fileName* inside the parsing test directory.

    Raises KeyError when the REPET_PATH environment variable is not set.
    """
    return '%s/commons/core/parsing/test/%s' % (os.environ['REPET_PATH'], fileName)


def _writeLines(fileName, lines):
    """Write each item of *lines* to *fileName*, terminated by a newline."""
    # 'with' guarantees the handle is closed even when a write fails.
    with open(fileName, 'w') as handle:
        handle.write(''.join(line + '\n' for line in lines))


class Test_BlatToGffForBesPaired(unittest.TestCase):
    """Exercise the public helpers of BlatToGffForBesPaired.

    Tests that need a FASTA file write it below $REPET_PATH and register its
    removal with addCleanup(), so the file is deleted even when an assertion
    fails.
    """

    # One BLAT result line, shared by the convertBlatObjectToGffLine tests.
    _BLAT_LINE = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'

    # Multi-record FASTA content shared by the extractBesSequenceFromFastaFile
    # tests: seq1 and seq2 span several lines, seq3 fits on a single line.
    _SEQUENCE_FASTA_LINES = (
        '>seq1',
        'ATCGATCGATCGATCGATACGTCAGCGATCGAT',
        'TACGTACGTACGATCGATCGATCGATCGATCGG',
        'TACGTACGTACGATCGACGATCGATGCCGATCG',
        'ATCGAC',
        '>seq2',
        'GTCTAGCTAGCTATATCTGACTGACGCGACGGT',
        'CATGCTAGCTAGCACTGTACAGCTATCGATGCT',
        'ACTGACACTGTACGTAC',
        '>seq3',
        'ACTCGATCGATCG',
    )

    # Two BES GFF lines (forward and reverse mate of the same BAC) and their
    # names/sequences, shared by the createGffLineForBac tests.
    _GFF_LINE_BES_FM = 'chr16\tBlatToGffForBesPaired\tBES\t10\t1000\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=10;bes_end=1000;bes_size=991;muscadine_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG\n'
    _GFF_LINE_BES_RM = 'chr16\tBlatToGffForBesPaired\tBES\t2000\t3000\t.\t+\t.\tID=MRRE1H001H13RM2;Name=MRRE1H001H13RM2;bes_start=2000;bes_end=3000;bes_size=1001;muscadine_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'

    # ------------------------------------------------------------------
    # convertBlatObjectToGffLine
    # ------------------------------------------------------------------

    def test_convertBlatObjectToGffLine(self):
        """Without a method name the GFF feature type is plain 'BES'."""
        obsGffLine, obsBesName, obsBesSeq, obsBesType = self._convertBlatLine('')
        expGffLine = 'chr16\tBlatToGffForBesPaired\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
        expBesName = 'MRRE1H001H13FM1'
        expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'
        expBesType = 'FM'
        self.assertEqual(expGffLine, obsGffLine)
        self.assertEqual(expBesName, obsBesName)
        self.assertEqual(expBesSeq, obsBesSeq)
        self.assertEqual(expBesType, obsBesType)

    def test_convertBlatObjectToGffLine_with_methodName(self):
        """With method name 'Test' the GFF feature type becomes 'Test:BES'."""
        obsGffLine, obsBesName, obsBesSeq, obsBesType = self._convertBlatLine('Test')
        expGffLine = 'chr16\tBlatToGffForBesPaired\tTest:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
        expBesName = 'MRRE1H001H13FM1'
        expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'
        expBesType = 'FM'
        self.assertEqual(expGffLine, obsGffLine)
        self.assertEqual(expBesName, obsBesName)
        self.assertEqual(expBesSeq, obsBesSeq)
        self.assertEqual(expBesType, obsBesType)

    # ------------------------------------------------------------------
    # getBesName / checkBesNames
    # ------------------------------------------------------------------

    def test_getBesName(self):
        """The BES name is read from a GFF attribute column."""
        col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsBesName = iBlatToGffForBesPaired.getBesName(col9)
        expBesName = 'machin1'
        self.assertEqual(expBesName, obsBesName)

    def test_checkBesNames_OK(self):
        """Two names sharing the prefix 'MRRE1H001H13' are accepted."""
        besName1 = 'MRRE1H001H13FM8'
        besName2 = 'MRRE1H001H13RM2'
        line = 10
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesNames(besName1, besName2, line))

    def test_checkBesNames_NOK(self):
        """Two names with different prefixes are rejected."""
        besName1 = 'MRRE1H001H13FM1'
        besName2 = 'TOTORM2'
        line = 10
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesNames(besName1, besName2, line))

    # ------------------------------------------------------------------
    # checkBesPositions
    # ------------------------------------------------------------------

    def test_checkBesPositions_OK1(self):
        """Same chromosome, first BES entirely before the second."""
        tBes1 = ('chr16', 25, 150)
        tBes2 = ('chr16', 300, 350)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    def test_checkBesPositions_OK2(self):
        """Same chromosome, first BES entirely after the second."""
        tBes1 = ('chr16', 300, 350)
        tBes2 = ('chr16', 3, 50)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    def test_checkBesPositions_NOK1(self):
        """BES located on different chromosomes are rejected."""
        tBes1 = ('chr16', 25, 150)
        tBes2 = ('chr14', 300, 350)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    def test_checkBesPositions_NOK2(self):
        """Overlap where the second BES starts inside the first is rejected."""
        tBes1 = ('chr16', 25, 300)
        tBes2 = ('chr16', 150, 350)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    def test_checkBesPositions_NOK3(self):
        """Overlap where the second BES ends inside the first is rejected."""
        tBes1 = ('chr16', 25, 300)
        tBes2 = ('chr16', 1, 50)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(tBes1, tBes2))

    # ------------------------------------------------------------------
    # getBacName / getBacPositions
    # ------------------------------------------------------------------

    def test_getBacName(self):
        """The BAC name is the BES name without its trailing 'FM1' part."""
        besName = 'MRRE1H001H13FM1'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsBacName = iBlatToGffForBesPaired.getBacName(besName)
        expBacName = 'MRRE1H001H13'
        self.assertEqual(expBacName, obsBacName)

    def test_getBacPositions_case1(self):
        """Overlapping BES: the BAC spans the smallest to the largest coordinate."""
        tBes1 = ('chr16', 25, 300)
        tBes2 = ('chr16', 1, 50)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(tBes1, tBes2)
        expStart = 1
        expEnd = 300
        self.assertEqual(expStart, obsStart)
        self.assertEqual(expEnd, obsEnd)

    def test_getBacPositions_case2(self):
        """Disjoint BES: the BAC spans from the first start to the last end."""
        tBes1 = ('chr16', 1, 300)
        tBes2 = ('chr16', 1000, 50000)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(tBes1, tBes2)
        expStart = 1
        expEnd = 50000
        self.assertEqual(expStart, obsStart)
        self.assertEqual(expEnd, obsEnd)

    def test_getBacPositions_case3(self):
        """A BES given with start > end still yields min/max BAC bounds."""
        tBes1 = ('chr16', 300, 25)
        tBes2 = ('chr16', 1, 50)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(tBes1, tBes2)
        expStart = 1
        expEnd = 300
        self.assertEqual(expStart, obsStart)
        self.assertEqual(expEnd, obsEnd)

    # ------------------------------------------------------------------
    # createGffLineForBac
    # ------------------------------------------------------------------

    def test_createGffLineForBac(self):
        """Without a method name the BAC feature type is plain 'BAC'."""
        obsGffBac = self._createGffLineForBac('')
        expGffBac = 'chr16\tBlatToGffForBesPaired\tBAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        self.assertEqual(expGffBac, obsGffBac)

    def test_createGffLineForBac_with_methodName(self):
        """With method name 'Test' the BAC feature type becomes 'Test:BAC'."""
        obsGffBac = self._createGffLineForBac('Test')
        expGffBac = 'chr16\tBlatToGffForBesPaired\tTest:BAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        self.assertEqual(expGffBac, obsGffBac)

    # ------------------------------------------------------------------
    # extractBesSequenceFromFastaFile
    # ------------------------------------------------------------------

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInMultipleLines(self):
        """A sequence split over several FASTA lines is returned concatenated."""
        obsSeq = self._extractSequence('seq1')
        expSeq = 'ATCGATCGATCGATCGATACGTCAGCGATCGATTACGTACGTACGATCGATCGATCGATCGATCGGTACGTACGTACGATCGACGATCGATGCCGATCGATCGAC'
        self.assertEqual(expSeq, obsSeq)

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInUniqueLines(self):
        """A sequence held on a single FASTA line is returned as is."""
        obsSeq = self._extractSequence('seq3')
        expSeq = 'ACTCGATCGATCG'
        self.assertEqual(expSeq, obsSeq)

    def test_extractBesSequenceFromFastaFileToTmpFile_without_seqInThisFastaFile(self):
        """A name absent from the FASTA file yields the placeholder 'NA'."""
        obsSeq = self._extractSequence('seq4')
        expSeq = 'NA'
        self.assertEqual(expSeq, obsSeq)

    # ------------------------------------------------------------------
    # getBesFmAndRmNamesAndSequences
    # ------------------------------------------------------------------

    def test_getBesFmAndRmNamesAndSequences_case1(self):
        """FM given first, RM second: the output keeps that order."""
        nameBes1 = 'MRRE1H0072T1FM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        typeBes1 = 'FM'
        nameBes2 = 'MRRE1H0072T1RM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        typeBes2 = 'RM'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)
        expNameBesFM = 'MRRE1H0072T1FM1'
        expNameBesRM = 'MRRE1H0072T1RM3'
        expSeqBesFM = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        expSeqBesRM = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        self.assertEqual(expNameBesFM, obsNameBesFM)
        self.assertEqual(expNameBesRM, obsNameBesRM)
        self.assertEqual(expSeqBesFM, obsSeqBesFM)
        self.assertEqual(expSeqBesRM, obsSeqBesRM)

    def test_getBesFmAndRmNamesAndSequences_case2(self):
        """RM given first, FM second: the output is reordered to FM then RM."""
        nameBes1 = 'MRRE1H0072T1RM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        typeBes1 = 'RM'
        nameBes2 = 'MRRE1H0072T1FM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        typeBes2 = 'FM'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)
        expNameBesFM = 'MRRE1H0072T1FM3'
        expNameBesRM = 'MRRE1H0072T1RM1'
        expSeqBesFM = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        expSeqBesRM = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        self.assertEqual(expNameBesFM, obsNameBesFM)
        self.assertEqual(expNameBesRM, obsNameBesRM)
        self.assertEqual(expSeqBesFM, obsSeqBesFM)
        self.assertEqual(expSeqBesRM, obsSeqBesRM)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _convertBlatLine(self, methodName):
        """Run convertBlatObjectToGffLine on the shared BLAT line.

        Writes the BES FASTA fixture, configures a converter with
        *methodName* and returns the tuple produced by the converter
        (unpacked by the callers as GFF line, BES name, sequence, type).
        """
        nbLine = 15
        besFastaFileName = _testFilePath('besSequences.fasta')
        self._writeBesSequences(besFastaFileName)
        # Registered only once the file exists, so removal cannot fail
        # because the fixture was never created.
        self.addCleanup(os.remove, besFastaFileName)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._methodName = methodName
        iBlatToGffForBesPaired._inputFileFasta = besFastaFileName
        return iBlatToGffForBesPaired.convertBlatObjectToGffLine(self._BLAT_LINE, nbLine)

    def _createGffLineForBac(self, methodName):
        """Run createGffLineForBac on the shared FM/RM BES pair.

        Returns the BAC GFF line produced by a converter configured with
        *methodName*.
        """
        nameBes1 = 'MRRE1H001H13FM1'
        seqBes1 = 'ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG'
        typeBes1 = 'FM'
        nameBes2 = 'MRRE1H001H13RM2'
        seqBes2 = 'CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT'
        typeBes2 = 'RM'
        line = 2
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._methodName = methodName
        return iBlatToGffForBesPaired.createGffLineForBac(self._GFF_LINE_BES_FM, nameBes1, seqBes1, typeBes1, self._GFF_LINE_BES_RM, nameBes2, seqBes2, typeBes2, line)

    def _extractSequence(self, seqName):
        """Write the shared FASTA fixture and extract *seqName* from it."""
        fastaFileName = _testFilePath('sequence.fasta')
        _writeLines(fastaFileName, self._SEQUENCE_FASTA_LINES)
        self.addCleanup(os.remove, fastaFileName)
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._inputFileFasta = fastaFileName
        return iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)

    def _writeBesSequences(self, fileName):
        """Write a three-record BES FASTA fixture to *fileName*."""
        _writeLines(fileName, (
            '>MRRE1H001H13RM1',
            'ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC',
            'TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA',
            'ATCGATCGATCGATCGACATCGTACG',
            '>MRRE1H001H13FM1',
            'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC',
            'CTAGCTAGCTAGCTAGCTAGCTAGC',
            '>MRRE2H007A13FM3',
            'TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG',
            'TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG',
            'CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC',
            'TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC',
        ))


if __name__ == "__main__":
    unittest.main()