view smart_toolShed/commons/core/parsing/test/Test_BlatToGffForBesPaired.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line source

import unittest, os
from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired


class Test_BlatToGffForBesPaired(unittest.TestCase):
    """Unit tests for BlatToGffForBesPaired.

    Several tests write a scratch FASTA file under
    ``$REPET_PATH/commons/core/parsing/test`` and delete it afterwards, so the
    ``REPET_PATH`` environment variable must be set to run them.
    """

    # Tab-separated BLAT line fed to convertBlatObjectToGffLine.
    _BLAT_LINE = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'

    # Inputs shared by the createGffLineForBac tests: one FM and one RM BES.
    _GFF_LINE_FM = 'chr16\tBlatToGffForBesPaired\tBES\t10\t1000\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=10;bes_end=1000;bes_size=991;muscadine_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG\n'
    _GFF_LINE_RM = 'chr16\tBlatToGffForBesPaired\tBES\t2000\t3000\t.\t+\t.\tID=MRRE1H001H13RM2;Name=MRRE1H001H13RM2;bes_start=2000;bes_end=3000;bes_size=1001;muscadine_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'

    # ------------------------------------------------------------------
    # convertBlatObjectToGffLine
    # ------------------------------------------------------------------

    def test_convertBlatObjectToGffLine(self):
        """With an empty method name the GFF feature type is plain 'BES'."""
        expGffLine = 'chr16\tBlatToGffForBesPaired\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
        self._checkConvertBlatObjectToGffLine('', expGffLine)

    def test_convertBlatObjectToGffLine_with_methodName(self):
        """With method name 'Test' the GFF feature type becomes 'Test:BES'."""
        expGffLine = 'chr16\tBlatToGffForBesPaired\tTest:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
        self._checkConvertBlatObjectToGffLine('Test', expGffLine)

    # ------------------------------------------------------------------
    # getBesName / checkBesNames
    # ------------------------------------------------------------------

    def test_getBesName(self):
        """The BES name is read from the attribute column of a GFF line."""
        col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsBesName = iBlatToGffForBesPaired.getBesName(col9)
        self.assertEqual('machin1', obsBesName)

    def test_checkBesNames_OK(self):
        """An FM/RM pair sharing the same name prefix is accepted."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesNames('MRRE1H001H13FM8', 'MRRE1H001H13RM2', 10))

    def test_checkBesNames_NOK(self):
        """Two BES names with different prefixes are rejected."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesNames('MRRE1H001H13FM1', 'TOTORM2', 10))

    # ------------------------------------------------------------------
    # checkBesPositions
    # ------------------------------------------------------------------

    def test_checkBesPositions_OK1(self):
        """Same chromosome, no overlap, first BES upstream of the second."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesPositions(('chr16', 25, 150), ('chr16', 300, 350)))

    def test_checkBesPositions_OK2(self):
        """Same chromosome, no overlap, first BES downstream of the second."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesPositions(('chr16', 300, 350), ('chr16', 3, 50)))

    def test_checkBesPositions_NOK1(self):
        """BES located on different chromosomes are rejected."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(('chr16', 25, 150), ('chr14', 300, 350)))

    def test_checkBesPositions_NOK2(self):
        """Overlapping BES are rejected (second starts inside the first)."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(('chr16', 25, 300), ('chr16', 150, 350)))

    def test_checkBesPositions_NOK3(self):
        """Overlapping BES are rejected (second ends inside the first)."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(('chr16', 25, 300), ('chr16', 1, 50)))

    # ------------------------------------------------------------------
    # getBacName / getBacPositions
    # ------------------------------------------------------------------

    def test_getBacName(self):
        """The BAC name is the BES name without its trailing 'FM1' part."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsBacName = iBlatToGffForBesPaired.getBacName('MRRE1H001H13FM1')
        self.assertEqual('MRRE1H001H13', obsBacName)

    def test_getBacPositions_case1(self):
        """Overlapping BES: the BAC spans the smallest to the largest coordinate."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(('chr16', 25, 300), ('chr16', 1, 50))
        self.assertEqual(1, obsStart)
        self.assertEqual(300, obsEnd)

    def test_getBacPositions_case2(self):
        """Disjoint BES: the BAC spans from the first start to the second end."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(('chr16', 1, 300), ('chr16', 1000, 50000))
        self.assertEqual(1, obsStart)
        self.assertEqual(50000, obsEnd)

    def test_getBacPositions_case3(self):
        """A BES given with start > end still yields an ordered BAC span."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(('chr16', 300, 25), ('chr16', 1, 50))
        self.assertEqual(1, obsStart)
        self.assertEqual(300, obsEnd)

    # ------------------------------------------------------------------
    # createGffLineForBac
    # ------------------------------------------------------------------

    def test_createGffLineForBac(self):
        """With an empty method name the GFF feature type is plain 'BAC'."""
        expGffBac = 'chr16\tBlatToGffForBesPaired\tBAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        self._checkCreateGffLineForBac('', expGffBac)

    def test_createGffLineForBac_with_methodName(self):
        """With method name 'Test' the GFF feature type becomes 'Test:BAC'."""
        expGffBac = 'chr16\tBlatToGffForBesPaired\tTest:BAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        self._checkCreateGffLineForBac('Test', expGffBac)

    # ------------------------------------------------------------------
    # extractBesSequenceFromFastaFile
    # ------------------------------------------------------------------

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInMultipleLines(self):
        """A record spread over several lines is returned as one joined string."""
        expSeq = 'ATCGATCGATCGATCGATACGTCAGCGATCGATTACGTACGTACGATCGATCGATCGATCGATCGGTACGTACGTACGATCGACGATCGATGCCGATCGATCGAC'
        self._checkExtractBesSequence('seq1', expSeq)

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInUniqueLines(self):
        """A single-line record (here the last one in the file) is returned as is."""
        self._checkExtractBesSequence('seq3', 'ACTCGATCGATCG')

    def test_extractBesSequenceFromFastaFileToTmpFile_without_seqInThisFastaFile(self):
        """A name absent from the FASTA file yields the placeholder 'NA'."""
        self._checkExtractBesSequence('seq4', 'NA')

    # ------------------------------------------------------------------
    # getBesFmAndRmNamesAndSequences
    # ------------------------------------------------------------------

    def test_getBesFmAndRmNamesAndSequences_case1(self):
        """FM given first: names and sequences come back in the same order."""
        nameBes1 = 'MRRE1H0072T1FM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        nameBes2 = 'MRRE1H0072T1RM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, 'FM', nameBes2, seqBes2, 'RM')
        self.assertEqual('MRRE1H0072T1FM1', obsNameBesFM)
        self.assertEqual('MRRE1H0072T1RM3', obsNameBesRM)
        self.assertEqual('TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC', obsSeqBesFM)
        self.assertEqual('GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC', obsSeqBesRM)

    def test_getBesFmAndRmNamesAndSequences_case2(self):
        """RM given first: the result is still reported FM first, RM second."""
        nameBes1 = 'MRRE1H0072T1RM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        nameBes2 = 'MRRE1H0072T1FM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, 'RM', nameBes2, seqBes2, 'FM')
        self.assertEqual('MRRE1H0072T1FM3', obsNameBesFM)
        self.assertEqual('MRRE1H0072T1RM1', obsNameBesRM)
        self.assertEqual('GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC', obsSeqBesFM)
        self.assertEqual('TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC', obsSeqBesRM)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _getTestFilePath(self, baseName):
        """Return the path of a scratch file in the test directory under $REPET_PATH."""
        return '%s/commons/core/parsing/test/%s' % (os.environ['REPET_PATH'], baseName)

    def _checkConvertBlatObjectToGffLine(self, methodName, expGffLine):
        """Convert the shared BLAT line with the given method name and check all four results."""
        besFastaFileName = self._getTestFilePath('besSequences.fasta')
        self._writeBesSequences(besFastaFileName)
        try:
            iBlatToGffForBesPaired = BlatToGffForBesPaired()
            iBlatToGffForBesPaired._methodName = methodName
            iBlatToGffForBesPaired._inputFileFasta = besFastaFileName
            obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(self._BLAT_LINE, 15)
            self.assertEqual(expGffLine, obsGffLine)
            self.assertEqual('MRRE1H001H13FM1', obsBesName)
            self.assertEqual('AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC', obsBesSeq)
            self.assertEqual('FM', obsBesType)
        finally:
            # Remove the fixture even when an assertion above fails.
            os.remove(besFastaFileName)

    def _checkCreateGffLineForBac(self, methodName, expGffBac):
        """Build the BAC GFF line from the shared FM/RM BES pair and compare it to expGffBac."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._methodName = methodName
        obsGffBac = iBlatToGffForBesPaired.createGffLineForBac(
            self._GFF_LINE_FM, 'MRRE1H001H13FM1', 'ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG', 'FM',
            self._GFF_LINE_RM, 'MRRE1H001H13RM2', 'CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT', 'RM',
            2)
        self.assertEqual(expGffBac, obsGffBac)

    def _checkExtractBesSequence(self, seqName, expSeq):
        """Look seqName up in the seq1/seq2/seq3 fixture and compare the result to expSeq."""
        fastaFileName = self._getTestFilePath('sequence.fasta')
        self._writeSequenceFasta(fastaFileName)
        try:
            iBlatToGffForBesPaired = BlatToGffForBesPaired()
            iBlatToGffForBesPaired._inputFileFasta = fastaFileName
            # NOTE(review): the second argument (5) is kept from the original
            # tests; presumably a line number used in messages - confirm.
            obsSeq = iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)
            self.assertEqual(expSeq, obsSeq)
        finally:
            # Remove the fixture even when the assertion above fails.
            os.remove(fastaFileName)

    def _writeSequenceFasta(self, fileName):
        """Write the three-record (seq1, seq2, seq3) FASTA fixture to fileName."""
        fastaLines = [
            '>seq1\n',
            'ATCGATCGATCGATCGATACGTCAGCGATCGAT\n',
            'TACGTACGTACGATCGATCGATCGATCGATCGG\n',
            'TACGTACGTACGATCGACGATCGATGCCGATCG\n',
            'ATCGAC\n',
            '>seq2\n',
            'GTCTAGCTAGCTATATCTGACTGACGCGACGGT\n',
            'CATGCTAGCTAGCACTGTACAGCTATCGATGCT\n',
            'ACTGACACTGTACGTAC\n',
            '>seq3\n',
            'ACTCGATCGATCG\n',
        ]
        with open(fileName, 'w') as fastaFile:
            fastaFile.writelines(fastaLines)

    def _writeBesSequences(self, fileName):
        """Write the three-record BES FASTA fixture (RM1, FM1, FM3) to fileName."""
        fastaLines = [
            '>MRRE1H001H13RM1\n',
            'ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC\n',
            'TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA\n',
            'ATCGATCGATCGATCGACATCGTACG\n',
            '>MRRE1H001H13FM1\n',
            'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC\n',
            'CTAGCTAGCTAGCTAGCTAGCTAGC\n',
            '>MRRE2H007A13FM3\n',
            'TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG\n',
            'TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG\n',
            'CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC\n',
            'TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC\n',
        ]
        # The context manager closes the handle even if a write fails.
        with open(fileName, 'w') as fastaFile:
            fastaFile.writelines(fastaLines)
        
        
# Run every test in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()