view commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 769e306b7933
children
line wrap: on
line source

import unittest
import os
from commons.core.utils.FileUtils import FileUtils


class Test_F_BlatToGffForBesPaired(unittest.TestCase):


    def test_run(self):
        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
        self._writeBlatInputFileName(blatInputFileName)
        fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']
        self._writeFastaInputFile(fastaInputFileName)
        
        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']
        cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)
        os.system(cmd)
        
        expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']
        self._writeExpOutputFileName(expOutputFileName)
        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
        os.remove(blatInputFileName)
        os.remove(fastaInputFileName)
        os.remove(expOutputFileName)
        os.remove(obsOutputFileName)
        
    def test_run_with_methodName(self):
        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
        self._writeBlatInputFileName(blatInputFileName)
        fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']
        self._writeFastaInputFile(fastaInputFileName)
        
        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']
        cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s -n TestF' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)
        os.system(cmd)
        
        expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']
        self._writeExpOutputFileName_with_methodName(expOutputFileName)
        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
        os.remove(blatInputFileName)
        os.remove(fastaInputFileName)
        os.remove(expOutputFileName)
        os.remove(obsOutputFileName)

    def _writeBlatInputFileName(self, blatInputFileName):
        file = open(blatInputFileName, 'w')
        file.write('psLayout version 3\n')
        file.write('\n')
        file.write('match    mis-     rep.     N\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\n')
        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\n')
        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\n')
        file.write('315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n')
        file.write('690\t11\t0\t0\t1\t3\t2\t4\t-\tMRRE1H001H13RM1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n')
        file.write('554\t26\t0\t0\t1\t16\t1\t17\t+\tMACHINFM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
        file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tBIDULERM1\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
        file.write('554\t26\t0\t0\t1\t16\t1\t17\t+\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
        file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tMRRE1H032F08RM1\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
        file.write('347\t25\t0\t0\t0\t0\t0\t0\t-\tMRRE1B072N12FM1\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n')
        file.write('294\t16\t0\t0\t0\t0\t2\t393\t+\tMRRE1B072N12RM1\t339\t21\t331\tchr18\t29360087\t11978635\t11979338\t3\t146,154,10,\t21,167,321,\t11978635,11978783,11979328,\n')
        file.close()
        
    def _writeExpOutputFileName(self, expOutputFileName):
        file = open(expOutputFileName, 'w')
        file.write('##gff-version 3\n')
        file.write('chr16\tBlatToGffForBesPaired\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG\n')
        file.write('chr16\tBlatToGffForBesPaired\tBES\t21736364\t21737069\t.\t+\t.\tID=MRRE1H001H13RM1;Name=MRRE1H001H13RM1;bes_start=21736364;bes_end=21737069;bes_size=22053297;muscadine_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\n')
        file.write('chr16\tBlatToGffForBesPaired\tBAC\t21686950\t21737069\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=21686950;bac_end=21737069;bac_size=50120;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG;besRM_name=MRRE1H001H13RM1;muscadine_besRM_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\n')
        file.write('chr11\tBlatToGffForBesPaired\tBES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926;muscadine_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG\n')
        file.write('chr11\tBlatToGffForBesPaired\tBES\t3794984\t3795627\t.\t+\t.\tID=MRRE1H032F08RM1;Name=MRRE1H032F08RM1;bes_start=3794984;bes_end=3795627;bes_size=19818926;muscadine_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\n')
        file.write('chr11\tBlatToGffForBesPaired\tBAC\t3725876\t3795627\t.\t.\t.\tID=MRRE1H032F08;Name=MRRE1H032F08;bac_start=3725876;bac_end=3795627;bac_size=69752;besFM_name=MRRE1H032F08FM1;muscadine_besFM_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG;besRM_name=MRRE1H032F08RM1;muscadine_besRM_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\n')
        file.write('chr18\tBlatToGffForBesPaired\tBES\t12067347\t12067719\t.\t+\t.\tID=MRRE1B072N12FM1;Name=MRRE1B072N12FM1;bes_start=12067347;bes_end=12067719;bes_size=29360087;muscadine_seq=ATCGTACGTACGATCGATCGCATGACTACGT\n')
        file.write('chr18\tBlatToGffForBesPaired\tBES\t11978635\t11979338\t.\t+\t.\tID=MRRE1B072N12RM1;Name=MRRE1B072N12RM1;bes_start=11978635;bes_end=11979338;bes_size=29360087;muscadine_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
        file.write('chr18\tBlatToGffForBesPaired\tBAC\t11978635\t12067719\t.\t.\t.\tID=MRRE1B072N12;Name=MRRE1B072N12;bac_start=11978635;bac_end=12067719;bac_size=89085;besFM_name=MRRE1B072N12FM1;muscadine_besFM_seq=ATCGTACGTACGATCGATCGCATGACTACGT;besRM_name=MRRE1B072N12RM1;muscadine_besRM_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
        file.close()
        
    def _writeExpOutputFileName_with_methodName(self, expOutputFileName):
        file = open(expOutputFileName, 'w')
        file.write('##gff-version 3\n')
        file.write('chr16\tBlatToGffForBesPaired\tTestF:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG\n')
        file.write('chr16\tBlatToGffForBesPaired\tTestF:BES\t21736364\t21737069\t.\t+\t.\tID=MRRE1H001H13RM1;Name=MRRE1H001H13RM1;bes_start=21736364;bes_end=21737069;bes_size=22053297;muscadine_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\n')
        file.write('chr16\tBlatToGffForBesPaired\tTestF:BAC\t21686950\t21737069\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=21686950;bac_end=21737069;bac_size=50120;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG;besRM_name=MRRE1H001H13RM1;muscadine_besRM_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\n')
        file.write('chr11\tBlatToGffForBesPaired\tTestF:BES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926;muscadine_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG\n')
        file.write('chr11\tBlatToGffForBesPaired\tTestF:BES\t3794984\t3795627\t.\t+\t.\tID=MRRE1H032F08RM1;Name=MRRE1H032F08RM1;bes_start=3794984;bes_end=3795627;bes_size=19818926;muscadine_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\n')
        file.write('chr11\tBlatToGffForBesPaired\tTestF:BAC\t3725876\t3795627\t.\t.\t.\tID=MRRE1H032F08;Name=MRRE1H032F08;bac_start=3725876;bac_end=3795627;bac_size=69752;besFM_name=MRRE1H032F08FM1;muscadine_besFM_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG;besRM_name=MRRE1H032F08RM1;muscadine_besRM_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\n')
        file.write('chr18\tBlatToGffForBesPaired\tTestF:BES\t12067347\t12067719\t.\t+\t.\tID=MRRE1B072N12FM1;Name=MRRE1B072N12FM1;bes_start=12067347;bes_end=12067719;bes_size=29360087;muscadine_seq=ATCGTACGTACGATCGATCGCATGACTACGT\n')
        file.write('chr18\tBlatToGffForBesPaired\tTestF:BES\t11978635\t11979338\t.\t+\t.\tID=MRRE1B072N12RM1;Name=MRRE1B072N12RM1;bes_start=11978635;bes_end=11979338;bes_size=29360087;muscadine_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
        file.write('chr18\tBlatToGffForBesPaired\tTestF:BAC\t11978635\t12067719\t.\t.\t.\tID=MRRE1B072N12;Name=MRRE1B072N12;bac_start=11978635;bac_end=12067719;bac_size=89085;besFM_name=MRRE1B072N12FM1;muscadine_besFM_seq=ATCGTACGTACGATCGATCGCATGACTACGT;besRM_name=MRRE1B072N12RM1;muscadine_besRM_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
        file.close()
        
    def _writeFastaInputFile(self, fileName):
        file = open(fileName, 'w')
        file.write('>MRRE1H001H13FM1\n')
        file.write('ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC\n')
        file.write('CTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGC\n')
        file.write('ACTGCTAGCTACG\n')
        file.write('>MRRE1H001H13RM1\n')
        file.write('ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCG\n')
        file.write('ACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGT\n')
        file.write('ACTGATCGACTGATCGACTGC\n')
        file.write('>MRRE1H032F08FM1\n')
        file.write('TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGAT\n')
        file.write('ATCGATCG\n')
        file.write('>MRRE1H032F08RM1\n')
        file.write('ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTG\n')
        file.write('TACGTACGTAC\n')
        file.write('>MRRE1B072N12FM1\n')
        file.write('ATCGTACGTACGATCGATCGCATGACTACGT\n')
        file.write('>MRRE1B072N12RM1\n')
        file.write('TACGTACGATCGACTGATGCTAGCTAGCTCC\n')
        file.write('>MACHINFM1\n')
        file.write('ATCGTACGCTAGCTAGTCGATCGATCGATCGATCG\n')
        file.write('>BIDULERM1\n')
        file.write('ACTCGATCGACTACGTACGTAGACTG\n')
        file.close()
        
if __name__ == "__main__":
    unittest.main()