Mercurial > repos > yufei-luo > s_mart
comparison commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
30:5677346472b5 | 31:0ab839023fe4 |
---|---|
import os
import subprocess
import sys
import unittest

from commons.core.utils.FileUtils import FileUtils
4 | |
5 | |
class Test_F_BlatToGffForBesPaired(unittest.TestCase):
    """Functional test of the ``BlatToGffForBesPaired.py`` command-line script.

    Each test writes a BLAT (PSL) fixture and a FASTA fixture under
    ``$REPET_PATH/commons/core/parsing/test``, runs the script on them and
    compares the GFF3 file it produces with a hand-written expected file.
    The tests are skipped when ``REPET_PATH`` is not set, because the script
    under test cannot be located without it.
    """

    # Value written in the GFF "source" column of every expected record.
    _GFF_SOURCE = 'BlatToGffForBesPaired'

    # Expected GFF3 records as (seqid, feature type, remaining columns).
    # They are kept as literals on purpose: the expectation must not be
    # recomputed with the same logic as the script under test.
    _EXPECTED_GFF_RECORDS = (
        ('chr16', 'BES', '21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG'),
        ('chr16', 'BES', '21736364\t21737069\t.\t+\t.\tID=MRRE1H001H13RM1;Name=MRRE1H001H13RM1;bes_start=21736364;bes_end=21737069;bes_size=22053297;muscadine_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC'),
        ('chr16', 'BAC', '21686950\t21737069\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=21686950;bac_end=21737069;bac_size=50120;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG;besRM_name=MRRE1H001H13RM1;muscadine_besRM_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC'),
        ('chr11', 'BES', '3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926;muscadine_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG'),
        ('chr11', 'BES', '3794984\t3795627\t.\t+\t.\tID=MRRE1H032F08RM1;Name=MRRE1H032F08RM1;bes_start=3794984;bes_end=3795627;bes_size=19818926;muscadine_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC'),
        ('chr11', 'BAC', '3725876\t3795627\t.\t.\t.\tID=MRRE1H032F08;Name=MRRE1H032F08;bac_start=3725876;bac_end=3795627;bac_size=69752;besFM_name=MRRE1H032F08FM1;muscadine_besFM_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG;besRM_name=MRRE1H032F08RM1;muscadine_besRM_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC'),
        ('chr18', 'BES', '12067347\t12067719\t.\t+\t.\tID=MRRE1B072N12FM1;Name=MRRE1B072N12FM1;bes_start=12067347;bes_end=12067719;bes_size=29360087;muscadine_seq=ATCGTACGTACGATCGATCGCATGACTACGT'),
        ('chr18', 'BES', '11978635\t11979338\t.\t+\t.\tID=MRRE1B072N12RM1;Name=MRRE1B072N12RM1;bes_start=11978635;bes_end=11979338;bes_size=29360087;muscadine_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC'),
        ('chr18', 'BAC', '11978635\t12067719\t.\t.\t.\tID=MRRE1B072N12;Name=MRRE1B072N12;bac_start=11978635;bac_end=12067719;bac_size=89085;besFM_name=MRRE1B072N12FM1;muscadine_besFM_seq=ATCGTACGTACGATCGATCGCATGACTACGT;besRM_name=MRRE1B072N12RM1;muscadine_besRM_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC'),
    )

    def setUp(self):
        """Resolve the script and fixture paths from ``REPET_PATH``.

        Skips the test when ``REPET_PATH`` is not defined instead of failing
        later with a bare ``KeyError``.
        """
        repetPath = os.environ.get('REPET_PATH')
        if repetPath is None:
            self.skipTest('REPET_PATH is not set: cannot locate BlatToGffForBesPaired.py')
        parsingDir = os.path.join(repetPath, 'commons', 'core', 'parsing')
        testDir = os.path.join(parsingDir, 'test')
        self._scriptFileName = os.path.join(parsingDir, 'BlatToGffForBesPaired.py')
        self._blatInputFileName = os.path.join(testDir, 'inputFile.tab')
        self._fastaInputFileName = os.path.join(testDir, 'sequences.fasta')
        self._obsOutputFileName = os.path.join(testDir, 'obsOutputFileName.gff')
        self._expOutputFileName = os.path.join(testDir, 'expOutputFileName.gff')

    def tearDown(self):
        """Remove every fixture/output file, even when the test failed."""
        for fileName in (self._blatInputFileName, self._fastaInputFileName,
                         self._expOutputFileName, self._obsOutputFileName):
            # A file may be missing if the test stopped before creating it.
            if os.path.exists(fileName):
                os.remove(fileName)

    def test_run(self):
        """Default invocation: feature types are plain ``BES`` / ``BAC``."""
        self._runScriptAndCompare(self._writeExpOutputFileName)

    def test_run_with_methodName(self):
        """With ``-n TestF``: feature types are expected as ``TestF:BES`` / ``TestF:BAC``."""
        self._runScriptAndCompare(self._writeExpOutputFileName_with_methodName,
                                  ['-n', 'TestF'])

    def _runScriptAndCompare(self, writeExpectedFile, extraArgs=()):
        """Run the script on the fixtures and compare its output file.

        writeExpectedFile -- callable taking a file name and writing the
                             expected GFF3 content to it.
        extraArgs         -- extra command-line arguments appended after
                             ``-i/-f/-o``.
        """
        self._writeBlatInputFileName(self._blatInputFileName)
        self._writeFastaInputFile(self._fastaInputFileName)

        # Argument list (no shell): paths containing spaces or shell
        # metacharacters are passed through unchanged.
        cmd = [sys.executable, self._scriptFileName,
               '-i', self._blatInputFileName,
               '-f', self._fastaInputFileName,
               '-o', self._obsOutputFileName]
        cmd.extend(extraArgs)
        exitStatus = subprocess.call(cmd)
        self.assertEqual(0, exitStatus,
                         'BlatToGffForBesPaired.py exited with status %s' % exitStatus)

        writeExpectedFile(self._expOutputFileName)
        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutputFileName,
                                                     self._obsOutputFileName))

    def _writeBlatInputFileName(self, blatInputFileName):
        """Write the BLAT fixture: a 5-line psLayout header and 8 alignment records."""
        lines = (
            'psLayout version 3\n',
            '\n',
            'match mis- rep. N\'s Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStarts tStarts\n',
            ' match match count bases count bases name size start end name size start end count\n',
            '---------------------------------------------------------------------------------------------------------------------------------------------------------------\n',
            '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n',
            '690\t11\t0\t0\t1\t3\t2\t4\t-\tMRRE1H001H13RM1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n',
            '554\t26\t0\t0\t1\t16\t1\t17\t+\tMACHINFM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n',
            '620\t23\t0\t0\t0\t0\t0\t0\t-\tBIDULERM1\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n',
            '554\t26\t0\t0\t1\t16\t1\t17\t+\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n',
            '620\t23\t0\t0\t0\t0\t0\t0\t-\tMRRE1H032F08RM1\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n',
            '347\t25\t0\t0\t0\t0\t0\t0\t-\tMRRE1B072N12FM1\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n',
            '294\t16\t0\t0\t0\t0\t2\t393\t+\tMRRE1B072N12RM1\t339\t21\t331\tchr18\t29360087\t11978635\t11979338\t3\t146,154,10,\t21,167,321,\t11978635,11978783,11979328,\n',
        )
        with open(blatInputFileName, 'w') as handle:
            handle.writelines(lines)

    def _writeExpOutputFileName(self, expOutputFileName):
        """Write the expected GFF3 file for a run without the ``-n`` option."""
        self._writeExpectedGff(expOutputFileName, '')

    def _writeExpOutputFileName_with_methodName(self, expOutputFileName):
        """Write the expected GFF3 file for a run with ``-n TestF``."""
        self._writeExpectedGff(expOutputFileName, 'TestF:')

    def _writeExpectedGff(self, fileName, typePrefix):
        """Write the GFF3 header and the expected records to ``fileName``.

        typePrefix -- string prepended to every feature type (third column).
        """
        with open(fileName, 'w') as handle:
            handle.write('##gff-version 3\n')
            for seqId, featureType, remainder in self._EXPECTED_GFF_RECORDS:
                columns = (seqId, self._GFF_SOURCE, typePrefix + featureType, remainder)
                handle.write('\t'.join(columns) + '\n')

    def _writeFastaInputFile(self, fileName):
        """Write the FASTA fixture holding the 8 sequences named in the BLAT fixture."""
        lines = (
            '>MRRE1H001H13FM1\n',
            'ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC\n',
            'CTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGC\n',
            'ACTGCTAGCTACG\n',
            '>MRRE1H001H13RM1\n',
            'ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCG\n',
            'ACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGT\n',
            'ACTGATCGACTGATCGACTGC\n',
            '>MRRE1H032F08FM1\n',
            'TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGAT\n',
            'ATCGATCG\n',
            '>MRRE1H032F08RM1\n',
            'ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTG\n',
            'TACGTACGTAC\n',
            '>MRRE1B072N12FM1\n',
            'ATCGTACGTACGATCGATCGCATGACTACGT\n',
            '>MRRE1B072N12RM1\n',
            'TACGTACGATCGACTGATGCTAGCTAGCTCC\n',
            '>MACHINFM1\n',
            'ATCGTACGCTAGCTAGTCGATCGATCGATCGATCG\n',
            '>BIDULERM1\n',
            'ACTCGATCGACTACGTACGTAGACTG\n',
        )
        with open(fileName, 'w') as handle:
            handle.writelines(lines)
115 | |
# Run the test case through unittest's command-line runner when this file is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()