Mercurial > repos > yufei-luo > s_mart
comparison commons/core/parsing/test/Test_BlatToGffForBesPaired.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ea3082881bf8 | 6:769e306b7933 |
---|---|
1 import unittest, os | |
2 from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired | |
3 | |
4 | |
class Test_BlatToGffForBesPaired(unittest.TestCase):
    """Unit tests for the BlatToGffForBesPaired converter.

    Tests that need a FASTA file write it under
    $REPET_PATH/commons/core/parsing/test/ and register it in
    self._tmpFileNames, so that tearDown removes it even when an
    assertion fails.

    Test methods are documented with comments rather than docstrings so
    that unittest keeps reporting them by method name.
    """

    # BLAT hit of BES 'MRRE1H001H13FM1' on chr16, shared by the conversion tests.
    _BLAT_LINE = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'

    # Sequence stored for 'MRRE1H001H13FM1' by _writeBesSequences (its two lines joined).
    _BES_SEQ = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'

    # The two BES GFF lines (forward 'FM' and reverse 'RM') combined by the BAC tests.
    _GFF_LINE_BES_FM = 'chr16\tBlatToGffForBesPaired\tBES\t10\t1000\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=10;bes_end=1000;bes_size=991;muscadine_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG\n'
    _GFF_LINE_BES_RM = 'chr16\tBlatToGffForBesPaired\tBES\t2000\t3000\t.\t+\t.\tID=MRRE1H001H13RM2;Name=MRRE1H001H13RM2;bes_start=2000;bes_end=3000;bes_size=1001;muscadine_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'

    # FASTA content used by the sequence-extraction tests: seq1 and seq2
    # span several lines, seq3 fits on a single line.
    _MULTI_SEQ_FASTA_LINES = (
        '>seq1\n',
        'ATCGATCGATCGATCGATACGTCAGCGATCGAT\n',
        'TACGTACGTACGATCGATCGATCGATCGATCGG\n',
        'TACGTACGTACGATCGACGATCGATGCCGATCG\n',
        'ATCGAC\n',
        '>seq2\n',
        'GTCTAGCTAGCTATATCTGACTGACGCGACGGT\n',
        'CATGCTAGCTAGCACTGTACAGCTATCGATGCT\n',
        'ACTGACACTGTACGTAC\n',
        '>seq3\n',
        'ACTCGATCGATCG\n',
    )

    def setUp(self):
        # Paths of the files written by the current test; removed in tearDown.
        self._tmpFileNames = []

    def tearDown(self):
        # Runs whether the test passed or failed, so no fixture is left behind.
        for fileName in self._tmpFileNames:
            if os.path.exists(fileName):
                os.remove(fileName)

    # ------------------------------------------------------------------
    # convertBlatObjectToGffLine
    # ------------------------------------------------------------------

    def test_convertBlatObjectToGffLine(self):
        # An empty method name leaves the GFF feature type as plain 'BES'.
        expGffLine = 'chr16\tBlatToGffForBesPaired\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
        self._checkConvertBlatObjectToGffLine('', expGffLine)

    def test_convertBlatObjectToGffLine_with_methodName(self):
        # A method name is expected as a '<name>:' prefix of the feature type.
        expGffLine = 'chr16\tBlatToGffForBesPaired\tTest:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n'
        self._checkConvertBlatObjectToGffLine('Test', expGffLine)

    # ------------------------------------------------------------------
    # getBesName / checkBesNames
    # ------------------------------------------------------------------

    def test_getBesName(self):
        col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n'
        converter = BlatToGffForBesPaired()
        self.assertEqual('machin1', converter.getBesName(col9))

    def test_checkBesNames_OK(self):
        # Same BAC prefix, one 'FM' and one 'RM' end: accepted.
        converter = BlatToGffForBesPaired()
        self.assertTrue(converter.checkBesNames('MRRE1H001H13FM8', 'MRRE1H001H13RM2', 10))

    def test_checkBesNames_NOK(self):
        # Different prefixes: rejected.
        converter = BlatToGffForBesPaired()
        self.assertFalse(converter.checkBesNames('MRRE1H001H13FM1', 'TOTORM2', 10))

    # ------------------------------------------------------------------
    # checkBesPositions
    # ------------------------------------------------------------------

    def test_checkBesPositions_OK1(self):
        # Same chromosome, first BES entirely before the second one.
        converter = BlatToGffForBesPaired()
        self.assertTrue(converter.checkBesPositions(('chr16', 25, 150), ('chr16', 300, 350)))

    def test_checkBesPositions_OK2(self):
        # Same chromosome, first BES entirely after the second one.
        converter = BlatToGffForBesPaired()
        self.assertTrue(converter.checkBesPositions(('chr16', 300, 350), ('chr16', 3, 50)))

    def test_checkBesPositions_NOK1(self):
        # The two BES lie on different chromosomes.
        converter = BlatToGffForBesPaired()
        self.assertFalse(converter.checkBesPositions(('chr16', 25, 150), ('chr14', 300, 350)))

    def test_checkBesPositions_NOK2(self):
        # Overlap: the second BES starts inside the first one.
        converter = BlatToGffForBesPaired()
        self.assertFalse(converter.checkBesPositions(('chr16', 25, 300), ('chr16', 150, 350)))

    def test_checkBesPositions_NOK3(self):
        # Overlap: the second BES ends inside the first one.
        converter = BlatToGffForBesPaired()
        self.assertFalse(converter.checkBesPositions(('chr16', 25, 300), ('chr16', 1, 50)))

    # ------------------------------------------------------------------
    # getBacName / getBacPositions
    # ------------------------------------------------------------------

    def test_getBacName(self):
        converter = BlatToGffForBesPaired()
        self.assertEqual('MRRE1H001H13', converter.getBacName('MRRE1H001H13FM1'))

    def test_getBacPositions_case1(self):
        # Overlapping BES: smallest and largest coordinates are expected.
        self._checkGetBacPositions(('chr16', 25, 300), ('chr16', 1, 50), 1, 300)

    def test_getBacPositions_case2(self):
        # Disjoint BES given in increasing order.
        self._checkGetBacPositions(('chr16', 1, 300), ('chr16', 1000, 50000), 1, 50000)

    def test_getBacPositions_case3(self):
        # First BES given with start > end.
        self._checkGetBacPositions(('chr16', 300, 25), ('chr16', 1, 50), 1, 300)

    # ------------------------------------------------------------------
    # createGffLineForBac
    # ------------------------------------------------------------------

    def test_createGffLineForBac(self):
        expGffBac = 'chr16\tBlatToGffForBesPaired\tBAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        self._checkCreateGffLineForBac('', expGffBac)

    def test_createGffLineForBac_with_methodName(self):
        expGffBac = 'chr16\tBlatToGffForBesPaired\tTest:BAC\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        self._checkCreateGffLineForBac('Test', expGffBac)

    # ------------------------------------------------------------------
    # extractBesSequenceFromFastaFile
    # ------------------------------------------------------------------

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInMultipleLines(self):
        # seq1 is spread over four lines; they must come back joined.
        expSeq = 'ATCGATCGATCGATCGATACGTCAGCGATCGATTACGTACGTACGATCGATCGATCGATCGATCGGTACGTACGTACGATCGACGATCGATGCCGATCGATCGAC'
        self._checkExtractBesSequence('seq1', expSeq)

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInUniqueLines(self):
        # seq3 is the last record and fits on one line.
        self._checkExtractBesSequence('seq3', 'ACTCGATCGATCG')

    def test_extractBesSequenceFromFastaFileToTmpFile_without_seqInThisFastaFile(self):
        # A name absent from the FASTA file yields the 'NA' placeholder.
        self._checkExtractBesSequence('seq4', 'NA')

    # ------------------------------------------------------------------
    # getBesFmAndRmNamesAndSequences
    # ------------------------------------------------------------------

    def test_getBesFmAndRmNamesAndSequences_case1(self):
        # First BES is the 'FM' one: output keeps the input order.
        nameBes1 = 'MRRE1H0072T1FM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        nameBes2 = 'MRRE1H0072T1RM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        converter = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = converter.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, 'FM', nameBes2, seqBes2, 'RM')
        self.assertEqual('MRRE1H0072T1FM1', obsNameBesFM)
        self.assertEqual('MRRE1H0072T1RM3', obsNameBesRM)
        self.assertEqual('TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC', obsSeqBesFM)
        self.assertEqual('GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC', obsSeqBesRM)

    def test_getBesFmAndRmNamesAndSequences_case2(self):
        # First BES is the 'RM' one: output must be swapped so FM comes first.
        nameBes1 = 'MRRE1H0072T1RM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        nameBes2 = 'MRRE1H0072T1FM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        converter = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = converter.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, 'RM', nameBes2, seqBes2, 'FM')
        self.assertEqual('MRRE1H0072T1FM3', obsNameBesFM)
        self.assertEqual('MRRE1H0072T1RM1', obsNameBesRM)
        self.assertEqual('GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC', obsSeqBesFM)
        self.assertEqual('TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC', obsSeqBesRM)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _getTestFilePath(self, baseName):
        """Return the path of baseName inside the test directory under $REPET_PATH.

        Raises KeyError when the REPET_PATH environment variable is not set.
        """
        return '%s/commons/core/parsing/test/%s' % (os.environ['REPET_PATH'], baseName)

    def _checkConvertBlatObjectToGffLine(self, methodName, expGffLine):
        """Convert _BLAT_LINE with the given method name and check the four outputs."""
        besFastaFileName = self._getTestFilePath('besSequences.fasta')
        # Registered before writing so that a partially written file is removed too.
        self._tmpFileNames.append(besFastaFileName)
        self._writeBesSequences(besFastaFileName)
        converter = BlatToGffForBesPaired()
        converter._methodName = methodName
        converter._inputFileFasta = besFastaFileName
        obsGffLine, obsBesName, obsBesSeq, obsBesType = converter.convertBlatObjectToGffLine(self._BLAT_LINE, 15)
        self.assertEqual(expGffLine, obsGffLine)
        self.assertEqual('MRRE1H001H13FM1', obsBesName)
        self.assertEqual(self._BES_SEQ, obsBesSeq)
        self.assertEqual('FM', obsBesType)

    def _checkGetBacPositions(self, tBes1, tBes2, expStart, expEnd):
        """Check the (start, end) pair returned by getBacPositions for two BES tuples."""
        converter = BlatToGffForBesPaired()
        obsStart, obsEnd = converter.getBacPositions(tBes1, tBes2)
        self.assertEqual(expStart, obsStart)
        self.assertEqual(expEnd, obsEnd)

    def _checkCreateGffLineForBac(self, methodName, expGffBac):
        """Build the BAC GFF line from the two shared BES lines and compare it."""
        converter = BlatToGffForBesPaired()
        converter._methodName = methodName
        obsGffBac = converter.createGffLineForBac(
            self._GFF_LINE_BES_FM, 'MRRE1H001H13FM1', 'ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG', 'FM',
            self._GFF_LINE_BES_RM, 'MRRE1H001H13RM2', 'CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT', 'RM',
            2)
        self.assertEqual(expGffBac, obsGffBac)

    def _checkExtractBesSequence(self, seqName, expSeq):
        """Write the multi-sequence FASTA fixture and check the sequence extracted for seqName."""
        fastaFileName = self._getTestFilePath('sequence.fasta')
        self._tmpFileNames.append(fastaFileName)
        with open(fastaFileName, 'w') as fastaFile:
            fastaFile.writelines(self._MULTI_SEQ_FASTA_LINES)
        converter = BlatToGffForBesPaired()
        converter._inputFileFasta = fastaFileName
        obsSeq = converter.extractBesSequenceFromFastaFile(seqName, 5)
        self.assertEqual(expSeq, obsSeq)

    def _writeBesSequences(self, fileName):
        """Write a three-record BES FASTA fixture to fileName.

        The file is overwritten if it exists; the caller is responsible
        for removing it.
        """
        lines = (
            '>MRRE1H001H13RM1\n',
            'ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC\n',
            'TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA\n',
            'ATCGATCGATCGATCGACATCGTACG\n',
            '>MRRE1H001H13FM1\n',
            'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC\n',
            'CTAGCTAGCTAGCTAGCTAGCTAGC\n',
            '>MRRE2H007A13FM3\n',
            'TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG\n',
            'TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG\n',
            'CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC\n',
            'TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC\n',
        )
        # 'with' closes the handle even if a write fails.
        with open(fileName, 'w') as besFile:
            besFile.writelines(lines)
289 | |
290 | |
# Script entry point: run every test of this module through the unittest
# command-line runner when the file is executed directly (not on import).
if __name__ == "__main__":
    unittest.main()