comparison commons/core/parsing/test/Test_BlatToGffForBesPaired.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 import unittest, os
2 from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired
3
4
class Test_BlatToGffForBesPaired(unittest.TestCase):
    """Unit tests for BlatToGffForBesPaired.

    Tests that need a FASTA fixture write it under
    $REPET_PATH/commons/core/parsing/test/ (so the REPET_PATH environment
    variable must be set). Each fixture is registered when it is created and
    removed by tearDown(), so it is cleaned up even when an assertion fails.
    """

    def setUp(self):
        """Start every test with an empty list of fixture files to remove."""
        self._fixtureFileNames = []

    def tearDown(self):
        """Remove every fixture file registered during the test."""
        for fileName in self._fixtureFileNames:
            # A fixture may be registered but never written if the write failed.
            if os.path.exists(fileName):
                os.remove(fileName)

    # ------------------------------------------------------------------
    # convertBlatObjectToGffLine
    # ------------------------------------------------------------------

    def test_convertBlatObjectToGffLine(self):
        """With an empty method name the GFF feature column is 'BES'."""
        self._checkConvertBlatObjectToGffLine('', 'BES')

    def test_convertBlatObjectToGffLine_with_methodName(self):
        """With method name 'Test' the GFF feature column is 'Test:BES'."""
        self._checkConvertBlatObjectToGffLine('Test', 'Test:BES')

    # ------------------------------------------------------------------
    # getBesName / checkBesNames
    # ------------------------------------------------------------------

    def test_getBesName(self):
        """getBesName() returns 'machin1' for an attribute column naming it."""
        col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertEqual('machin1', iBlatToGffForBesPaired.getBesName(col9))

    def test_checkBesNames_OK(self):
        """checkBesNames() is true for two names sharing the same prefix."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesNames('MRRE1H001H13FM8', 'MRRE1H001H13RM2', 10))

    def test_checkBesNames_NOK(self):
        """checkBesNames() is false for two names with different prefixes."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesNames('MRRE1H001H13FM1', 'TOTORM2', 10))

    # ------------------------------------------------------------------
    # checkBesPositions
    # ------------------------------------------------------------------

    def test_checkBesPositions_OK1(self):
        """Same sequence, first interval entirely before the second: accepted."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesPositions(('chr16', 25, 150), ('chr16', 300, 350)))

    def test_checkBesPositions_OK2(self):
        """Same sequence, first interval entirely after the second: accepted."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertTrue(iBlatToGffForBesPaired.checkBesPositions(('chr16', 300, 350), ('chr16', 3, 50)))

    def test_checkBesPositions_NOK1(self):
        """Intervals on different sequences (chr16 / chr14): rejected."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(('chr16', 25, 150), ('chr14', 300, 350)))

    def test_checkBesPositions_NOK2(self):
        """Overlapping intervals, second starting inside the first: rejected."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(('chr16', 25, 300), ('chr16', 150, 350)))

    def test_checkBesPositions_NOK3(self):
        """Overlapping intervals, second ending inside the first: rejected."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertFalse(iBlatToGffForBesPaired.checkBesPositions(('chr16', 25, 300), ('chr16', 1, 50)))

    # ------------------------------------------------------------------
    # getBacName / getBacPositions
    # ------------------------------------------------------------------

    def test_getBacName(self):
        """getBacName('MRRE1H001H13FM1') returns 'MRRE1H001H13'."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        self.assertEqual('MRRE1H001H13', iBlatToGffForBesPaired.getBacName('MRRE1H001H13FM1'))

    def test_getBacPositions_case1(self):
        """Overlapping intervals: expected span is (1, 300)."""
        self._checkBacPositions(('chr16', 25, 300), ('chr16', 1, 50), 1, 300)

    def test_getBacPositions_case2(self):
        """Disjoint intervals: expected span is (1, 50000)."""
        self._checkBacPositions(('chr16', 1, 300), ('chr16', 1000, 50000), 1, 50000)

    def test_getBacPositions_case3(self):
        """First interval given with start > end: expected span is (1, 300)."""
        self._checkBacPositions(('chr16', 300, 25), ('chr16', 1, 50), 1, 300)

    # ------------------------------------------------------------------
    # createGffLineForBac
    # ------------------------------------------------------------------

    def test_createGffLineForBac(self):
        """With an empty method name the GFF feature column is 'BAC'."""
        self._checkCreateGffLineForBac('', 'BAC')

    def test_createGffLineForBac_with_methodName(self):
        """With method name 'Test' the GFF feature column is 'Test:BAC'."""
        self._checkCreateGffLineForBac('Test', 'Test:BAC')

    # ------------------------------------------------------------------
    # extractBesSequenceFromFastaFile
    # ------------------------------------------------------------------

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInMultipleLines(self):
        """A sequence spread over several FASTA lines is returned joined."""
        expSeq = 'ATCGATCGATCGATCGATACGTCAGCGATCGATTACGTACGTACGATCGATCGATCGATCGATCGGTACGTACGTACGATCGACGATCGATGCCGATCGATCGAC'
        self._checkExtractedBesSequence('seq1', expSeq)

    def test_extractBesSequenceFromFastaFileToTmpFile_with_seqInUniqueLines(self):
        """A sequence written on a single FASTA line is returned as is."""
        self._checkExtractedBesSequence('seq3', 'ACTCGATCGATCG')

    def test_extractBesSequenceFromFastaFileToTmpFile_without_seqInThisFastaFile(self):
        """A sequence name absent from the FASTA file yields 'NA'."""
        self._checkExtractedBesSequence('seq4', 'NA')

    # ------------------------------------------------------------------
    # getBesFmAndRmNamesAndSequences
    # ------------------------------------------------------------------

    def test_getBesFmAndRmNamesAndSequences_case1(self):
        """First BES is typed FM, second RM: output keeps the input order."""
        nameBes1 = 'MRRE1H0072T1FM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        nameBes2 = 'MRRE1H0072T1RM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, 'FM', nameBes2, seqBes2, 'RM')
        self.assertEqual('MRRE1H0072T1FM1', obsNameBesFM)
        self.assertEqual('MRRE1H0072T1RM3', obsNameBesRM)
        self.assertEqual('TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC', obsSeqBesFM)
        self.assertEqual('GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC', obsSeqBesRM)

    def test_getBesFmAndRmNamesAndSequences_case2(self):
        """First BES is typed RM, second FM: output swaps them into FM, RM."""
        nameBes1 = 'MRRE1H0072T1RM1'
        seqBes1 = 'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC'
        nameBes2 = 'MRRE1H0072T1FM3'
        seqBes2 = 'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC'
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, 'RM', nameBes2, seqBes2, 'FM')
        self.assertEqual('MRRE1H0072T1FM3', obsNameBesFM)
        self.assertEqual('MRRE1H0072T1RM1', obsNameBesRM)
        self.assertEqual('GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC', obsSeqBesFM)
        self.assertEqual('TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC', obsSeqBesRM)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _getFixturePath(self, baseName):
        """Return the path of fixture `baseName` in the test directory.

        Raises KeyError when the REPET_PATH environment variable is not set.
        """
        return '%s/commons/core/parsing/test/%s' % (os.environ['REPET_PATH'], baseName)

    def _registerFixture(self, baseName):
        """Return the path of fixture `baseName` and schedule its removal."""
        fileName = self._getFixturePath(baseName)
        # Registered before writing so that a partially written file is
        # removed by tearDown() as well.
        self._fixtureFileNames.append(fileName)
        return fileName

    def _checkConvertBlatObjectToGffLine(self, methodName, expFeature):
        """Convert one fixed BLAT line and check the four returned values.

        methodName -- value assigned to the converter's _methodName attribute
        expFeature -- expected content of the third column of the GFF line
        """
        blatLine = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'
        nbLine = 15
        besFastaFileName = self._registerFixture('besSequences.fasta')
        self._writeBesSequences(besFastaFileName)

        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._methodName = methodName
        iBlatToGffForBesPaired._inputFileFasta = besFastaFileName
        obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)

        expGffLine = 'chr16\tBlatToGffForBesPaired\t%s\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\n' % expFeature
        self.assertEqual(expGffLine, obsGffLine)
        self.assertEqual('MRRE1H001H13FM1', obsBesName)
        self.assertEqual('AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC', obsBesSeq)
        self.assertEqual('FM', obsBesType)

    def _checkBacPositions(self, tBes1, tBes2, expStart, expEnd):
        """Check that getBacPositions(tBes1, tBes2) returns (expStart, expEnd)."""
        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        obsStart, obsEnd = iBlatToGffForBesPaired.getBacPositions(tBes1, tBes2)
        self.assertEqual(expStart, obsStart)
        self.assertEqual(expEnd, obsEnd)

    def _checkCreateGffLineForBac(self, methodName, expFeature):
        """Build the BAC GFF line from two fixed BES lines and check it.

        methodName -- value assigned to the converter's _methodName attribute
        expFeature -- expected content of the third column of the GFF line
        """
        gffLine1 = 'chr16\tBlatToGffForBesPaired\tBES\t10\t1000\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=10;bes_end=1000;bes_size=991;muscadine_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG\n'
        nameBes1 = 'MRRE1H001H13FM1'
        seqBes1 = 'ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG'
        typeBes1 = 'FM'
        gffLine2 = 'chr16\tBlatToGffForBesPaired\tBES\t2000\t3000\t.\t+\t.\tID=MRRE1H001H13RM2;Name=MRRE1H001H13RM2;bes_start=2000;bes_end=3000;bes_size=1001;muscadine_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n'
        nameBes2 = 'MRRE1H001H13RM2'
        seqBes2 = 'CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT'
        typeBes2 = 'RM'
        line = 2

        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._methodName = methodName
        obsGffBac = iBlatToGffForBesPaired.createGffLineForBac(gffLine1, nameBes1, seqBes1, typeBes1, gffLine2, nameBes2, seqBes2, typeBes2, line)

        expGffBac = 'chr16\tBlatToGffForBesPaired\t%s\t10\t3000\t.\t.\t.\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=10;bac_end=3000;bac_size=2991;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGTACGACTGACTCGATCAGCTAGCTAGCTAGCACATCG;besRM_name=MRRE1H001H13RM2;muscadine_besRM_seq=CAGCTAGCTACGTACGTACGTACGTAGCATCGATCGAT\n' % expFeature
        self.assertEqual(expGffBac, obsGffBac)

    def _checkExtractedBesSequence(self, seqName, expSeq):
        """Extract `seqName` from the three-sequence fixture and compare.

        The second argument passed to extractBesSequenceFromFastaFile() is
        the constant 5 in every test.
        """
        fastaFileName = self._registerFixture('sequence.fasta')
        self._writeThreeSequences(fastaFileName)

        iBlatToGffForBesPaired = BlatToGffForBesPaired()
        iBlatToGffForBesPaired._inputFileFasta = fastaFileName
        obsSeq = iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)
        self.assertEqual(expSeq, obsSeq)

    def _writeThreeSequences(self, fileName):
        """Write the FASTA fixture holding seq1, seq2 and seq3 to `fileName`.

        seq1 and seq2 span several lines; seq3 fits on a single line.
        """
        with open(fileName, 'w') as fastaFile:
            fastaFile.writelines([
                '>seq1\n',
                'ATCGATCGATCGATCGATACGTCAGCGATCGAT\n',
                'TACGTACGTACGATCGATCGATCGATCGATCGG\n',
                'TACGTACGTACGATCGACGATCGATGCCGATCG\n',
                'ATCGAC\n',
                '>seq2\n',
                'GTCTAGCTAGCTATATCTGACTGACGCGACGGT\n',
                'CATGCTAGCTAGCACTGTACAGCTATCGATGCT\n',
                'ACTGACACTGTACGTAC\n',
                '>seq3\n',
                'ACTCGATCGATCG\n',
            ])

    def _writeBesSequences(self, fileName):
        """Write the three-record BES FASTA fixture to `fileName`.

        The file is created or overwritten; removing it is left to the caller.
        """
        with open(fileName, 'w') as besFile:
            besFile.writelines([
                '>MRRE1H001H13RM1\n',
                'ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC\n',
                'TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA\n',
                'ATCGATCGATCGATCGACATCGTACG\n',
                '>MRRE1H001H13FM1\n',
                'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC\n',
                'CTAGCTAGCTAGCTAGCTAGCTAGC\n',
                '>MRRE2H007A13FM3\n',
                'TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG\n',
                'TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG\n',
                'CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC\n',
                'TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC\n',
            ])
289
290
# Run the whole suite when this file is executed directly as a script;
# importing the module (e.g. from a test loader) does not start a run.
if __name__ == "__main__":
    unittest.main()