18
|
1 import unittest
|
|
2 import os
|
|
3 from commons.core.utils.FileUtils import FileUtils
|
|
4 from commons.core.sql.DbMySql import DbMySql
|
|
5 from commons.tools.GFF3Maker import GFF3Maker
|
|
6
|
|
7 class Test_F_GFF3Maker(unittest.TestCase):
|
|
8
|
|
def setUp(self):
    """Build the fixture shared by every test.

    Stores the fixed project name, opens a DbMySql handle, derives the
    file/table names used by the tests, then writes the genome FASTA file
    and registers it through createTable() as a "seq" table.
    """
    project = "projectName"
    self._projectName = project
    self._iDb = DbMySql()

    # Names of the files and tables the tests work with.
    self._tablesFileName = "annotation_tables.txt"
    self._fastaFileName = "%s_chr.fa" % project
    self._fastaTableName = "%s_chr_seq" % project
    self._inputFileName = "%s_chr_allTEs_nr_noSSR_join.pathOrSet" % project

    # Expected ("exp" prefix) and observed GFF3 file names compared by the tests.
    self._expGFFFileName = "explm_SuperContig_29_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_29_v2.gff3"
    self._obsGFFEmptyFileName = "lm_SuperContig_30_v2.gff3"

    # NOTE(review): the trailing True is presumably an "overwrite" flag --
    # confirm against DbMySql.createTable.
    self._writeFastaFile(self._fastaFileName)
    self._iDb.createTable(self._fastaTableName, "seq", self._fastaFileName, True)
|
|
21
|
|
def tearDown(self):
    """Drop the test tables, close the database and delete the test files.

    Cleanup is best-effort so that one missing artefact does not leave the
    remaining tables/files behind and pollute the following tests:

    * ``_inputTableName`` is only assigned inside the test methods, so it is
      skipped when a test failed before creating its input table;
    * the database handle is closed even if dropping a table raises;
    * a file is removed only if it exists (a test that failed early, or a
      run that produced no output, must not turn into a second error here).
    """
    try:
        self._iDb.dropTable(self._fastaTableName)
        inputTableName = getattr(self, "_inputTableName", None)
        if inputTableName is not None:
            self._iDb.dropTable(inputTableName)
    finally:
        try:
            self._iDb.close()
        finally:
            generatedFiles = (
                self._obsGFFFileName,
                self._expGFFFileName,
                self._fastaFileName,
                self._tablesFileName,
                self._inputFileName,
            )
            for fileName in generatedFiles:
                if os.path.exists(fileName):
                    os.remove(fileName)
|
|
31
|
|
def test_run_as_script_path_with_seq_withAllFiles(self):
    # Launches GFF3Maker.py as a script with the -w -a -p options on a
    # "path" table and compares the two generated GFF3 files with the
    # expected files written by the helper methods.
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    # Expected outputs: one for the annotated sequence, one for the other.
    emptyExpectedGff = "explm_SuperContig_30_v2.gff3"
    self._writeExpPathGFFFile(self._expGFFFileName)
    self._writeExpEmptyPathGFFFileWithSeq(emptyExpectedGff)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    os.system("GFF3Maker.py -t %s -f %s -w -a -p" % scriptArgs)

    comparisons = (
        (self._expGFFFileName, self._obsGFFFileName),
        (emptyExpectedGff, self._obsGFFEmptyFileName),
    )
    for expected, observed in comparisons:
        self.assertTrue(FileUtils.are2FilesIdentical(expected, observed))

    # These two files are not handled by tearDown.
    for leftover in (emptyExpectedGff, self._obsGFFEmptyFileName):
        os.remove(leftover)
|
|
49
|
|
def test_run_as_script_path_without_seq_withAllFiles(self):
    # Launches GFF3Maker.py as a script with the -a -p options (no -w) on a
    # "path" table and compares the two generated GFF3 files with the
    # expected files written by the helper methods.
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    emptyExpectedGff = "explm_SuperContig_30_v2.gff3"
    self._writeExpPathGFFFile_without_seq(self._expGFFFileName)
    self._writeExpEmptyPathGFFFile(emptyExpectedGff)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    os.system("GFF3Maker.py -t %s -f %s -a -p" % scriptArgs)

    comparisons = (
        (self._expGFFFileName, self._obsGFFFileName),
        (emptyExpectedGff, self._obsGFFEmptyFileName),
    )
    for expected, observed in comparisons:
        self.assertTrue(FileUtils.are2FilesIdentical(expected, observed))

    # These two files are not handled by tearDown.
    for leftover in (emptyExpectedGff, self._obsGFFEmptyFileName):
        os.remove(leftover)
|
|
67
|
|
def test_run_without_seq(self):
    # Drives GFF3Maker through its Python API (match parts compulsory) and
    # checks that only the GFF3 file of the annotated sequence is produced.
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFile_without_seq(self._expGFFFileName)

    maker = GFF3Maker()
    maker.setTablesFileName(self._tablesFileName)
    maker.setInFastaName(self._fastaTableName)
    maker.setAreMatchPartCompulsory(True)
    maker.run()

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    # No file must have been created for the second sequence.
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
|
|
83
|
|
def test_run_without_seq_and_match_part_not_compulsory(self):
    # Same scenario as the API-driven run, but setAreMatchPartCompulsory()
    # is left untouched so GFF3Maker uses its own default for match parts.
    self._writeTablesFile("path")
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)
    # The helper name below (with its "comulsory" spelling) is defined
    # elsewhere in the class and must be kept as is.
    self._writeExpPathGFFFile_without_seq_and_match_part_not_comulsory(self._expGFFFileName)

    maker = GFF3Maker()
    maker.setTablesFileName(self._tablesFileName)
    maker.setInFastaName(self._fastaTableName)
    maker.run()

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
|
|
98
|
|
def test_run_as_script_pathReversed(self):
    # Launches GFF3Maker.py (-w -p) on the path file written by
    # _writePathFileReverse and compares the result with the expected file.
    self._writeTablesFile("path")
    self._writePathFileReverse(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)
    self._writeExpPathGFFFileReversed(self._expGFFFileName)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    os.system("GFF3Maker.py -t %s -f %s -w -p" % scriptArgs)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
|
|
111
|
|
def test_run_as_script_set(self):
    # Launches GFF3Maker.py (-w -p) on a "set" table and compares the
    # generated GFF3 file with the expected one.
    self._writeTablesFile("set")
    self._writeSetFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_set" % self._projectName
    self._iDb.createTable(self._inputTableName, "set", self._inputFileName, True)
    self._writeExpSetGFFFile(self._expGFFFileName)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    # The command line keeps its original trailing blank.
    os.system("GFF3Maker.py -t %s -f %s -w -p " % scriptArgs)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
|
|
124
|
|
def test_run_as_script_setReversed(self):
    # Launches GFF3Maker.py (-w -p) on the set file written by
    # _writeSetFileReverse and compares the result with the expected file.
    self._writeTablesFile("set")
    self._writeSetFileReverse(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_set" % self._projectName
    self._iDb.createTable(self._inputTableName, "set", self._inputFileName, True)
    self._writeExpSetGFFFileReversed(self._expGFFFileName)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    # The command line keeps its original trailing blank.
    os.system("GFF3Maker.py -t %s -f %s -w -p " % scriptArgs)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)
    emptyFileExists = FileUtils.isRessourceExists(self._obsGFFEmptyFileName)
    self.assertFalse(emptyFileExists)
|
|
137
|
|
def test_run_as_script_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8(self):
    # Launches GFF3Maker.py with a classif table (-i) and compares the GFF3
    # file generated for lm_SuperContig_30_v2 with the expected one.
    self._writeTablesFile("path")
    self._writePathFile_withClassif(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    classifFileName = "input.classif"
    self._writeClassifFile(classifFileName)
    classifTable = "%s_classif" % self._projectName
    self._iDb.createTable(classifTable, "classif", classifFileName, True)

    # The compared files are rebound to the _30 sequence; tearDown removes
    # whatever these two attributes point to.
    self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
    self._writeExpPathGFFFile_without_seq_withClassif(self._expGFFFileName)

    scriptArgs = (self._tablesFileName, self._fastaTableName, classifTable)
    os.system("GFF3Maker.py -t %s -f %s -i %s -p" % scriptArgs)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)

    # Artefacts tearDown no longer covers: the classif file and the _29 output.
    for leftover in (classifFileName, "lm_SuperContig_29_v2.gff3"):
        os.remove(leftover)
    self._iDb.dropTable(classifTable)
|
|
160
|
|
def test_run_as_script_path_without_seq_withAllFilesWithTargetLength(self):
    # Launches GFF3Maker.py (-p) with TE sequence tables available and
    # compares the two generated GFF3 files (which are expected to carry
    # TargetLength attributes) with the expected ones.

    # Replace the genome table created in setUp by the extended genome.
    genomeFasta = "genome.fa"
    self._writeFastaFileExtended(genomeFasta)
    self._iDb.createTable(self._fastaTableName, "seq", genomeFasta, True)
    os.remove(genomeFasta)

    self._writeTablesFile_withTESeqTables("path")

    # First annotation table.
    self._writePathFile(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    # Second annotation table (bankBLRtx matches).
    bankPathFile = "%s_chr_bankBLRtx.path" % self._projectName
    self._writePathFile2(bankPathFile)
    bankPathTable = "%s_chr_bankBLRtx_path" % self._projectName
    self._iDb.createTable(bankPathTable, "path", bankPathFile, True)
    os.remove(bankPathFile)

    # Sequence table of the reference TEs.
    refTEsFasta = "%s_refTEs.fa" % self._projectName
    self._writeRefTEsSeqFile(refTEsFasta)
    refTEsSeqTable = "%s_refTEs_seq" % self._projectName
    self._iDb.createTable(refTEsSeqTable, "seq", refTEsFasta, True)
    os.remove(refTEsFasta)

    # Sequence table of the bank.
    bankFasta = "dummyRepbase.fa"
    self._writeBankBLRtxSeqFile(bankFasta)
    bankSeqTable = "%s_bankBLRtx_nt_seq" % self._projectName
    self._iDb.createTable(bankSeqTable, "seq", bankFasta, True)
    os.remove(bankFasta)

    self._expGFFFileName = "explm_SuperContig_29_v2.gff3"
    expectedChr1Gff = "expChr1.gff3"
    self._obsGFFFileName = "lm_SuperContig_29_v2.gff3"
    observedChr1Gff = "chr1.gff3"
    self._writeExpPathGFFFile_without_seq_withTargetLength_seq1(self._expGFFFileName)
    self._writeExpPathGFFFile_without_seq_withTargetLength_seq2(expectedChr1Gff)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    os.system("GFF3Maker.py -t %s -f %s -p" % scriptArgs)

    comparisons = (
        (self._expGFFFileName, self._obsGFFFileName),
        (expectedChr1Gff, observedChr1Gff),
    )
    for expected, observed in comparisons:
        self.assertTrue(FileUtils.are2FilesIdentical(expected, observed))

    # Files and tables that tearDown does not know about.
    for leftover in (expectedChr1Gff, observedChr1Gff):
        os.remove(leftover)
    for tableName in (refTEsSeqTable, bankSeqTable, bankPathTable):
        self._iDb.dropTable(tableName)
|
|
209
|
|
def test_run_as_script_split_file_by_annotation_method(self):
    # Launches GFF3Maker.py with the -s option and compares the three
    # generated "<sequence>_Annot<N>.gff3" files with the expected ones.

    # Replace the genome table created in setUp by the dummy DmelChr4 genome.
    genomeFasta = "dummyDmelChr4.fa"
    self._writeFastaFile_DmelChr4(genomeFasta)
    self._iDb.createTable(self._fastaTableName, "seq", genomeFasta, True)
    os.remove(genomeFasta)

    self._writeTablesFile_withTESeqTables("path")

    # First annotation table (refTEs matches).
    self._writePathFile_refTEs_annotation(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    # Second annotation table (bankBLRtx matches).
    bankPathFile = "%s_chr_bankBLRtx.path" % self._projectName
    self._writePathFile_bankBLRtx_annotation(bankPathFile)
    bankPathTable = "%s_chr_bankBLRtx_path" % self._projectName
    self._iDb.createTable(bankPathTable, "path", bankPathFile, True)
    os.remove(bankPathFile)

    # Sequence table of the reference TEs.
    refTEsFasta = "%s_refTEs.fa" % self._projectName
    self._writeRefTEsSeqFile(refTEsFasta)
    refTEsSeqTable = "%s_refTEs_seq" % self._projectName
    self._iDb.createTable(refTEsSeqTable, "seq", refTEsFasta, True)
    os.remove(refTEsFasta)

    # Sequence table of the bank.
    bankFasta = "dummyRepbase.fa"
    self._writeBankBLRtxSeqFile(bankFasta)
    bankSeqTable = "%s_bankBLRtx_nt_seq" % self._projectName
    self._iDb.createTable(bankSeqTable, "seq", bankFasta, True)
    os.remove(bankFasta)

    self._expGFFFileName = "expdmel_chr4_Annot1.gff3"
    expectedChr1Annot1 = "expdmel_chr1_Annot1.gff3"
    expectedChr4Annot2 = "expdmel_chr4_Annot2.gff3"
    self._obsGFFFileName = "dmel_chr4_Annot1.gff3"
    observedChr1Annot1 = "dmel_chr1_Annot1.gff3"
    observedChr4Annot2 = "dmel_chr4_Annot2.gff3"
    self._writeExpPathGFFFile_split_file1(self._expGFFFileName)
    self._writeExpPathGFFFile_split_file2(expectedChr1Annot1)
    self._writeExpPathGFFFile_split_file3(expectedChr4Annot2)

    scriptArgs = (self._tablesFileName, self._fastaTableName)
    os.system("GFF3Maker.py -t %s -f %s -s -p" % scriptArgs)

    comparisons = (
        (self._expGFFFileName, self._obsGFFFileName),
        (expectedChr1Annot1, observedChr1Annot1),
        (expectedChr4Annot2, observedChr4Annot2),
    )
    for expected, observed in comparisons:
        self.assertTrue(FileUtils.are2FilesIdentical(expected, observed))

    # Files and tables that tearDown does not know about.
    for leftover in (expectedChr1Annot1, expectedChr4Annot2, observedChr1Annot1, observedChr4Annot2):
        os.remove(leftover)
    for tableName in (refTEsSeqTable, bankSeqTable, bankPathTable):
        self._iDb.dropTable(tableName)
|
|
264
|
|
def test_run_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8_withIdenticalMatches(self):
    # Drives GFF3Maker through its Python API with a classif table,
    # identical-match merging switched on and match parts compulsory, then
    # compares the GFF3 file of lm_SuperContig_30_v2 with the expected one.
    self._writeTablesFile("path")
    self._writePathFile_withClassif_withIdenticalMatches(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    classifFileName = "input.classif"
    self._writeClassifFile(classifFileName)
    classifTable = "%s_classif" % self._projectName
    self._iDb.createTable(classifTable, "classif", classifFileName, True)

    # The compared files are rebound to the _30 sequence; tearDown removes
    # whatever these two attributes point to.
    self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
    self._writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self._expGFFFileName)

    maker = GFF3Maker()
    maker.setTablesFileName(self._tablesFileName)
    maker.setInFastaName(self._fastaTableName)
    maker.setClassifTable(classifTable)
    maker.setDoMergeIdenticalMatches(True)
    maker.setAreMatchPartCompulsory(True)
    maker.run()

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)

    # Artefacts tearDown no longer covers: the classif file and the _29 output.
    for leftover in (classifFileName, "lm_SuperContig_29_v2.gff3"):
        os.remove(leftover)
    self._iDb.dropTable(classifTable)
|
|
292
|
|
def test_run_as_script_path_without_seq_withAllFilesWithClassif_headers_TEdenovo_step6_and_step8_withIdenticalMatches(self):
    # Launches GFF3Maker.py with a classif table (-i) and the -m option,
    # then compares the GFF3 file of lm_SuperContig_30_v2 with the expected one.
    self._writeTablesFile("path")
    self._writePathFile_withClassif_withIdenticalMatches(self._inputFileName)
    self._inputTableName = "%s_chr_allTEs_nr_noSSR_join_path" % self._projectName
    self._iDb.createTable(self._inputTableName, "path", self._inputFileName, True)

    classifFileName = "input.classif"
    self._writeClassifFile(classifFileName)
    classifTable = "%s_classif" % self._projectName
    self._iDb.createTable(classifTable, "classif", classifFileName, True)

    # The compared files are rebound to the _30 sequence; tearDown removes
    # whatever these two attributes point to.
    self._expGFFFileName = "explm_SuperContig_30_v2.gff3"
    self._obsGFFFileName = "lm_SuperContig_30_v2.gff3"
    self._writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self._expGFFFileName)

    scriptArgs = (self._tablesFileName, self._fastaTableName, classifTable)
    os.system("GFF3Maker.py -t %s -f %s -i %s -m -p" % scriptArgs)

    sameContent = FileUtils.are2FilesIdentical(self._expGFFFileName, self._obsGFFFileName)
    self.assertTrue(sameContent)

    # Artefacts tearDown no longer covers: the classif file and the _29 output.
    for leftover in (classifFileName, "lm_SuperContig_29_v2.gff3"):
        os.remove(leftover)
    self._iDb.dropTable(classifTable)
|
|
315
|
|
def _writeRefTEsSeqFile(self, inFileName):
    """Write a fixed two-record FASTA-style file to ``inFileName``.

    Each record is a ">" header line followed by one sequence line.
    """
    fastaLines = (
        ">DTX-incomp_DmelChr4-L-B1-Map3\n",
        "ATCGATCGTT\n",
        ">DTX-incomp_DmelChr4-B-P0.0-Map3\n",
        "GCTAGCTA\n",
    )
    with open(inFileName, "w") as fastaFile:
        fastaFile.writelines(fastaLines)
|
|
322
|
|
def _writeBankBLRtxSeqFile(self, inFileName):
    """Write a fixed three-record FASTA-style file to ``inFileName``.

    Each record is a ">" header line followed by one sequence line.
    """
    fastaLines = (
        ">PROTOP_B:classII:TIR\n",
        "ATCGATCGTT\n",
        ">DMRT1C:classI:?\n",
        "GCTAGCTA\n",
        ">BATUMI_I:classI:LTR_retrotransposon\n",
        "GCTAATGGCATA\n",
    )
    with open(inFileName, "w") as fastaFile:
        fastaFile.writelines(fastaLines)
|
|
331
|
|
def _writeExpPathGFFFile_without_seq_withTargetLength_seq1(self, inFileName):
    """Write the expected GFF3 content for lm_SuperContig_29_v2 to ``inFileName``.

    The file holds the two GFF3 header lines followed by three "match"
    records (each carrying a TargetLength attribute) and their
    "match_part" children.
    """
    gffLines = [
        "##gff-version 3\n",
        "##sequence-region lm_SuperContig_29_v2 1 120\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;TargetLength=10;Identity=30.56\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;TargetLength=10;Identity=23.99\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;TargetLength=8\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n",
        "lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n",
    ]
    with open(inFileName, "w") as gffFile:
        gffFile.writelines(gffLines)
|
|
343
|
|
def _writeExpPathGFFFile_without_seq_withTargetLength_seq2(self, inFileName):
    """Write the expected GFF3 content for chr1 to ``inFileName``.

    The file holds the two GFF3 header lines followed by two "match"
    records (each carrying a TargetLength attribute) and their
    "match_part" children.
    """
    gffLines = [
        "##gff-version 3\n",
        "##sequence-region chr1 1 18\n",
        "chr1\tprojectName_REPET_TEs\tmatch\t1\t100\t0.0\t-\t.\tID=ms1_chr1_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 85 228;TargetLength=10;Identity=30.56\n",
        "chr1\tprojectName_REPET_TEs\tmatch_part\t1\t100\t1e-20\t-\t.\tID=mp1-1_chr1_PROTOP_B:classII:TIR;Parent=ms1_chr1_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 85 228;Identity=30.56\n",
        "chr1\tprojectName_REPET_TEs\tmatch\t100\t1000\t0.0\t-\t.\tID=ms2_chr1_DMRT1C:classI:?;Target=DMRT1C:classI:? 85 228;TargetLength=8;Identity=30.56\n",
        "chr1\tprojectName_REPET_TEs\tmatch_part\t100\t1000\t1e-30\t-\t.\tID=mp2-1_chr1_DMRT1C:classI:?;Parent=ms2_chr1_DMRT1C:classI:?;Target=DMRT1C:classI:? 85 228;Identity=30.56\n",
    ]
    with open(inFileName, "w") as gffFile:
        gffFile.writelines(gffLines)
|
|
352
|
|
def _writeExpPathGFFFile_split_file1(self, inFileName):
    """Write the first expected split GFF3 file (dmel_chr4) to ``inFileName``.

    The source column of every record is built from ``self._projectName``.
    """
    headerLines = (
        "##gff-version 3\n",
        "##sequence-region dmel_chr4 1 18\n",
    )
    # Record templates; the single %s receives the project name.
    recordTemplates = (
        "dmel_chr4\t%s_REPET_TEs\tmatch\t4630\t4889\t0.0\t+\t.\tID=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 232 512;TargetLength=8\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4630\t4704\t0.0\t+\t.\tID=mp21-1_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 232 312;Identity=84.4417\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4837\t4889\t0.0\t+\t.\tID=mp21-2_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms21_dmel_chr4_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 456 512;Identity=84.4417\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch\t4364\t4611\t0.0\t+\t.\tID=ms6_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;TargetLength=10;Identity=91.24\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4364\t4611\t0.0\t+\t.\tID=mp6-1_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms6_dmel_chr4_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;Identity=91.24\n",
    )
    with open(inFileName, 'w') as gffFile:
        gffFile.writelines(headerLines)
        for template in recordTemplates:
            gffFile.write(template % self._projectName)
|
|
362
|
|
def _writeExpPathGFFFile_split_file2(self, inFileName):
    """Write the second expected split GFF3 file (dmel_chr1) to ``inFileName``.

    The source column of every record is built from ``self._projectName``.
    """
    headerLines = (
        "##gff-version 3\n",
        "##sequence-region dmel_chr1 1 25\n",
    )
    # Record templates; the single %s receives the project name.
    recordTemplates = (
        "dmel_chr1\t%s_REPET_TEs\tmatch\t4364\t4611\t0.0\t+\t.\tID=ms35_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;TargetLength=10;Identity=91.24\n",
        "dmel_chr1\t%s_REPET_TEs\tmatch_part\t4364\t4611\t0.0\t+\t.\tID=mp35-1_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms35_dmel_chr1_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 854 1150;Identity=91.24\n",
    )
    with open(inFileName, 'w') as gffFile:
        gffFile.writelines(headerLines)
        for template in recordTemplates:
            gffFile.write(template % self._projectName)
|
|
369
|
|
def _writeExpPathGFFFile_split_file3(self, inFileName):
    """Write the third expected split GFF3 file (dmel_chr4) to ``inFileName``.

    The file holds the two GFF3 header lines followed by three "match"
    records and their "match_part" children; the source column of every
    record is built from ``self._projectName``.
    """
    headerLines = (
        "##gff-version 3\n",
        "##sequence-region dmel_chr4 1 18\n",
    )
    # Record templates; the single %s receives the project name.
    recordTemplates = (
        "dmel_chr4\t%s_REPET_TEs\tmatch\t3143\t4364\t0.0\t-\t.\tID=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 24 2206;TargetLength=8\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3143\t3361\t0.0\t-\t.\tID=mp66-1_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1988 2206;Identity=89.7202\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3345\t3410\t0.0\t-\t.\tID=mp66-2_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1654 1719;Identity=54.55\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3448\t3572\t0.0\t-\t.\tID=mp66-3_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1495 1619;Identity=77.3433\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3564\t3693\t0.0\t-\t.\tID=mp66-4_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 1380 1509;Identity=83.7306\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3705\t3857\t0.0\t-\t.\tID=mp66-5_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 539 691;Identity=84.5762\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3861\t3992\t0.0\t-\t.\tID=mp66-6_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 403 534;Identity=67.1422\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t3985\t4106\t0.0\t-\t.\tID=mp66-7_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 290 411;Identity=76.612\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4102\t4248\t0.0\t-\t.\tID=mp66-8_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 139 285;Identity=75.3027\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4236\t4364\t0.0\t-\t.\tID=mp66-9_dmel_chr4_DMRT1C:classI:?;Parent=ms66_dmel_chr4_DMRT1C:classI:?;Target=DMRT1C:classI:? 24 152;Identity=66.6657\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch\t4412\t4889\t0.0\t-\t.\tID=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 553 1082;TargetLength=10\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4412\t4501\t5e-108\t-\t.\tID=mp27-1_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 993 1082;Identity=91.3066\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4483\t4652\t9e-146\t-\t.\tID=mp27-2_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 821 990;Identity=86.5572\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4672\t4775\t9e-146\t-\t.\tID=mp27-3_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 684 787;Identity=88.8013\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4782\t4889\t9e-141\t-\t.\tID=mp27-4_dmel_chr4_PROTOP_B:classII:TIR;Parent=ms27_dmel_chr4_PROTOP_B:classII:TIR;Target=PROTOP_B:classII:TIR 553 661;Identity=53.7067\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch\t4917\t5195\t0.0\t+\t.\tID=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7030 7303;TargetLength=12\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4917\t5007\t2e-144\t+\t.\tID=mp141-1_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Parent=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7030 7120;Identity=94.1756\n",
        "dmel_chr4\t%s_REPET_TEs\tmatch_part\t4976\t5195\t2e-144\t+\t.\tID=mp141-2_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Parent=ms141_dmel_chr4_BATUMI_I:classI:LTR_retrotransposon;Target=BATUMI_I:classI:LTR_retrotransposon 7087 7303;Identity=82.2343\n",
    )
    with open(inFileName, 'w') as gffFile:
        gffFile.writelines(headerLines)
        for template in recordTemplates:
            gffFile.write(template % self._projectName)
|
|
392
|
|
def _writeClassifFile(self, inputFileName):
    """Write a fixed 11-line, tab-separated classification file.

    Every line has eight tab-separated columns; the last one is a free-text
    description starting with "CI=".  The file is opened with a context
    manager so the handle is closed even if a write fails.

    :param inputFileName: path of the file to create (overwritten if present).
    """
    classifLines = [
        "PotentialHostGene-chim_fTest05105818-B-G11-Map20\t1240\t+\tPotentialChimeric\tNA\tPotentialHostGene\tNA\tCI=100; coding=(HG_BLRn: FBtr0089196_Dmel_r4.3: 95.65%); other=(TE_BLRtx: PROTOP:classII:TIR: 12.03%, PROTOP_A:classII:TIR: 49.14%; TermRepeats: termTIR: 49; SSRCoverage=0.25<0.75)\n",
        "DTX-comp-chim_fTest05105818-B-G7-Map3_reversed\t1944\t-\tPotentialChimeric\tII\tTIR\tcomplete\tCI=33; coding=(TE_BLRtx: PROTOP:classII:TIR: 12.77%, PROTOP_A:classII:TIR: 25.16%, PROTOP_A:classII:TIR: 100.00%); struct=(TElength: <700bps; TermRepeats: termTIR: 844); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 29.48%; SSRCoverage=0.24<0.75)\n",
        "DTX-incomp_fTest05105818-B-G9-Map3_reversed\t1590\t-\tok\tII\tTIR\tincomplete\tCI=33; coding=(TE_BLRtx: PROTOP:classII:TIR: 10.92%, PROTOP:classII:TIR: 11.03%, PROTOP_A:classII:TIR: 55.20%); struct=(TElength: >700bps); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60%; SSRCoverage=0.21<0.75)\n",
        "DTX-incomp_fTest05105818-B-P0.0-Map3\t1042\t.\tok\tII\tTIR\tincomplete\tCI=50; coding=(TE_BLRtx: PROTOP:classII:TIR: 17.39%, PROTOP_A:classII:TIR: 22.17%); struct=(TElength: >700bps; TermRepeats: termTIR: 50); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 47.22%; SSRCoverage=0.25<0.75)\n",
        "DTX-comp_fTest05105818-B-P1.0-Map9_reversed\t1137\t-\tok\tII\tTIR\tcomplete\tCI=50; coding=(TE_BLRtx: PROTOP:classII:TIR: 6.70%, PROTOP_A:classII:TIR: 66.43%, PROTOP_B:classII:TIR: 6.42%); struct=(TElength: >700bps; TermRepeats: termTIR: 52); other=(HG_BLRn: FBtr0089196_Dmel_r4.3: 51.19%; SSRCoverage=0.22<0.75)\n",
        "RLX-incomp_fTest05105818-B-R12-Map3_reversed\t2284\t-\tok\tI\tLTR\tincomplete\tCI=28; coding=(TE_BLRtx: ROOA_I:classI:LTR_retrotransposon: 27.57%, ROOA_LTR:classI:LTR_retrotransposon: 94.56%; TE_BLRx: BEL11_AGp:classI:LTR_retrotransposon: 19.47%, BEL2-I_Dmoj_1p:classI:LTR_retrotransposon: 11.49%); struct=(TElength: >700bps); other=(SSRCoverage=0.07<0.75)\n",
        "DTX-incomp_fTest05105818-B-R19-Map4\t705\t+\tok\tII\tTIR\tincomplete\tCI=66; coding=(TE_BLRtx: TC1-2_DM:classII:TIR: 42.70%; TE_BLRx: TC1-2_DMp:classII:TIR: 41.18%); struct=(TElength: >700bps); other=(SSRCoverage=0.14<0.75)\n",
        "DHX-incomp_fTest05105818-B-R1-Map4\t2367\t.\tok\tII\tHelitron\tincomplete\tCI=20; coding=(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00%, DNAREP1_DYak:classII:Helitron: 9.08%); struct=(TElength: >700bps); other=(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52%; SSRCoverage=0.18<0.75)\n",
        "noCat_fTest05105818-B-R2-Map6\t4638\t.\tok\tnoCat\tnoCat\tNA\tCI=NA; coding=(HG_BLRn: FBtr0089179_Dmel_r4.3: 73.65%); struct=(SSRCoverage=0.05<0.75)\n",
        "PotentialHostGene-chim_fTest05105818-B-R4-Map5_reversed\t1067\t-\tPotentialChimeric\tNA\tPotentialHostGene\tNA\tCI=100; coding=(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%); other=(TE_BLRtx: PROTOP:classII:TIR: 13.06%, PROTOP_A:classII:TIR: 37.47%; SSRCoverage=0.27<0.75)\n",
        "DTX-incomp_fTest05105818-B-R9-Map3_reversed\t714\t-\tok\tII\tTIR\tincomplete\tCI=66; coding=(TE_BLRtx: TC1_DM:classII:TIR: 40.88%; TE_BLRx: Tc1-1_TCa_1p:classII:TIR: 30.18%, Tc1-3_FR_1p:classII:TIR: 9.97%); struct=(TElength: >700bps); other=(SSRCoverage=0.08<0.75)\n",
    ]
    with open(inputFileName, "w") as classifFile:
        classifFile.writelines(classifLines)
|
|
407
|
|
def _writePathFile_withClassif(self, inFileName):
    """Write a fixed 8-line, tab-separated "path" file.

    Every line has ten tab-separated columns; two lines concern
    lm_SuperContig_29_v2 and six concern lm_SuperContig_30_v2 (the two
    lines sharing identifier 3 belong to the same path).  The file is
    opened with a context manager so the handle is closed even if a write
    fails.

    :param inFileName: path of the file to create (overwritten if present).
    """
    pathLines = [
        "1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030\t228\t85\t1e-40\t84\t30.56\n",
        "2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030\t522\t229\t1e-40\t106\t23.99\n",
        "3\tlm_SuperContig_30_v2\t78081\t78088\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n",
        "3\tlm_SuperContig_30_v2\t78089\t78588\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n",
        "4\tlm_SuperContig_30_v2\t88031\t88080\tDTX-incomp_fTest05105818-B-G1-Map3\t370\t420\t3e-23\t101\t31.89\n",
        "5\tlm_SuperContig_30_v2\t108588\t108081\tDTX-incomp_fTest05105818-B-G9-Map3_reversed\t590\t820\t3e-24\t101\t32.89\n",
        "6\tlm_SuperContig_30_v2\t118081\t118588\tPotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed\t154\t289\t3e-25\t101\t33.89\n",
        "7\tlm_SuperContig_30_v2\t288031\t288080\tnoCat_Blc22_fTest05105818-B-R2-Map6\t1900\t2090\t3e-26\t101\t34.89\n",
    ]
    with open(inFileName, "w") as pathFile:
        pathFile.writelines(pathLines)
|
|
419
|
|
420 def _writeExpPathGFFFile_without_seq_withClassif(self, inFileName):
|
|
421 f = open(inFileName, 'w')
|
|
422 f.write("##gff-version 3\n")
|
|
423 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n")
|
|
424 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t78081\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;TargetDescription=CI:20 coding:(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00% | DNAREP1_DYak:classII:Helitron: 9.08%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52% SSRCoverage:0.18<0.75)\n")
|
|
425 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78088\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209;Identity=30.89\n")
|
|
426 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78089\t78588\t3e-22\t+\t.\tID=mp3-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350;Identity=35.89\n")
|
|
427
|
|
428 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t88031\t88080\t0.0\t+\t.\tID=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n")
|
|
429 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t88031\t88080\t3e-23\t+\t.\tID=mp4-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n")
|
|
430
|
|
431 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t108081\t108588\t0.0\t-\t.\tID=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;TargetDescription=CI:33 coding:(TE_BLRtx: PROTOP:classII:TIR: 10.92% | PROTOP:classII:TIR: 11.03% | PROTOP_A:classII:TIR: 55.20%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60% SSRCoverage:0.21<0.75);Identity=32.89\n")
|
|
432 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t108081\t108588\t3e-24\t-\t.\tID=mp5-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Parent=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;Identity=32.89\n")
|
|
433
|
|
434 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t118081\t118588\t0.0\t+\t.\tID=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;TargetDescription=CI:100 coding:(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%) other:(TE_BLRtx: PROTOP:classII:TIR: 13.06% | PROTOP_A:classII:TIR: 37.47% SSRCoverage:0.27<0.75);Identity=33.89\n")
|
|
435 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t118081\t118588\t3e-25\t+\t.\tID=mp6-1_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Parent=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;Identity=33.89\n")
|
|
436
|
|
437 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms7_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Target=noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090;TargetDescription=CI:NA coding:(HG_BLRn: FBtr0089179_Dmel_r4.3: 73.65%) struct:(SSRCoverage:0.05<0.75);Identity=34.89\n")
|
|
438 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp7-1_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Parent=ms7_lm_SuperContig_30_v2_noCat_Blc22_fTest05105818-B-R2-Map6;Target=noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090;Identity=34.89\n")
|
|
439
|
|
440 f.close()
|
|
441
|
|
442 def _writePathFile_withClassif_withIdenticalMatches(self,inFileName):
|
|
443 f = open(inFileName,'w')
|
|
444 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030\t228\t85\t1e-40\t84\t30.56\n')
|
|
445 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030\t522\t229\t1e-40\t106\t23.99\n')
|
|
446 f.write('3\tlm_SuperContig_30_v2\t78081\t78088\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n')
|
|
447 f.write('3\tlm_SuperContig_30_v2\t78089\t78588\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n')
|
|
448 f.write('4\tlm_SuperContig_30_v2\t88031\t88080\tDTX-incomp_fTest05105818-B-G1-Map3\t370\t420\t3e-23\t101\t31.89\n')
|
|
449 f.write('5\tlm_SuperContig_30_v2\t108588\t108081\tDTX-incomp_fTest05105818-B-G9-Map3_reversed\t590\t820\t3e-24\t101\t32.89\n')
|
|
450 f.write('6\tlm_SuperContig_30_v2\t118081\t118588\tPotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed\t154\t289\t3e-25\t101\t33.89\n')
|
|
451
|
|
452 f.write('7\tlm_SuperContig_30_v2\t288031\t288080\tnoCat_Blc22_fTest05105818-B-R2-Map6\t1900\t2090\t3e-26\t101\t34.89\n')
|
|
453 f.write('8\tlm_SuperContig_30_v2\t288031\t288080\tDTX-incomp_fTest05105818-B-P0.0-Map3\t100\t190\t3e-26\t101\t39.89\n')
|
|
454 f.write('9\tlm_SuperContig_30_v2\t288031\t288080\tRLX-incomp_fTest05105818-B-R12-Map3_reversed\t1100\t1290\t3e-26\t101\t40.89\n')
|
|
455 f.write('10\tlm_SuperContig_30_v2\t288031\t288080\tPotentialHostGene-chim_fTest05105818-B-G11-Map20\t990\t1890\t3e-26\t101\t38.09\n')
|
|
456
|
|
457 f.write('11\tlm_SuperContig_30_v2\t288031\t288080\tDTX-incomp_fTest05105818-B-G1-Map3\t990\t1890\t3e-26\t301\t38.09\n')
|
|
458
|
|
459 f.write('12\tlm_SuperContig_30_v2\t388031\t388080\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t19\t209\t3e-21\t101\t30.89\n')
|
|
460 f.write('12\tlm_SuperContig_30_v2\t388081\t388380\tDHX-incomp_Blc1_fTest05105818-B-R1-Map4\t150\t350\t3e-22\t101\t35.89\n')
|
|
461
|
|
462 f.write('13\tlm_SuperContig_30_v2\t388031\t388080\tDTX-incomp_fTest05105818-B-P0.0-Map3\t119\t309\t3e-21\t101\t30.89\n')
|
|
463 f.write('13\tlm_SuperContig_30_v2\t388081\t388380\tDTX-incomp_fTest05105818-B-P0.0-Map3\t250\t450\t3e-22\t101\t35.89\n')
|
|
464 f.close()
|
|
465
|
|
466 def _writeExpPathGFFFile_without_seq_withClassif_withIdenticalMatches(self, inFileName):
|
|
467 f = open(inFileName, 'w')
|
|
468 f.write("##gff-version 3\n")
|
|
469 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n")
|
|
470 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t78081\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;TargetDescription=CI:20 coding:(TE_BLRtx: DNAREP1_DM:classII:Helitron: 17.00% | DNAREP1_DYak:classII:Helitron: 9.08%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089179_Dmel_r4.3: 13.52% SSRCoverage:0.18<0.75)\n")
|
|
471 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78088\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209;Identity=30.89\n")
|
|
472 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t78089\t78588\t3e-22\t+\t.\tID=mp3-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms3_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350;Identity=35.89\n")
|
|
473
|
|
474 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t88031\t88080\t0.0\t+\t.\tID=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n")
|
|
475 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t88031\t88080\t3e-23\t+\t.\tID=mp4-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms4_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 370 420;Identity=31.89\n")
|
|
476
|
|
477 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t108081\t108588\t0.0\t-\t.\tID=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;TargetDescription=CI:33 coding:(TE_BLRtx: PROTOP:classII:TIR: 10.92% | PROTOP:classII:TIR: 11.03% | PROTOP_A:classII:TIR: 55.20%) struct:(TElength: >700bps) other:(HG_BLRn: FBtr0089196_Dmel_r4.3: 35.60% SSRCoverage:0.21<0.75);Identity=32.89\n")
|
|
478 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t108081\t108588\t3e-24\t-\t.\tID=mp5-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Parent=ms5_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G9-Map3_reversed;Target=DTX-incomp_fTest05105818-B-G9-Map3_reversed 590 820;Identity=32.89\n")
|
|
479
|
|
480 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t118081\t118588\t0.0\t+\t.\tID=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;TargetDescription=CI:100 coding:(HG_BLRn: FBtr0089196_Dmel_r4.3: 99.91%) other:(TE_BLRtx: PROTOP:classII:TIR: 13.06% | PROTOP_A:classII:TIR: 37.47% SSRCoverage:0.27<0.75);Identity=33.89\n")
|
|
481 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t118081\t118588\t3e-25\t+\t.\tID=mp6-1_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Parent=ms6_lm_SuperContig_30_v2_PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed;Target=PotentialHostGene-chim_Blc6_fTest05105818-B-R4-Map5_reversed 154 289;Identity=33.89\n")
|
|
482
|
|
483 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms10_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Target=PotentialHostGene-chim_fTest05105818-B-G11-Map20 990 1890;OtherTargets=RLX-incomp_fTest05105818-B-R12-Map3_reversed 1100 1290, DTX-incomp_fTest05105818-B-P0.0-Map3 100 190, noCat_Blc22_fTest05105818-B-R2-Map6 1900 2090\n")
|
|
484 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp10-1_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Parent=ms10_lm_SuperContig_30_v2_PotentialHostGene-chim_fTest05105818-B-G11-Map20;Target=PotentialHostGene-chim_fTest05105818-B-G11-Map20 990 1890\n")
|
|
485
|
|
486 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t288031\t288080\t0.0\t+\t.\tID=ms11_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 990 1890;Identity=38.09\n")
|
|
487 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t288031\t288080\t3e-26\t+\t.\tID=mp11-1_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Parent=ms11_lm_SuperContig_30_v2_DTX-incomp_fTest05105818-B-G1-Map3;Target=DTX-incomp_fTest05105818-B-G1-Map3 990 1890;Identity=38.09\n")
|
|
488
|
|
489 #TODO:
|
|
490 #Should this case really occur : If merging multiple match-parts, the current behaviour needs to be fixed to get correct subject start/end coordinates
|
|
491 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch\t388031\t388380\t0.0\t+\t.\tID=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 350;OtherTargets=DTX-incomp_fTest05105818-B-P0.0-Map3 119 309\n")
|
|
492 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t388031\t388080\t3e-21\t+\t.\tID=mp12-1_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 19 209\n")
|
|
493 f.write("lm_SuperContig_30_v2\tprojectName_REPET_TEs\tmatch_part\t388081\t388380\t3e-22\t+\t.\tID=mp12-2_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Parent=ms12_lm_SuperContig_30_v2_DHX-incomp_Blc1_fTest05105818-B-R1-Map4;Target=DHX-incomp_Blc1_fTest05105818-B-R1-Map4 150 350\n")
|
|
494 f.close()
|
|
495
|
|
496 def _writeTablesFile_withTESeqTables(self, tableType):
|
|
497 tableFile = open( self._tablesFileName, "w" )
|
|
498 string = "%s_REPET_TEs\t%s\t%s_chr_allTEs_nr_noSSR_join_%s\t%s_refTEs_seq\n" % (self._projectName, tableType, self._projectName, tableType, self._projectName)
|
|
499 tableFile.write(string)
|
|
500 string = "%s_REPET_TEs\t%s\t%s_chr_bankBLRtx_%s\t%s_bankBLRtx_nt_seq\n" % (self._projectName, tableType, self._projectName, tableType, self._projectName)
|
|
501 tableFile.write(string)
|
|
502 tableFile.close()
|
|
503
|
|
504 def _writeTablesFile(self, tableType):
|
|
505 tableFile = open( self._tablesFileName, "w" )
|
|
506 string = "%s_REPET_TEs\t%s\t%s_chr_allTEs_nr_noSSR_join_%s\n" % (self._projectName, tableType, self._projectName, tableType)
|
|
507 tableFile.write(string)
|
|
508 tableFile.close()
|
|
509
|
|
510 def _writePathFile(self,inFileName):
|
|
511 f = open(inFileName,'w')
|
|
512 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\tDTX-incomp_DmelChr4-L-B1-Map3\t228\t85\t1e-40\t84\t30.56\n')
|
|
513 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\tDTX-incomp_DmelChr4-L-B1-Map3\t522\t229\t1e-40\t106\t23.99\n')
|
|
514 f.write('3\tlm_SuperContig_29_v2\t78031\t78080\tDTX-incomp_DmelChr4-B-P0.0-Map3\t19\t209\t3e-21\t101\t30.89\n')
|
|
515 f.write('3\tlm_SuperContig_29_v2\t78081\t78588\tDTX-incomp_DmelChr4-B-P0.0-Map3\t19\t209\t3e-21\t101\t30.89\n')
|
|
516 f.close()
|
|
517
|
|
518 def _writePathFile2(self,inFileName):
|
|
519 f = open(inFileName,'w')
|
|
520 f.write('1\tchr1\t1\t100\tPROTOP_B:classII:TIR\t228\t85\t1e-20\t84\t30.56\n')
|
|
521 f.write('2\tchr1\t100\t1000\tDMRT1C:classI:?\t228\t85\t1e-30\t84\t30.56\n')
|
|
522 f.close()
|
|
523
|
|
524 def _writePathFile_refTEs_annotation(self,inFileName):
|
|
525 f = open(inFileName,'w')
|
|
526 f.write('6\tdmel_chr4\t4364\t4611\tDTX-incomp_DmelChr4-L-B1-Map3\t854\t1150\t0\t1475\t91.24\n')
|
|
527 f.write('21\tdmel_chr4\t4630\t4704\tDTX-incomp_DmelChr4-B-P0.0-Map3\t232\t312\t0\t65\t84.4417\n')
|
|
528 f.write('21\tdmel_chr4\t4837\t4889\tDTX-incomp_DmelChr4-B-P0.0-Map3\t456\t512\t0\t46\t84.4417\n')
|
|
529 f.write('35\tdmel_chr1\t4364\t4611\tDTX-incomp_DmelChr4-L-B1-Map3\t854\t1150\t0\t1475\t91.24\n')
|
|
530 f.close()
|
|
531
|
|
532 def _writePathFile_bankBLRtx_annotation(self,inFileName):
|
|
533 f = open(inFileName, 'w')
|
|
534 f.write('27\tdmel_chr4\t4412\t4501\tPROTOP_B:classII:TIR\t1082\t993\t5e-108\t702\t91.3066\n')
|
|
535 f.write('27\tdmel_chr4\t4483\t4652\tPROTOP_B:classII:TIR\t990\t821\t9e-146\t707\t86.5572\n')
|
|
536 f.write('27\tdmel_chr4\t4672\t4775\tPROTOP_B:classII:TIR\t787\t684\t9e-146\t707\t88.8013\n')
|
|
537 f.write('27\tdmel_chr4\t4782\t4889\tPROTOP_B:classII:TIR\t661\t553\t9e-141\t356\t53.7067\n')
|
|
538 f.write('66\tdmel_chr4\t3143\t3361\tDMRT1C:classI:?\t2206\t1988\t0\t1878\t89.7202\n')
|
|
539 f.write('66\tdmel_chr4\t3345\t3410\tDMRT1C:classI:?\t1719\t1654\t0\t313\t54.55\n')
|
|
540 f.write('66\tdmel_chr4\t3448\t3572\tDMRT1C:classI:?\t1619\t1495\t0\t1252\t77.3433\n')
|
|
541 f.write('66\tdmel_chr4\t3564\t3693\tDMRT1C:classI:?\t1509\t1380\t0\t1565\t83.7306\n')
|
|
542 f.write('66\tdmel_chr4\t3705\t3857\tDMRT1C:classI:?\t691\t539\t0\t1252\t84.5762\n')
|
|
543 f.write('66\tdmel_chr4\t3861\t3992\tDMRT1C:classI:?\t534\t403\t0\t1565\t67.1422\n')
|
|
544 f.write('66\tdmel_chr4\t3985\t4106\tDMRT1C:classI:?\t411\t290\t0\t1252\t76.612\n')
|
|
545 f.write('66\tdmel_chr4\t4102\t4248\tDMRT1C:classI:?\t285\t139\t0\t1565\t75.3027\n')
|
|
546 f.write('66\tdmel_chr4\t4236\t4364\tDMRT1C:classI:?\t152\t24\t0\t1565\t66.6657\n')
|
|
547 f.write('141\tdmel_chr4\t4917\t5007\tBATUMI_I:classI:LTR_retrotransposon\t7030\t7120\t2e-144\t984\t94.1756\n')
|
|
548 f.write('141\tdmel_chr4\t4976\t5195\tBATUMI_I:classI:LTR_retrotransposon\t7087\t7303\t2e-144\t2098\t82.2343\n')
|
|
549 f.close()
|
|
550
|
|
551 def _writePathFileReverse(self,inFileName):
|
|
552 f = open(inFileName,'w')
|
|
553 f.write('1\tlm_SuperContig_29_v2\t193781\t194212\t1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein)\t228\t85\t1e-40\t84\t30.56\n')
|
|
554 f.write('2\tlm_SuperContig_29_v2\t192832\t193704\t1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein)\t522\t229\t1e-40\t106\t23.99\n')
|
|
555 f.write('3\tlm_SuperContig_29_v2\t78080\t78031\txnc164_090 related to multidrug resistance protein\t19\t209\t3e-21\t101\t30.89\n')
|
|
556 f.write('3\tlm_SuperContig_29_v2\t78588\t78081\txnc164_090 related to multidrug resistance protein\t19\t209\t3e-21\t101\t30.89\n')
|
|
557 f.close()
|
|
558
|
|
559 def _writeSetFile(self,inFileName):
|
|
560 f = open(inFileName,'w')
|
|
561 f.write('1\tset1\tlm_SuperContig_29_v2\t193781\t194212\n')
|
|
562 f.write('2\tset2\tlm_SuperContig_29_v2\t192832\t193704\n')
|
|
563 f.write('3\tset3\tlm_SuperContig_29_v2\t78031\t78080\n')
|
|
564 f.write('3\tset3\tlm_SuperContig_29_v2\t78081\t78588\n')
|
|
565 f.close()
|
|
566
|
|
567 def _writeSetFileReverse(self,inFileName):
|
|
568 f = open(inFileName,'w')
|
|
569 f.write('1\tset1\tlm_SuperContig_29_v2\t193781\t194212\n')
|
|
570 f.write('2\tset2\tlm_SuperContig_29_v2\t192832\t193704\n')
|
|
571 f.write('3\tset3\tlm_SuperContig_29_v2\t78080\t78031\n')
|
|
572 f.write('3\tset3\tlm_SuperContig_29_v2\t78588\t78081\n')
|
|
573 f.close()
|
|
574
|
|
575 def _writeExpEmptyPathGFFFile(self, inFileName):
|
|
576 f = open(inFileName, 'w')
|
|
577 f.write("##gff-version 3\n")
|
|
578 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n")
|
|
579 f.close()
|
|
580
|
|
581 def _writeExpPathGFFFile(self, inFileName):
|
|
582 f = open(inFileName, 'w')
|
|
583 f.write("##gff-version 3\n")
|
|
584 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
|
|
585 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
|
|
586 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
|
|
587 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
|
|
588 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
|
|
589 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n")
|
|
590 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
|
|
591 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
|
|
592 f.write("##FASTA\n")
|
|
593 self._writeSeq1(f)
|
|
594 f.close()
|
|
595
|
|
596 def _writeExpEmptyPathGFFFileWithSeq(self, inFileName):
|
|
597 f = open(inFileName, 'w')
|
|
598 f.write("##gff-version 3\n")
|
|
599 f.write("##sequence-region lm_SuperContig_30_v2 1 120\n")
|
|
600 f.write("##FASTA\n")
|
|
601 self._writeSeq2(f)
|
|
602 f.close()
|
|
603
|
|
604 def _writeExpPathGFFFile_without_seq(self, inFileName):
|
|
605 f = open(inFileName, 'w')
|
|
606 f.write("##gff-version 3\n")
|
|
607 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
|
|
608 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
|
|
609 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
|
|
610 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
|
|
611 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Parent=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
|
|
612 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n")
|
|
613 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
|
|
614 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
|
|
615 f.close()
|
|
616
|
|
617 def _writeExpPathGFFFile_without_seq_and_match_part_not_comulsory(self, inFileName):
|
|
618 f = open(inFileName, 'w')
|
|
619 f.write("##gff-version 3\n")
|
|
620 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
|
|
621 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 85 228;Identity=30.56\n")
|
|
622 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-L-B1-Map3;Target=DTX-incomp_DmelChr4-L-B1-Map3 229 522;Identity=23.99\n")
|
|
623 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209\n")
|
|
624 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
|
|
625 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Parent=ms3_lm_SuperContig_29_v2_DTX-incomp_DmelChr4-B-P0.0-Map3;Target=DTX-incomp_DmelChr4-B-P0.0-Map3 19 209;Identity=30.89\n")
|
|
626 f.close()
|
|
627
|
|
628 def _writeExpPathGFFFileReversed(self, inFileName):
|
|
629 f = open(inFileName, 'w')
|
|
630 f.write("##gff-version 3\n")
|
|
631 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
|
|
632 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t-\t.\tID=ms1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 85 228;Identity=30.56\n")
|
|
633 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t1e-40\t-\t.\tID=mp1-1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Parent=ms1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 85 228;Identity=30.56\n")
|
|
634 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t-\t.\tID=ms2_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 229 522;Identity=23.99\n")
|
|
635 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t1e-40\t-\t.\tID=mp2-1_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Parent=ms2_lm_SuperContig_29_v2_1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein);Target=1nc550_030 related to putative multidrug transporter Mfs1.1 (major facilitator family protein) 229 522;Identity=23.99\n")
|
|
636 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t-\t.\tID=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209\n")
|
|
637 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t3e-21\t-\t.\tID=mp3-1_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Parent=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209;Identity=30.89\n")
|
|
638 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t3e-21\t-\t.\tID=mp3-2_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Parent=ms3_lm_SuperContig_29_v2_xnc164_090 related to multidrug resistance protein;Target=xnc164_090 related to multidrug resistance protein 19 209;Identity=30.89\n")
|
|
639 f.write("##FASTA\n")
|
|
640 self._writeSeq1(f)
|
|
641 f.close()
|
|
642
|
|
643 def _writeExpSetGFFFile(self, inFileName):
|
|
644 f = open(inFileName, 'w')
|
|
645 f.write("##gff-version 3\n")
|
|
646 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
|
|
647 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t+\t.\tID=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n")
|
|
648 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t0.0\t+\t.\tID=mp1-1_lm_SuperContig_29_v2_set1;Parent=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n")
|
|
649 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t+\t.\tID=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n")
|
|
650 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t0.0\t+\t.\tID=mp2-1_lm_SuperContig_29_v2_set2;Parent=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n")
|
|
651 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t+\t.\tID=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 558\n")
|
|
652 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t0.0\t+\t.\tID=mp3-1_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 50\n")
|
|
653 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t0.0\t+\t.\tID=mp3-2_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 508\n")
|
|
654 f.write("##FASTA\n")
|
|
655 self._writeSeq1(f)
|
|
656 f.close()
|
|
657
|
|
658 def _writeExpSetGFFFileReversed(self, inFileName):
|
|
659 f = open(inFileName, 'w')
|
|
660 f.write("##gff-version 3\n")
|
|
661 f.write("##sequence-region lm_SuperContig_29_v2 1 120\n")
|
|
662 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t193781\t194212\t0.0\t+\t.\tID=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n")
|
|
663 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t193781\t194212\t0.0\t+\t.\tID=mp1-1_lm_SuperContig_29_v2_set1;Parent=ms1_lm_SuperContig_29_v2_set1;Target=set1 1 432\n")
|
|
664 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t192832\t193704\t0.0\t+\t.\tID=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n")
|
|
665 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t192832\t193704\t0.0\t+\t.\tID=mp2-1_lm_SuperContig_29_v2_set2;Parent=ms2_lm_SuperContig_29_v2_set2;Target=set2 1 873\n")
|
|
666 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch\t78031\t78588\t0.0\t-\t.\tID=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 558\n")
|
|
667 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78031\t78080\t0.0\t-\t.\tID=mp3-1_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 50\n")
|
|
668 f.write("lm_SuperContig_29_v2\tprojectName_REPET_TEs\tmatch_part\t78081\t78588\t0.0\t-\t.\tID=mp3-2_lm_SuperContig_29_v2_set3;Parent=ms3_lm_SuperContig_29_v2_set3;Target=set3 1 508\n")
|
|
669 f.write("##FASTA\n")
|
|
670 self._writeSeq1(f)
|
|
671 f.close()
|
|
672
|
|
673 def _writeFastaFile(self, inFileName):
|
|
674 f = open(inFileName,'w')
|
|
675 self._writeSeq2(f)
|
|
676 self._writeSeq1(f)
|
|
677 f.close()
|
|
678
|
|
679 def _writeFastaFileExtended(self, inFileName):
|
|
680 f = open(inFileName,'w')
|
|
681 self._writeSeq2(f)
|
|
682 self._writeSeq1(f)
|
|
683 f.write(">chr1\n")
|
|
684 f.write("CTAAGCTGCGCTATGTAG\n")
|
|
685 f.close()
|
|
686
|
|
687 def _writeSeq1(self, f):
|
|
688 f.write('>lm_SuperContig_29_v2\n')
|
|
689 f.write('CCTAGACAATTAATTATAATAATTAATAAACTATTAGGCTAGTAGTAGGTAATAATAAAA\n')
|
|
690 f.write('GGATTACTACTAAGCTGCGCTATGTAGATATTTAAAACATGTGGCTTAGGCAAGAGTATA\n')
|
|
691
|
|
692 def _writeSeq2(self, f):
|
|
693 f.write('>lm_SuperContig_30_v2\n')
|
|
694 f.write('TGTTCATATTCATAGGATGGAGCTAGTAAGCGATGTCGGCTTAGCTCATCCACATGAATG\n')
|
|
695 f.write('CAGGAATCATGAAGGGTACGACTGTTCGTCGATTAAAGAGCTACACGAGCTGGGTTAAAT\n')
|
|
696
|
|
697 def _writeFastaFile_DmelChr4(self, inFileName):
|
|
698 f = open(inFileName,'w')
|
|
699 f.write(">dmel_chr4\n")
|
|
700 f.write("CTAAGCTGCGCTATGTAG\n")
|
|
701 f.write(">dmel_chr1\n")
|
|
702 f.write("CGTAACGCTAGCGCTTATAGTGAGC\n")
|
|
703 f.close()
|
|
704
|
|
705
|
|
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()