Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.1.a/TEiso/LaunchTEiso.py @ 13:feef9a0db09d draft
Uploaded
author | urgi-team |
---|---|
date | Wed, 20 Jul 2016 09:04:42 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12:22b0494ec883 | 13:feef9a0db09d |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright INRA (Institut National de la Recherche Agronomique) | |
4 # http://www.inra.fr | |
5 # http://urgi.versailles.inra.fr | |
6 # | |
7 # This software is governed by the CeCILL license under French law and | |
8 # abiding by the rules of distribution of free software. You can use, | |
9 # modify and/ or redistribute the software under the terms of the CeCILL | |
10 # license as circulated by CEA, CNRS and INRIA at the following URL | |
11 # "http://www.cecill.info". | |
12 # | |
13 # As a counterpart to the access to the source code and rights to copy, | |
14 # modify and redistribute granted by the license, users are provided only | |
15 # with a limited warranty and the software's author, the holder of the | |
16 # economic rights, and the successive licensors have only limited | |
17 # liability. | |
18 # | |
19 # In this respect, the user's attention is drawn to the risks associated | |
20 # with loading, using, modifying and/or developing or reproducing the | |
21 # software by the user in light of its specific status of free software, | |
22 # that may mean that it is complicated to manipulate, and that also | |
23 # therefore means that it is reserved for developers and experienced | |
24 # professionals having in-depth computer knowledge. Users are therefore | |
25 # encouraged to load and test the software's suitability as regards their | |
26 # requirements in conditions enabling the security of their systems and/or | |
27 # data to be ensured and, more generally, to use and operate it in the | |
28 # same conditions as regards security. | |
29 # | |
30 # The fact that you are presently reading this means that you have had | |
31 # knowledge of the CeCILL license and that you accept its terms. | |
32 | |
33 | |
34 from commons.core.parsing.GtfParser import GtfParser | |
35 from commons.core.parsing.GffParser import GffParser | |
36 from TEiso.Bowtie_build import Bowtie_build | |
37 from TEiso.Tophat import Tophat | |
38 from TEiso.Cufflinks import Cufflinks | |
39 from TEiso.Cuffcompare import Cuffcompare | |
40 from TEiso.Bedtools_closest import Bedtools_closest | |
41 from commons.core.LoggerFactory import LoggerFactory | |
42 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
43 from commons.core.utils.FileUtils import FileUtils | |
44 import os | |
45 import time | |
46 import re | |
47 import sys | |
48 LOG_NAME = "repet.TEiso" | |
49 | |
50 class LaunchTEiso(object): | |
51 | |
52 def __init__(self, reference="", input_transcripts="", single_paired="", single_reads="", left_reads="", right_reads="", transposable_element = "", assembly_tool="", verbosity=3): | |
53 self._reference = reference | |
54 self._transcripts = input_transcripts | |
55 self._type = single_paired | |
56 self._single_reads = single_reads.split(",") | |
57 self._left_reads = left_reads.split(",") | |
58 self._right_reads = right_reads.split(",") | |
59 self._TE = transposable_element | |
60 self._assembly_tool = assembly_tool | |
61 self._verbosity = verbosity | |
62 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_NAME, self.__class__.__name__), self._verbosity) | |
63 | |
64 def _setAttributesFromCmdLine(self): | |
65 self._toolVersion = "0.1" | |
66 description = "TEiso version %s" % self._toolVersion | |
67 epilog = "\n if reads are single:\n LaunchTEiso.py -f <genome.fa> -g <transcripts.gtf> -e single -s <single_read> -t <TE.gff> -a cufflinks \n" | |
68 epilog += " if reads are paired:\n LaunchTEiso.py -f <genome.fa> -g <transcripts.gtf> -e paired -l <reads_left> -r <reads_right> -t <TE.gff> -a cufflinks \n" | |
69 parser = RepetOptionParser(description = description, epilog = epilog, version = self._toolVersion) | |
70 parser.add_option('-f' , '--input_reference' , dest='input_reference' , help='file with ref sequences') | |
71 parser.add_option('-g' , '--input_transcripts', dest='input_transcripts', help='GTF/GFF with known transcripts' , default="") | |
72 parser.add_option('-e' , '--single_paired' , dest='single_paired' , help='type of input reads, single or paired end', default="paired") | |
73 parser.add_option('-s' , '--single_read' , dest='single_read' , help='a single input read' , default="") | |
74 parser.add_option('-l', '--left_read' , dest='left_read' , help='left reads' , default="") | |
75 parser.add_option('-r', '--right_read' , dest='right_read' , help='right reads' , default="") | |
76 parser.add_option('-t' , '--input_transposable_element', dest='input_transposable_element', help='GFF with known transposable_element' , default="") | |
77 parser.add_option('-a' , '--assembly_tool' , dest='assembly_tool' , help='type of RNA-Seq assembly tool' , default="cufflinks") | |
78 options = parser.parse_args()[0] | |
79 self.setAttributesFromOptions(options) | |
80 | |
81 def setAttributesFromOptions(self, options): | |
82 self._reference = options.input_reference | |
83 self._transcripts = options.input_transcripts | |
84 self._type = options.single_paired | |
85 self._single_reads = options.single_read.split(",") | |
86 self._left_reads = options.left_read.split(",") | |
87 self._right_reads = options.right_read.split(",") | |
88 self._TE = options.input_transposable_element | |
89 self._assembly_tool = options.assembly_tool | |
90 | |
91 def _logAndRaise(self, errorMsg): | |
92 self._log.error(errorMsg) | |
93 #raise Exception(errorMsg) | |
94 sys.exit(1) | |
95 | |
96 def checkOptions(self): | |
97 if self._type == "paired": | |
98 if self._single_reads != ['']: | |
99 self._logAndRaise("ERROR: for paired reads, you shoud use option left and right reads!") | |
100 if self._left_reads == ['']: | |
101 self._logAndRaise("ERROR: for paired reads, you shoud use option left and right reads!") | |
102 if self._right_reads == ['']: | |
103 self._logAndRaise("ERROR: for paired reads, you shoud use option left and right reads!") | |
104 if self._right_reads == self._left_reads: | |
105 self._logAndRaise("ERROR: -l and -r options are same!") | |
106 | |
107 if self._type == "single": | |
108 if self._left_reads != ['']: | |
109 self._logAndRaise("ERROR: for single reads, you shoud use option single reads!") | |
110 if self._right_reads != ['']: | |
111 self._logAndRaise("ERROR: for single reads, you shoud use option single reads!") | |
112 if self._single_reads == ['']: | |
113 self._logAndRaise("ERROR: for single reads, you shoud use option single reads!") | |
114 | |
115 if self._reference != "": | |
116 if not FileUtils.isRessourceExists(self._reference): | |
117 self._logAndRaise("ERROR: reference file %s does not exist!" % self._reference) | |
118 else: | |
119 self._logAndRaise("ERROR: No specified -f option!") | |
120 | |
121 if self._transcripts != "": | |
122 if not FileUtils.isRessourceExists(self._transcripts): | |
123 self._logAndRaise("ERROR: transcripts file %s does not exist!" % self._transcripts) | |
124 else: | |
125 self._logAndRaise("ERROR: No specified -g option!") | |
126 | |
127 if self._TE != "": | |
128 if not FileUtils.isRessourceExists(self._TE): | |
129 self._logAndRaise("ERROR: transposable elements %s does not exist!" % self._TE) | |
130 else: | |
131 self._logAndRaise("ERROR: No specified -t option!") | |
132 | |
133 | |
134 | |
135 def getTranscriptToBed(self, inputFile,outputFile): | |
136 try: | |
137 filewrite=open(outputFile, "w") | |
138 gtfParser = GtfParser(inputFile, assemblyTools=True) | |
139 for transcript in gtfParser.getIterator(): | |
140 if(transcript.getDirection()==1): | |
141 strand="+" | |
142 else: | |
143 strand="-" | |
144 filewrite.write("%s\t%s\t%s\t%s\t%s\t%s\t%.3f\n" % (transcript.getChromosome(),transcript.getStart(), | |
145 transcript.getEnd(), transcript.getTagValue("ID"), transcript.getTagValue("gene_id"), | |
146 strand,float(transcript.getTagValue("FPKM")) )) | |
147 | |
148 filewrite.close() | |
149 except: | |
150 self._logAndRaise("Couldn't open %s for writing" % outputFile) | |
151 | |
152 | |
153 def getTEGFFToBed(self, inputFile, outputFile): | |
154 """TODO Dont write bed line when the strand is '.' | |
155 See Gtf parser option assemblyTools | |
156 """ | |
157 try: | |
158 filewrite=open(outputFile, "w") | |
159 gffParser = GffParser(inputFile) | |
160 for transcript in gffParser.getIterator(): | |
161 if(transcript.getDirection()==1): | |
162 strand="+" | |
163 else: | |
164 strand="-" | |
165 filewrite.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (transcript.getChromosome(),transcript.getStart(), | |
166 transcript.getEnd(), transcript.getTagValue("ID").split("_")[0]+"_", transcript.getTagValue("Target").split("_")[0], strand) ) | |
167 | |
168 filewrite.close() | |
169 except: | |
170 self._logAndRaise("Couldn't open %s for writing" % outputFile) | |
171 | |
172 | |
173 def getTEnearPromoter (self, bedtoolsfile): | |
174 #### BEdParser.py in commons is not used because the format of this bed file is different. | |
175 # #Chrom starttr endtr transcript_id gene_ID strand fpkm chromte startte endte idte targetTE strandTE distance | |
176 #scaffold_1 37570 37785 GSSPFG00034586001-RA GSSPFG00034586001 + 0.0000000000 scaffold_1 33914 40164 ms162_ PotentialHostGene - 0 | |
177 | |
178 linelist = [] | |
179 tmplist = [] | |
180 with open(bedtoolsfile, "r") as bedFile: | |
181 for line in bedFile.readlines(): | |
182 m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+([^\t]+)\t+([+-])\t+(\d+\.\d+)\t+([^\t]+)+\t+(\d+)\t+(\d+)\t+([^\t]+)+\t+([^\t]+)\t+([+-])\t+([^\t]+)",line) | |
183 if(m != None): | |
184 start_TR = int(m.group(2))##F[1] | |
185 end_TR = int(m.group(3))##F[2] | |
186 strand_TR= m.group(6) ##[5] | |
187 start_TE = int(m.group(9))##[8] | |
188 end_TE = int(m.group(10))##[9] | |
189 dist = int(m.group(14))##[13] | |
190 if (start_TE < start_TR) and (end_TE < start_TR) and (strand_TR =="+") and (end_TR > end_TE) and (end_TR > start_TE) and (dist != 0): | |
191 tmplist.append(line.strip()) | |
192 tmplist.append("TE_closest_TSS") | |
193 linelist.append(tmplist) | |
194 tmplist = [] | |
195 # F[1] gene F[2] | |
196 # =========================> | |
197 # ------------ | |
198 # F[8] F[9] | |
199 | |
200 if (start_TE > end_TR) and (end_TE > end_TR) and (strand_TR =="-") and (start_TR < start_TE) and (start_TR < end_TE) and (dist != 0): | |
201 tmplist.append(line.strip()) | |
202 tmplist.append("TE_closest_TSS") | |
203 linelist.append(tmplist) | |
204 tmplist = [] | |
205 | |
206 # F[1] F[2] | |
207 # <====================== | |
208 # --------------- | |
209 | |
210 if (start_TE <= start_TR) and (start_TR < end_TE) and (strand_TR =="+") and (end_TR > end_TE) and (end_TR > start_TE): | |
211 for i in range(0,len(line.split("\t"))-1): | |
212 tmplist.append(line.split("\t")[i]) | |
213 overlap = (end_TE-start_TR)+1 | |
214 tmplist.append(overlap) | |
215 tmplist.append("TE_overlap_TSS") | |
216 linelist.append(tmplist) | |
217 tmplist = [] | |
218 | |
219 # F[1] gene F[2] | |
220 # =========================> | |
221 # ------------- | |
222 # F[8] F[9] | |
223 | |
224 # gene | |
225 # F[1]=========================>F[2] | |
226 | |
227 # F[8]---------------F[9] | |
228 | |
229 | |
230 if (start_TE < start_TR) and (start_TR == end_TE) and (strand_TR =="+") and (end_TR > end_TE) and (end_TR > start_TE): | |
231 for i in range(0,len(line.split("\t"))-1): | |
232 tmplist.append(line.split("\t")[i]) | |
233 tmplist.append(0) | |
234 tmplist.append("TE_overlap_TSS") | |
235 linelist.append(tmplist) | |
236 tmplist = [] | |
237 | |
238 ## F[1]=============================>F[2] | |
239 ## F[8]---------------F[9] | |
240 | |
241 | |
242 if (start_TE < end_TR) and (end_TR <= end_TE) and (strand_TR =="-") and (start_TR < start_TE) and (start_TR < end_TE): | |
243 for i in range(0,len(line.split("\t"))-1): | |
244 tmplist.append(line.split("\t")[i]) | |
245 overlap = (end_TR-start_TE)+1 | |
246 tmplist.append(overlap) | |
247 tmplist.append("TE_overlap_TSS") | |
248 linelist.append(tmplist) | |
249 tmplist = [] | |
250 | |
251 | |
252 # F[1]<======================F[2] | |
253 # --------------- | |
254 # F[8] F[9] | |
255 # | |
256 # | |
257 # F[1]<=============================F[2] | |
258 # F[8]---------------F[9] | |
259 | |
260 if (start_TE == end_TR) and (end_TR < end_TE) and (strand_TR =="-") and (start_TR < start_TE) and (start_TR < end_TE): | |
261 for i in range(0,len(line.split("\t"))-1): | |
262 tmplist.append(line.split("\t")[i]) | |
263 tmplist.append(0) | |
264 tmplist.append("TE_overlap_TSS") | |
265 linelist.append(tmplist) | |
266 tmplist = [] | |
267 | |
268 # F[1]<=============================F[2] | |
269 # F[8]---------------F[9] | |
270 | |
271 if (start_TR < start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE < end_TR) and (dist == 0): | |
272 tmplist.append(line.strip()) | |
273 tmplist.append("TE_in_gene") | |
274 linelist.append(tmplist) | |
275 tmplist = [] | |
276 | |
277 # F[1] gene F[2] | |
278 # ============================== | |
279 # ----------- | |
280 # F[8] F[9] | |
281 | |
282 | |
283 if (start_TE < start_TR) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TR < end_TE): | |
284 for i in range(0,len(line.split("\t"))-1): | |
285 tmplist.append(line.split("\t")[i]) | |
286 lenTE = (end_TE-start_TE)+1 | |
287 tmplist.append(lenTE) | |
288 tmplist.append("gene_in_TE") | |
289 linelist.append(tmplist) | |
290 tmplist = [] | |
291 | |
292 # F[1]======================F[2] | |
293 # F[8]----------------------------------------------------F[9] | |
294 | |
295 | |
296 if (strand_TR =="+") and (start_TR > start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE == end_TR): | |
297 tmplist.append(line.strip()) | |
298 tmplist.append("gene_in_TE") | |
299 linelist.append(tmplist) | |
300 tmplist = [] | |
301 | |
302 # F[1]==================================>F[2] | |
303 # F[8]----------------------------------------------------------F[9] | |
304 | |
305 if (strand_TR =="-") and (start_TR > start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE == end_TR): | |
306 tmplist.append(line.strip()) | |
307 tmplist.append("gene_in_TE") | |
308 linelist.append(tmplist) | |
309 tmplist = [] | |
310 | |
311 # F[1]<==================================F[2] | |
312 # F[8]----------------------------------------------------------F[9] | |
313 | |
314 if (strand_TR =="+") and (start_TR == start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE > end_TR): | |
315 tmplist.append(line.strip()) | |
316 tmplist.append("gene_in_TE") | |
317 linelist.append(tmplist) | |
318 tmplist = [] | |
319 | |
320 # F[1]==================================>F[2] | |
321 # F[8]----------------------------------------------------------F[9] | |
322 | |
323 if (strand_TR =="-") and (start_TR == start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE > end_TR): | |
324 tmplist.append(line.strip()) | |
325 tmplist.append("gene_in_TE") | |
326 linelist.append(tmplist) | |
327 tmplist = [] | |
328 | |
329 # F[1]<==================================F[2] | |
330 # F[8]----------------------------------------------------------F[9] | |
331 | |
332 favorablecases = "%s_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL" % bedtoolsfile | |
333 w = open(favorablecases,'w') | |
334 for s in linelist: | |
335 line= "\t".join(str(item) for item in s) | |
336 w.write("%s\n" % line) | |
337 w.close() | |
338 | |
339 | |
340 def getClassCodeCuffcompare(self, tmap_file, bedtools_file): | |
341 class_code_dic = {} | |
342 lcode_ref = [] | |
343 tmp = [] | |
344 linetowrite =[] | |
345 with open(tmap_file) as tmap: | |
346 tmapline = tmap.readlines() | |
347 for i in range(1,len(tmapline)): | |
348 cuff_id = tmapline[i].split("\t")[4].strip() | |
349 class_code = tmapline[i].split("\t")[2].strip() | |
350 ref_id = tmapline[i].split("\t")[1].strip() | |
351 lcode_ref.append(class_code) | |
352 lcode_ref.append(ref_id) | |
353 class_code_dic[cuff_id] = lcode_ref | |
354 lcode_ref = [] | |
355 | |
356 with open(bedtools_file) as bedtools: | |
357 bedtoolsline = bedtools.readlines() | |
358 for i in xrange(0,len(bedtoolsline)): | |
359 tmp = bedtoolsline[i].strip().split("\t") | |
360 transcript_bedtools = bedtoolsline[i].split("\t")[3].strip() | |
361 if transcript_bedtools in class_code_dic.keys(): | |
362 tmp.append(class_code_dic[transcript_bedtools][0]) | |
363 tmp.append(class_code_dic[transcript_bedtools][1]) | |
364 else: | |
365 tmp.append("NA") | |
366 tmp.append("NA") | |
367 | |
368 linetowrite.append(tmp) | |
369 tmp=[] | |
370 | |
371 | |
372 output = "%s_with_Ref" % bedtools_file | |
373 w = open(output,'w') | |
374 line = "" | |
375 for i in xrange(0,len(linetowrite)): | |
376 for j in range(0,17): | |
377 line = line + linetowrite[i][j] + "\t" | |
378 w.write(line) | |
379 w.write("\n") | |
380 line = "" | |
381 w.close() | |
382 | |
383 def run(self): | |
384 self.checkOptions() | |
385 try: | |
386 LoggerFactory.setLevel(self._log, self._verbosity) | |
387 exeDir = os.getcwd() | |
388 workingDir = "out_TEiso_%s" % time.strftime("%Y%m%d%H%M%S") | |
389 if os.path.exists(workingDir): | |
390 self._logAndRaise("ERROR: %s already exists." % workingDir) | |
391 os.mkdir(workingDir) | |
392 referencefile = os.path.abspath(self._reference) | |
393 transcriptsfile = os.path.abspath(self._transcripts) | |
394 TEfile = os.path.abspath(self._TE) | |
395 print "workingDir >>>>> ",workingDir | |
396 os.symlink("%s" % os.path.abspath(self._reference), "%s/%s" % (workingDir, os.path.basename(self._reference))) | |
397 os.symlink("%s" % os.path.abspath(self._transcripts), "%s/%s" % (workingDir, os.path.basename(self._transcripts))) | |
398 os.chdir(workingDir) | |
399 bowtie_build_Dir = "bowtie_build" | |
400 prefixbowtie = os.path.basename(self._reference).split(".")[0] | |
401 iLaunchBowtie = Bowtie_build(referencefile, prefixbowtie, bowtie_build_Dir) | |
402 iLaunchBowtie.run() | |
403 os.chdir(exeDir) | |
404 self._log.info("Indexing genome is finished!!!!") | |
405 tophat_Dir = "tophat" | |
406 if self._type == "single": | |
407 l_single_reads = [] | |
408 for reads in range(0, len(self._single_reads)): | |
409 os.symlink("%s" % os.path.abspath(self._single_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._single_reads[reads]))) | |
410 filename = os.path.splitext(self._single_reads[reads])[0] | |
411 filetype = os.path.splitext(self._single_reads[reads])[1] | |
412 if filetype == ".gz": | |
413 os.system("gunzip -c %s > %s/%s" % (self._single_reads[reads], workingDir, os.path.basename(filename))) | |
414 if filetype == ".bz2": | |
415 os.system("bunzip2 -c %s > %s/%s" % (os.path.abspath(self._single_reads[reads]), workingDir, os.path.basename(filename))) | |
416 if filetype ==".fq": | |
417 filename = self._single_reads[reads] | |
418 l_single_reads.append("%s" % os.path.basename(filename)) | |
419 bowtiePrefix = "%s/%s" % (bowtie_build_Dir, prefixbowtie) | |
420 path = ("%s/%s") % (exeDir,workingDir) | |
421 os.chdir(path) | |
422 iLaunchTophat = Tophat(tophat_Dir, bowtiePrefix, self._type, l_single_reads, self._left_reads, self._right_reads) | |
423 iLaunchTophat.run() | |
424 if self._type == "paired": | |
425 l_left_reads = [] | |
426 l_right_reads = [] | |
427 for reads in range(0, len(self._left_reads)): | |
428 os.symlink("%s" % os.path.abspath(self._left_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._left_reads[reads]))) | |
429 filename = os.path.splitext(self._left_reads[reads])[0] | |
430 filetype = os.path.splitext(self._left_reads[reads])[1] | |
431 ##TODO : check type input file: mimetypes.guess_type(self._single_reads[reads]) | |
432 if filetype == ".gz": | |
433 os.system("gunzip -c %s > %s/%s" % (self._left_reads[reads],workingDir, os.path.basename(filename))) | |
434 if filetype == ".bz2": | |
435 os.system("bunzip2 -c %s > %s/%s" % (self._left_reads[reads],workingDir, os.path.basename(filename))) | |
436 if filetype ==".fq": | |
437 filename = self._left_reads[reads] | |
438 l_left_reads.append("%s" % os.path.basename(filename)) | |
439 | |
440 for reads in range(0, len(self._right_reads)): | |
441 os.symlink("%s" % os.path.abspath(self._right_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._right_reads[reads]))) | |
442 filename = os.path.splitext(self._right_reads[reads])[0] | |
443 filetype = os.path.splitext(self._right_reads[reads])[1] | |
444 if filetype == ".gz": | |
445 os.system("gunzip -c %s > %s/%s" % (self._right_reads[reads],workingDir, os.path.basename(filename))) | |
446 if filetype == ".bz2": | |
447 os.system("bunzip2 -c %s > %s/%s" % (self._right_reads[reads],workingDir, os.path.basename(filename))) | |
448 if filetype ==".fq": | |
449 filename = self._right_reads[reads] | |
450 l_right_reads.append("%s" % os.path.basename(filename)) | |
451 bowtiePrefix = "%s/%s" % (bowtie_build_Dir, prefixbowtie) | |
452 path= ("%s/%s") % (exeDir,workingDir) | |
453 os.chdir(path) | |
454 iLaunchTophat = Tophat(tophat_Dir, bowtiePrefix, self._type, self._single_reads, l_left_reads, l_right_reads) | |
455 iLaunchTophat.run() | |
456 self._log.info("Mapping reads is finished!!!!") | |
457 | |
458 if self._assembly_tool == "cufflinks": | |
459 cufflinks_Dir = "cufflinks" | |
460 accepted_hits = "%s/accepted_hits.bam" % tophat_Dir | |
461 iLaunchCufflinks = Cufflinks(accepted_hits, transcriptsfile , cufflinks_Dir) | |
462 iLaunchCufflinks.run() | |
463 self._log.info("%s is finished!!!!" % self._assembly_tool) | |
464 os.symlink("cufflinks/transcripts.gtf", "transcripts.gtf") | |
465 cuffcompare_Dir = "cuffcompare" | |
466 transcripts = "transcripts.gtf" | |
467 iLaunchCuffcompare = Cuffcompare(transcriptsfile, transcripts, outprefix = "cuffcompare", workingDir = cuffcompare_Dir) | |
468 iLaunchCuffcompare.run() | |
469 self._log.info("Cuffcompare is finished!!!!") | |
470 | |
471 | |
472 bedtools_closest_Dir = "bedtools_closest" | |
473 os.mkdir(bedtools_closest_Dir) | |
474 os.chdir(bedtools_closest_Dir) | |
475 | |
476 transcriptsgtf = "%s_transcripts.gtf" % self._assembly_tool | |
477 os.symlink("../%s/transcripts.gtf" % self._assembly_tool,transcriptsgtf) | |
478 transcriptsbed = "%s_transcripts.bed" % self._assembly_tool | |
479 self.getTranscriptToBed(transcriptsgtf,transcriptsbed) | |
480 TEgff = os.path.basename(os.path.splitext(TEfile)[0]) + ".gff3" | |
481 TEbed = os.path.basename(os.path.splitext(TEfile)[0]) + ".bed" | |
482 os.symlink("%s" % TEfile,TEgff) | |
483 self.getTEGFFToBed(TEgff,TEbed) | |
484 iLauncherBdc= Bedtools_closest(transcriptsbed, TEbed, "bedtools_closest_%s" % transcriptsgtf.split(".")[0]) | |
485 iLauncherBdc.run() | |
486 self._log.info("Bedtools closest is finished!!!!") | |
487 bedtoolsfile = "bedtools_closest_%s" % transcriptsgtf.split(".")[0] | |
488 self.getTEnearPromoter(bedtoolsfile) | |
489 tmap_file = "../cuffcompare/cuffcompare.transcripts.gtf.tmap" | |
490 bedtools_file = "%s_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL" % bedtoolsfile | |
491 | |
492 self.getClassCodeCuffcompare(tmap_file,bedtools_file) | |
493 os.chdir("..") | |
494 self._log.info("Done!!!!") | |
495 | |
496 except Exception: | |
497 self._logAndRaise("ERROR in TEiso") | |
498 | |
499 | |
500 if __name__ == "__main__": | |
501 iLaunch = LaunchTEiso() | |
502 iLaunch._setAttributesFromCmdLine() | |
503 iLaunch.run() |