#!/usr/bin/env python

# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
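
"""Launcher for the TEiso pipeline.

From a reference genome, RNA-Seq reads, known transcripts (GTF/GFF) and a
transposable element (TE) annotation (GFF), it builds a Bowtie index, maps the
reads with TopHat, assembles transcripts with Cufflinks, compares them to the
known transcripts with Cuffcompare, converts transcripts and TEs to BED, and
uses bedtools closest to classify each transcript/TE pair (TE_closest_TSS,
TE_overlap_TSS, TE_in_gene, gene_in_TE) together with its Cuffcompare class code.
"""
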
from commons.core.parsing.GtfParser import GtfParser
from commons.core.parsing.GffParser import GffParser
from TEiso.Bowtie_build import Bowtie_build
from TEiso.Tophat import Tophat
from TEiso.Cufflinks import Cufflinks
from TEiso.Cuffcompare import Cuffcompare
from TEiso.Bedtools_closest import Bedtools_closest
from commons.core.LoggerFactory import LoggerFactory
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.utils.FileUtils import FileUtils
import os
import time
import re
import sys

LOG_NAME = "repet.TEiso"

class LaunchTEiso(object):

    def __init__(self, reference="", input_transcripts="", single_paired="", single_reads="", left_reads="", right_reads="", transposable_element="", assembly_tool="", verbosity=3):
        self._reference = reference
        self._transcripts = input_transcripts
        self._type = single_paired
        self._single_reads = single_reads.split(",")
        self._left_reads = left_reads.split(",")
        self._right_reads = right_reads.split(",")
        self._TE = transposable_element
        self._assembly_tool = assembly_tool
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_NAME, self.__class__.__name__), self._verbosity)

    def _setAttributesFromCmdLine(self):
        self._toolVersion = "0.1"
        description = "TEiso version %s" % self._toolVersion
        epilog = "\n if reads are single:\n LaunchTEiso.py -f <genome.fa> -g <transcripts.gtf> -e single -s <single_read> -t <TE.gff> -a cufflinks \n"
        epilog += " if reads are paired:\n LaunchTEiso.py -f <genome.fa> -g <transcripts.gtf> -e paired -l <reads_left> -r <reads_right> -t <TE.gff> -a cufflinks \n"
        parser = RepetOptionParser(description=description, epilog=epilog, version=self._toolVersion)
        parser.add_option('-f', '--input_reference', dest='input_reference', help='file with reference sequences')
        parser.add_option('-g', '--input_transcripts', dest='input_transcripts', help='GTF/GFF with known transcripts', default="")
        parser.add_option('-e', '--single_paired', dest='single_paired', help='type of input reads, single or paired end', default="paired")
        parser.add_option('-s', '--single_read', dest='single_read', help='a single input read', default="")
        parser.add_option('-l', '--left_read', dest='left_read', help='left reads', default="")
        parser.add_option('-r', '--right_read', dest='right_read', help='right reads', default="")
        parser.add_option('-t', '--input_transposable_element', dest='input_transposable_element', help='GFF with known transposable elements', default="")
        parser.add_option('-a', '--assembly_tool', dest='assembly_tool', help='type of RNA-Seq assembly tool', default="cufflinks")
        options = parser.parse_args()[0]
        self.setAttributesFromOptions(options)

    def setAttributesFromOptions(self, options):
        self._reference = options.input_reference
        self._transcripts = options.input_transcripts
        self._type = options.single_paired
        self._single_reads = options.single_read.split(",")
        self._left_reads = options.left_read.split(",")
        self._right_reads = options.right_read.split(",")
        self._TE = options.input_transposable_element
        self._assembly_tool = options.assembly_tool

    def _logAndRaise(self, errorMsg):
        self._log.error(errorMsg)
        #raise Exception(errorMsg)
        sys.exit(1)

    def checkOptions(self):
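        """Check that the read options match the library type (-e) and that the
        reference (-f), transcripts (-g) and TE (-t) files exist."""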
        if self._type == "paired":
            if self._single_reads != ['']:
                self._logAndRaise("ERROR: for paired reads, you should use the left and right reads options!")
            if self._left_reads == ['']:
                self._logAndRaise("ERROR: for paired reads, you should use the left and right reads options!")
            if self._right_reads == ['']:
                self._logAndRaise("ERROR: for paired reads, you should use the left and right reads options!")
            if self._right_reads == self._left_reads:
                self._logAndRaise("ERROR: -l and -r options are the same!")

        if self._type == "single":
            if self._left_reads != ['']:
                self._logAndRaise("ERROR: for single reads, you should use the single reads option!")
            if self._right_reads != ['']:
                self._logAndRaise("ERROR: for single reads, you should use the single reads option!")
            if self._single_reads == ['']:
                self._logAndRaise("ERROR: for single reads, you should use the single reads option!")

        if self._reference != "":
            if not FileUtils.isRessourceExists(self._reference):
                self._logAndRaise("ERROR: reference file %s does not exist!" % self._reference)
        else:
            self._logAndRaise("ERROR: no -f option specified!")

        if self._transcripts != "":
            if not FileUtils.isRessourceExists(self._transcripts):
                self._logAndRaise("ERROR: transcripts file %s does not exist!" % self._transcripts)
        else:
            self._logAndRaise("ERROR: no -g option specified!")

        if self._TE != "":
            if not FileUtils.isRessourceExists(self._TE):
                self._logAndRaise("ERROR: transposable elements file %s does not exist!" % self._TE)
        else:
            self._logAndRaise("ERROR: no -t option specified!")

    def getTranscriptToBed(self, inputFile, outputFile):
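        """Convert an assembly GTF file (e.g. the Cufflinks transcripts.gtf) into a
        BED-like file: chromosome, start, end, transcript ID, gene ID, strand, FPKM."""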
        try:
            filewrite = open(outputFile, "w")
            gtfParser = GtfParser(inputFile, assemblyTools=True)
            for transcript in gtfParser.getIterator():
                if transcript.getDirection() == 1:
                    strand = "+"
                else:
                    strand = "-"
                filewrite.write("%s\t%s\t%s\t%s\t%s\t%s\t%.3f\n" % (transcript.getChromosome(), transcript.getStart(),
                                transcript.getEnd(), transcript.getTagValue("ID"), transcript.getTagValue("gene_id"),
                                strand, float(transcript.getTagValue("FPKM"))))
            filewrite.close()
        except Exception:
            self._logAndRaise("Could not write %s" % outputFile)

    def getTEGFFToBed(self, inputFile, outputFile):
        """TODO: don't write a bed line when the strand is '.';
        see the GtfParser option assemblyTools.
        """
        try:
            filewrite = open(outputFile, "w")
            gffParser = GffParser(inputFile)
            for transcript in gffParser.getIterator():
                if transcript.getDirection() == 1:
                    strand = "+"
                else:
                    strand = "-"
                filewrite.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (transcript.getChromosome(), transcript.getStart(),
                                transcript.getEnd(), transcript.getTagValue("ID").split("_")[0] + "_", transcript.getTagValue("Target").split("_")[0], strand))
            filewrite.close()
        except Exception:
            self._logAndRaise("Could not write %s" % outputFile)

    def getTEnearPromoter(self, bedtoolsfile):
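        """Classify each transcript/TE pair of the bedtools closest output as
        TE_closest_TSS, TE_overlap_TSS, TE_in_gene or gene_in_TE, according to
        their relative positions, the transcript strand and the distance, and
        write the retained lines to
        <bedtoolsfile>_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL."""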
        #### BedParser.py in commons is not used because the format of this bed file is different.
        # #Chrom  starttr  endtr  transcript_id  gene_ID  strand  fpkm  chromte  startte  endte  idte  targetTE  strandTE  distance
        # scaffold_1  37570  37785  GSSPFG00034586001-RA  GSSPFG00034586001  +  0.0000000000  scaffold_1  33914  40164  ms162_  PotentialHostGene  -  0

        linelist = []
        tmplist = []
        with open(bedtoolsfile, "r") as bedFile:
            for line in bedFile.readlines():
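                # The regular expression below captures the 14 tab-separated columns
                # listed above; only the transcript/TE coordinates, the transcript
                # strand and the distance are used afterwards.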
                m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+([^\t]+)\t+([+-])\t+(\d+\.\d+)\t+([^\t]+)+\t+(\d+)\t+(\d+)\t+([^\t]+)+\t+([^\t]+)\t+([+-])\t+([^\t]+)", line)
                if m is not None:
                    start_TR = int(m.group(2))    ## F[1]
                    end_TR = int(m.group(3))      ## F[2]
                    strand_TR = m.group(6)        ## F[5]
                    start_TE = int(m.group(9))    ## F[8]
                    end_TE = int(m.group(10))     ## F[9]
                    dist = int(m.group(14))       ## F[13]
                    if (start_TE < start_TR) and (end_TE < start_TR) and (strand_TR == "+") and (end_TR > end_TE) and (end_TR > start_TE) and (dist != 0):
                        tmplist.append(line.strip())
                        tmplist.append("TE_closest_TSS")
                        linelist.append(tmplist)
                        tmplist = []
                        #                   F[1]          gene          F[2]
                        #                       =========================>
                        #  ------------
                        # F[8]        F[9]

                    if (start_TE > end_TR) and (end_TE > end_TR) and (strand_TR == "-") and (start_TR < start_TE) and (start_TR < end_TE) and (dist != 0):
                        tmplist.append(line.strip())
                        tmplist.append("TE_closest_TSS")
                        linelist.append(tmplist)
                        tmplist = []
                        # F[1]                  F[2]
                        #   <======================
                        #                             ---------------
                        #                          F[8]             F[9]

                    if (start_TE <= start_TR) and (start_TR < end_TE) and (strand_TR == "+") and (end_TR > end_TE) and (end_TR > start_TE):
                        for i in range(0, len(line.split("\t")) - 1):
                            tmplist.append(line.split("\t")[i])
                        overlap = (end_TE - start_TR) + 1
                        tmplist.append(overlap)
                        tmplist.append("TE_overlap_TSS")
                        linelist.append(tmplist)
                        tmplist = []
                        #      F[1]          gene          F[2]
                        #          =========================>
                        #  -------------
                        # F[8]         F[9]
                        #
                        #                     gene
                        #       F[1]=========================>F[2]
                        # F[8]---------------F[9]

                    if (start_TE < start_TR) and (start_TR == end_TE) and (strand_TR == "+") and (end_TR > end_TE) and (end_TR > start_TE):
                        for i in range(0, len(line.split("\t")) - 1):
                            tmplist.append(line.split("\t")[i])
                        tmplist.append(0)
                        tmplist.append("TE_overlap_TSS")
                        linelist.append(tmplist)
                        tmplist = []
                        ##                 F[1]=============================>F[2]
                        ## F[8]---------------F[9]

                    if (start_TE < end_TR) and (end_TR <= end_TE) and (strand_TR == "-") and (start_TR < start_TE) and (start_TR < end_TE):
                        for i in range(0, len(line.split("\t")) - 1):
                            tmplist.append(line.split("\t")[i])
                        overlap = (end_TR - start_TE) + 1
                        tmplist.append(overlap)
                        tmplist.append("TE_overlap_TSS")
                        linelist.append(tmplist)
                        tmplist = []
                        # F[1]<======================F[2]
                        #                   ---------------
                        #                F[8]            F[9]
                        #
                        # F[1]<=============================F[2]
                        #                 F[8]---------------F[9]

                    if (start_TE == end_TR) and (end_TR < end_TE) and (strand_TR == "-") and (start_TR < start_TE) and (start_TR < end_TE):
                        for i in range(0, len(line.split("\t")) - 1):
                            tmplist.append(line.split("\t")[i])
                        tmplist.append(0)
                        tmplist.append("TE_overlap_TSS")
                        linelist.append(tmplist)
                        tmplist = []
                        # F[1]<=============================F[2]
                        #                                 F[8]---------------F[9]

                    if (start_TR < start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE < end_TR) and (dist == 0):
                        tmplist.append(line.strip())
                        tmplist.append("TE_in_gene")
                        linelist.append(tmplist)
                        tmplist = []
                        # F[1]            gene            F[2]
                        #     ==============================
                        #           -----------
                        #         F[8]        F[9]

                    if (start_TE < start_TR) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TR < end_TE):
                        for i in range(0, len(line.split("\t")) - 1):
                            tmplist.append(line.split("\t")[i])
                        lenTE = (end_TE - start_TE) + 1
                        tmplist.append(lenTE)
                        tmplist.append("gene_in_TE")
                        linelist.append(tmplist)
                        tmplist = []
                        #                F[1]======================F[2]
                        # F[8]----------------------------------------------------F[9]

                    if (strand_TR == "+") and (start_TR > start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE == end_TR):
                        tmplist.append(line.strip())
                        tmplist.append("gene_in_TE")
                        linelist.append(tmplist)
                        tmplist = []
                        #                        F[1]==================================>F[2]
                        # F[8]----------------------------------------------------------F[9]

                    if (strand_TR == "-") and (start_TR > start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE == end_TR):
                        tmplist.append(line.strip())
                        tmplist.append("gene_in_TE")
                        linelist.append(tmplist)
                        tmplist = []
                        #                        F[1]<==================================F[2]
                        # F[8]----------------------------------------------------------F[9]

                    if (strand_TR == "+") and (start_TR == start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE > end_TR):
                        tmplist.append(line.strip())
                        tmplist.append("gene_in_TE")
                        linelist.append(tmplist)
                        tmplist = []
                        # F[1]==================================>F[2]
                        # F[8]----------------------------------------------------------F[9]

                    if (strand_TR == "-") and (start_TR == start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE > end_TR):
                        tmplist.append(line.strip())
                        tmplist.append("gene_in_TE")
                        linelist.append(tmplist)
                        tmplist = []
                        # F[1]<==================================F[2]
                        # F[8]----------------------------------------------------------F[9]

        favorablecases = "%s_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL" % bedtoolsfile
        w = open(favorablecases, 'w')
        for s in linelist:
            line = "\t".join(str(item) for item in s)
            w.write("%s\n" % line)
        w.close()

    def getClassCodeCuffcompare(self, tmap_file, bedtools_file):
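        """Append the Cuffcompare class code and reference transcript id (taken from
        the .tmap file, keyed by the assembled transcript id) to each line of the
        bedtools file, or "NA" when the transcript is absent from the .tmap file,
        and write the result to <bedtools_file>_with_Ref."""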
        class_code_dic = {}
        lcode_ref = []
        tmp = []
        linetowrite = []
        with open(tmap_file) as tmap:
            tmapline = tmap.readlines()
            for i in range(1, len(tmapline)):
                cuff_id = tmapline[i].split("\t")[4].strip()
                class_code = tmapline[i].split("\t")[2].strip()
                ref_id = tmapline[i].split("\t")[1].strip()
                lcode_ref.append(class_code)
                lcode_ref.append(ref_id)
                class_code_dic[cuff_id] = lcode_ref
                lcode_ref = []

        with open(bedtools_file) as bedtools:
            bedtoolsline = bedtools.readlines()
            for i in xrange(0, len(bedtoolsline)):
                tmp = bedtoolsline[i].strip().split("\t")
                transcript_bedtools = bedtoolsline[i].split("\t")[3].strip()
                if transcript_bedtools in class_code_dic:
                    tmp.append(class_code_dic[transcript_bedtools][0])
                    tmp.append(class_code_dic[transcript_bedtools][1])
                else:
                    tmp.append("NA")
                    tmp.append("NA")
                linetowrite.append(tmp)
                tmp = []

        output = "%s_with_Ref" % bedtools_file
        w = open(output, 'w')
        line = ""
        for i in xrange(0, len(linetowrite)):
            for j in range(0, 17):
                line = line + linetowrite[i][j] + "\t"
            w.write(line)
            w.write("\n")
            line = ""
        w.close()

    def run(self):
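        """Run the whole TEiso pipeline in a new out_TEiso_<timestamp> directory:
        bowtie-build, TopHat, Cufflinks, Cuffcompare, conversion of transcripts and
        TEs to BED, bedtools closest, classification of the transcript/TE positions
        and annotation with the Cuffcompare class codes."""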
        self.checkOptions()
        try:
            LoggerFactory.setLevel(self._log, self._verbosity)
            exeDir = os.getcwd()
            workingDir = "out_TEiso_%s" % time.strftime("%Y%m%d%H%M%S")
            if os.path.exists(workingDir):
                self._logAndRaise("ERROR: %s already exists." % workingDir)
            os.mkdir(workingDir)
            referencefile = os.path.abspath(self._reference)
            transcriptsfile = os.path.abspath(self._transcripts)
            TEfile = os.path.abspath(self._TE)
            print "workingDir >>>>> ", workingDir
            os.symlink("%s" % os.path.abspath(self._reference), "%s/%s" % (workingDir, os.path.basename(self._reference)))
            os.symlink("%s" % os.path.abspath(self._transcripts), "%s/%s" % (workingDir, os.path.basename(self._transcripts)))
            os.chdir(workingDir)
            bowtie_build_Dir = "bowtie_build"
            prefixbowtie = os.path.basename(self._reference).split(".")[0]
            iLaunchBowtie = Bowtie_build(referencefile, prefixbowtie, bowtie_build_Dir)
            iLaunchBowtie.run()
            os.chdir(exeDir)
            self._log.info("Indexing genome is finished!!!!")
            tophat_Dir = "tophat"
            if self._type == "single":
                l_single_reads = []
                for reads in range(0, len(self._single_reads)):
                    os.symlink("%s" % os.path.abspath(self._single_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._single_reads[reads])))
                    filename = os.path.splitext(self._single_reads[reads])[0]
                    filetype = os.path.splitext(self._single_reads[reads])[1]
                    if filetype == ".gz":
                        os.system("gunzip -c %s > %s/%s" % (self._single_reads[reads], workingDir, os.path.basename(filename)))
                    if filetype == ".bz2":
                        os.system("bunzip2 -c %s > %s/%s" % (os.path.abspath(self._single_reads[reads]), workingDir, os.path.basename(filename)))
                    if filetype == ".fq":
                        filename = self._single_reads[reads]
                    l_single_reads.append("%s" % os.path.basename(filename))
                bowtiePrefix = "%s/%s" % (bowtie_build_Dir, prefixbowtie)
                path = "%s/%s" % (exeDir, workingDir)
                os.chdir(path)
                iLaunchTophat = Tophat(tophat_Dir, bowtiePrefix, self._type, l_single_reads, self._left_reads, self._right_reads)
                iLaunchTophat.run()
            if self._type == "paired":
                l_left_reads = []
                l_right_reads = []
                for reads in range(0, len(self._left_reads)):
                    os.symlink("%s" % os.path.abspath(self._left_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._left_reads[reads])))
                    filename = os.path.splitext(self._left_reads[reads])[0]
                    filetype = os.path.splitext(self._left_reads[reads])[1]
                    ##TODO: check the type of the input file: mimetypes.guess_type(self._single_reads[reads])
                    if filetype == ".gz":
                        os.system("gunzip -c %s > %s/%s" % (self._left_reads[reads], workingDir, os.path.basename(filename)))
                    if filetype == ".bz2":
                        os.system("bunzip2 -c %s > %s/%s" % (self._left_reads[reads], workingDir, os.path.basename(filename)))
                    if filetype == ".fq":
                        filename = self._left_reads[reads]
                    l_left_reads.append("%s" % os.path.basename(filename))

                for reads in range(0, len(self._right_reads)):
                    os.symlink("%s" % os.path.abspath(self._right_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._right_reads[reads])))
                    filename = os.path.splitext(self._right_reads[reads])[0]
                    filetype = os.path.splitext(self._right_reads[reads])[1]
                    if filetype == ".gz":
                        os.system("gunzip -c %s > %s/%s" % (self._right_reads[reads], workingDir, os.path.basename(filename)))
                    if filetype == ".bz2":
                        os.system("bunzip2 -c %s > %s/%s" % (self._right_reads[reads], workingDir, os.path.basename(filename)))
                    if filetype == ".fq":
                        filename = self._right_reads[reads]
                    l_right_reads.append("%s" % os.path.basename(filename))
                bowtiePrefix = "%s/%s" % (bowtie_build_Dir, prefixbowtie)
                path = "%s/%s" % (exeDir, workingDir)
                os.chdir(path)
                iLaunchTophat = Tophat(tophat_Dir, bowtiePrefix, self._type, self._single_reads, l_left_reads, l_right_reads)
                iLaunchTophat.run()
            self._log.info("Mapping reads is finished!!!!")

            if self._assembly_tool == "cufflinks":
                cufflinks_Dir = "cufflinks"
                accepted_hits = "%s/accepted_hits.bam" % tophat_Dir
                iLaunchCufflinks = Cufflinks(accepted_hits, transcriptsfile, cufflinks_Dir)
                iLaunchCufflinks.run()
                self._log.info("%s is finished!!!!" % self._assembly_tool)
                os.symlink("cufflinks/transcripts.gtf", "transcripts.gtf")
                cuffcompare_Dir = "cuffcompare"
                transcripts = "transcripts.gtf"
                iLaunchCuffcompare = Cuffcompare(transcriptsfile, transcripts, outprefix="cuffcompare", workingDir=cuffcompare_Dir)
                iLaunchCuffcompare.run()
                self._log.info("Cuffcompare is finished!!!!")

            bedtools_closest_Dir = "bedtools_closest"
            os.mkdir(bedtools_closest_Dir)
            os.chdir(bedtools_closest_Dir)

            transcriptsgtf = "%s_transcripts.gtf" % self._assembly_tool
            os.symlink("../%s/transcripts.gtf" % self._assembly_tool, transcriptsgtf)
            transcriptsbed = "%s_transcripts.bed" % self._assembly_tool
            self.getTranscriptToBed(transcriptsgtf, transcriptsbed)
            TEgff = os.path.basename(os.path.splitext(TEfile)[0]) + ".gff3"
            TEbed = os.path.basename(os.path.splitext(TEfile)[0]) + ".bed"
            os.symlink("%s" % TEfile, TEgff)
            self.getTEGFFToBed(TEgff, TEbed)
            iLauncherBdc = Bedtools_closest(transcriptsbed, TEbed, "bedtools_closest_%s" % transcriptsgtf.split(".")[0])
            iLauncherBdc.run()
            self._log.info("Bedtools closest is finished!!!!")
            bedtoolsfile = "bedtools_closest_%s" % transcriptsgtf.split(".")[0]
            self.getTEnearPromoter(bedtoolsfile)
            tmap_file = "../cuffcompare/cuffcompare.transcripts.gtf.tmap"
            bedtools_file = "%s_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL" % bedtoolsfile

            self.getClassCodeCuffcompare(tmap_file, bedtools_file)
            os.chdir("..")
            self._log.info("Done!!!!")

        except Exception as e:
            self._logAndRaise("ERROR in TEiso: %s" % e)
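
# Programmatic use (a minimal sketch; the file names below are placeholders,
# equivalent to the CLI examples given in the epilog of _setAttributesFromCmdLine):
#     iLaunch = LaunchTEiso(reference="genome.fa", input_transcripts="transcripts.gtf",
#                           single_paired="paired", left_reads="reads_left.fq",
#                           right_reads="reads_right.fq", transposable_element="TE.gff",
#                           assembly_tool="cufflinks")
#     iLaunch.run()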

if __name__ == "__main__":
    iLaunch = LaunchTEiso()
    iLaunch._setAttributesFromCmdLine()
    iLaunch.run()