annotate TEisotools-1.1.a/TEiso/LaunchTEiso.py @ 15:255c852351c5 draft

Uploaded
author urgi-team
date Thu, 21 Jul 2016 07:36:44 -0400
parents feef9a0db09d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
1 #!/usr/bin/env python
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
2
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
3 # Copyright INRA (Institut National de la Recherche Agronomique)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
4 # http://www.inra.fr
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
5 # http://urgi.versailles.inra.fr
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
6 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
7 # This software is governed by the CeCILL license under French law and
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
8 # abiding by the rules of distribution of free software. You can use,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
9 # modify and/ or redistribute the software under the terms of the CeCILL
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
10 # license as circulated by CEA, CNRS and INRIA at the following URL
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
11 # "http://www.cecill.info".
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
12 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
13 # As a counterpart to the access to the source code and rights to copy,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
14 # modify and redistribute granted by the license, users are provided only
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
15 # with a limited warranty and the software's author, the holder of the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
16 # economic rights, and the successive licensors have only limited
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
17 # liability.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
18 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
19 # In this respect, the user's attention is drawn to the risks associated
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
20 # with loading, using, modifying and/or developing or reproducing the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
21 # software by the user in light of its specific status of free software,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
22 # that may mean that it is complicated to manipulate, and that also
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
23 # therefore means that it is reserved for developers and experienced
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
24 # professionals having in-depth computer knowledge. Users are therefore
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
25 # encouraged to load and test the software's suitability as regards their
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
26 # requirements in conditions enabling the security of their systems and/or
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
27 # data to be ensured and, more generally, to use and operate it in the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
28 # same conditions as regards security.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
29 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
30 # The fact that you are presently reading this means that you have had
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
31 # knowledge of the CeCILL license and that you accept its terms.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
32
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
33
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
34 from commons.core.parsing.GtfParser import GtfParser
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
35 from commons.core.parsing.GffParser import GffParser
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
36 from TEiso.Bowtie_build import Bowtie_build
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
37 from TEiso.Tophat import Tophat
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
38 from TEiso.Cufflinks import Cufflinks
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
39 from TEiso.Cuffcompare import Cuffcompare
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
40 from TEiso.Bedtools_closest import Bedtools_closest
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
41 from commons.core.LoggerFactory import LoggerFactory
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
42 from commons.core.utils.RepetOptionParser import RepetOptionParser
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
43 from commons.core.utils.FileUtils import FileUtils
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
44 import os
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
45 import time
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
46 import re
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
47 import sys
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
48 LOG_NAME = "repet.TEiso"
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
49
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
50 class LaunchTEiso(object):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
51
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def __init__(self, reference="", input_transcripts="", single_paired="", single_reads="", left_reads="", right_reads="", transposable_element = "", assembly_tool="", verbosity=3):
    """Record the launcher inputs and create the logger for this class.

    The three read arguments are comma-separated strings. Each one is split
    on "," and stored as a list, so an empty string is stored as [''].
    The logger name is built from LOG_NAME and the class name, and the
    logger is created with the given verbosity.
    """
    readSeparator = ","

    # Input files, stored unchanged.
    self._reference = reference
    self._transcripts = input_transcripts

    # Read layout and the read files that go with it.
    self._type = single_paired
    self._single_reads = single_reads.split(readSeparator)
    self._left_reads = left_reads.split(readSeparator)
    self._right_reads = right_reads.split(readSeparator)

    self._TE = transposable_element
    self._assembly_tool = assembly_tool
    self._verbosity = verbosity

    loggerName = "%s.%s" % (LOG_NAME, self.__class__.__name__)
    self._log = LoggerFactory.createLogger(loggerName, self._verbosity)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
63
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def _setAttributesFromCmdLine(self):
    """Declare the command-line options, parse them and store the values.

    Sets self._toolVersion, builds a RepetOptionParser with the usage
    examples as epilog, and hands the parsed options to
    setAttributesFromOptions().
    """
    self._toolVersion = "0.1"

    usageExamples = (
        "\n if reads are single:\n LaunchTEiso.py -f <genome.fa> -g <transcripts.gtf> -e single -s <single_read> -t <TE.gff> -a cufflinks \n",
        " if reads are paired:\n LaunchTEiso.py -f <genome.fa> -g <transcripts.gtf> -e paired -l <reads_left> -r <reads_right> -t <TE.gff> -a cufflinks \n",
    )
    parser = RepetOptionParser(description = "TEiso version %s" % self._toolVersion,
                               epilog = "".join(usageExamples),
                               version = self._toolVersion)

    # (short flag, long flag, help text, extra keyword arguments).
    # The destination name is always the long flag without its dashes.
    # '-f' is the only option declared without a default value.
    optionTable = (
        ('-f', '--input_reference', 'file with ref sequences', {}),
        ('-g', '--input_transcripts', 'GTF/GFF with known transcripts', {'default': ""}),
        ('-e', '--single_paired', 'type of input reads, single or paired end', {'default': "paired"}),
        ('-s', '--single_read', 'a single input read', {'default': ""}),
        ('-l', '--left_read', 'left reads', {'default': ""}),
        ('-r', '--right_read', 'right reads', {'default': ""}),
        ('-t', '--input_transposable_element', 'GFF with known transposable_element', {'default': ""}),
        ('-a', '--assembly_tool', 'type of RNA-Seq assembly tool', {'default': "cufflinks"}),
    )
    for shortFlag, longFlag, helpText, extraArgs in optionTable:
        parser.add_option(shortFlag, longFlag, dest=longFlag.lstrip("-"), help=helpText, **extraArgs)

    parsed = parser.parse_args()
    self.setAttributesFromOptions(parsed[0])
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
80
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def setAttributesFromOptions(self, options):
    """Copy parsed option values onto the instance.

    options must expose input_reference, input_transcripts, single_paired,
    single_read, left_read, right_read, input_transposable_element and
    assembly_tool. The three read options are split on "," into lists;
    the others are stored unchanged.
    """
    # (instance attribute, option attribute, split on "," into a list?)
    # Kept in the original assignment order.
    transferPlan = (
        ("_reference", "input_reference", False),
        ("_transcripts", "input_transcripts", False),
        ("_type", "single_paired", False),
        ("_single_reads", "single_read", True),
        ("_left_reads", "left_read", True),
        ("_right_reads", "right_read", True),
        ("_TE", "input_transposable_element", False),
        ("_assembly_tool", "assembly_tool", False),
    )
    for attributeName, optionName, isReadList in transferPlan:
        rawValue = getattr(options, optionName)
        if isReadList:
            rawValue = rawValue.split(",")
        setattr(self, attributeName, rawValue)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
90
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def _logAndRaise(self, errorMsg):
    """Log errorMsg at error level, then stop the program with status 1.

    Despite the name, no ordinary exception is raised: the method ends by
    raising SystemExit(1), which is what sys.exit(1) does.
    """
    self._log.error(errorMsg)
    raise SystemExit(1)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
95
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def checkOptions(self):
    """Validate the read options and the three required input files.

    Every problem is reported through self._logAndRaise(), which logs the
    message and exits, so only the first problem found is reported.

    Checks, in order:
      * "paired" reads: no single reads given, left and right reads both
        given, and left and right read lists different;
      * "single" reads: no left/right reads given, single reads given;
      * reference (-f), transcripts (-g) and transposable elements (-t):
        each must be specified and must exist.

    An input that is None is treated like an empty string. '-f' is declared
    without a default, so its value can be None when the option is omitted;
    it is then reported as a missing option rather than being passed to the
    existence check.
    """
    noReads = ['']

    if self._type == "paired":
        pairedMsg = "ERROR: for paired reads, you shoud use option left and right reads!"
        if self._single_reads != noReads:
            self._logAndRaise(pairedMsg)
        if self._left_reads == noReads:
            self._logAndRaise(pairedMsg)
        if self._right_reads == noReads:
            self._logAndRaise(pairedMsg)
        if self._right_reads == self._left_reads:
            self._logAndRaise("ERROR: -l and -r options are same!")

    if self._type == "single":
        singleMsg = "ERROR: for single reads, you shoud use option single reads!"
        if self._left_reads != noReads:
            self._logAndRaise(singleMsg)
        if self._right_reads != noReads:
            self._logAndRaise(singleMsg)
        if self._single_reads == noReads:
            self._logAndRaise(singleMsg)

    # (path, message when the file is absent, message when the option is absent)
    requiredInputs = (
        (self._reference, "ERROR: reference file %s does not exist!", "ERROR: No specified -f option!"),
        (self._transcripts, "ERROR: transcripts file %s does not exist!", "ERROR: No specified -g option!"),
        (self._TE, "ERROR: transposable elements %s does not exist!", "ERROR: No specified -t option!"),
    )
    for path, absentFileMsg, absentOptionMsg in requiredInputs:
        if not path:
            # Covers both "" (explicit default) and None (option without default).
            self._logAndRaise(absentOptionMsg)
        elif not FileUtils.isRessourceExists(path):
            self._logAndRaise(absentFileMsg % path)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
132
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
133
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
134
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getTranscriptToBed(self, inputFile, outputFile):
    """Write the transcripts of a GTF file as tab-separated BED-like lines.

    One line is written per transcript yielded by GtfParser:
    chromosome, start, end, "ID" tag, "gene_id" tag, strand and the "FPKM"
    tag formatted with three decimals. The strand is "+" when
    getDirection() returns 1 and "-" otherwise.

    Failures are reported through self._logAndRaise(). A failure to open
    outputFile and a failure while converting get distinct messages, and
    the output handle is closed in every case.
    """
    try:
        bedFile = open(outputFile, "w")
    except (IOError, OSError):
        self._logAndRaise("Couldn't open %s for writing" % outputFile)
        return  # only reached if _logAndRaise is overridden not to exit

    try:
        gtfParser = GtfParser(inputFile, assemblyTools=True)
        for transcript in gtfParser.getIterator():
            strand = "+" if transcript.getDirection() == 1 else "-"
            bedFile.write("%s\t%s\t%s\t%s\t%s\t%s\t%.3f\n" % (
                transcript.getChromosome(),
                transcript.getStart(),
                transcript.getEnd(),
                transcript.getTagValue("ID"),
                transcript.getTagValue("gene_id"),
                strand,
                float(transcript.getTagValue("FPKM")),
            ))
    except Exception as error:
        # Parsing or formatting problem: report what failed, not a bogus open error.
        self._logAndRaise("Couldn't convert %s to %s: %s" % (inputFile, outputFile, error))
    finally:
        bedFile.close()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
151
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
152
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getTEGFFToBed(self, inputFile, outputFile):
    """Write the features of a TE GFF file as tab-separated BED-like lines.

    One line is written per feature yielded by GffParser:
    chromosome, start, end, the part of the "ID" tag before the first "_"
    followed by "_", the part of the "Target" tag before the first "_",
    and the strand. The strand is "+" when getDirection() returns 1 and
    "-" otherwise.

    Failures are reported through self._logAndRaise(). A failure to open
    outputFile and a failure while converting get distinct messages, and
    the output handle is closed in every case.

    TODO Dont write bed line when the strand is '.'
    See Gtf parser option assemblyTools
    """
    try:
        bedFile = open(outputFile, "w")
    except (IOError, OSError):
        self._logAndRaise("Couldn't open %s for writing" % outputFile)
        return  # only reached if _logAndRaise is overridden not to exit

    try:
        gffParser = GffParser(inputFile)
        for transcript in gffParser.getIterator():
            strand = "+" if transcript.getDirection() == 1 else "-"
            bedFile.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (
                transcript.getChromosome(),
                transcript.getStart(),
                transcript.getEnd(),
                transcript.getTagValue("ID").split("_")[0] + "_",
                transcript.getTagValue("Target").split("_")[0],
                strand,
            ))
    except Exception as error:
        # Parsing or formatting problem: report what failed, not a bogus open error.
        self._logAndRaise("Couldn't convert %s to %s: %s" % (inputFile, outputFile, error))
    finally:
        bedFile.close()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
171
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
172
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getTEnearPromoter(self, bedtoolsfile):
    """Label transcript/TE pairs by the relative position of the TE.

    bedtoolsfile is read line by line. Each line is expected to hold
    tab-separated columns:

        chrom, start_TR, end_TR, transcript_id, gene_id, strand_TR, fpkm,
        chrom_TE, start_TE, end_TE, id_TE, target_TE, strand_TE, distance

    Lines that do not match this layout are ignored. Matching lines are
    tested against the position cases below; for every case that holds,
    one line is written to
    "<bedtoolsfile>_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL".

    A written line is one of:
      * the stripped input line followed by a label, or
      * the input columns without the last one, followed by a computed
        value (overlap length, 0, or TE length) and a label.

    Labels are TE_closest_TSS, TE_overlap_TSS, TE_in_gene and gene_in_TE.
    """
    # BedParser.py in commons is not used because the format of this bed
    # file is different (see the column list in the docstring). Example:
    # scaffold_1 37570 37785 GSSPFG00034586001-RA GSSPFG00034586001 + 0.0000000000 scaffold_1 33914 40164 ms162_ PotentialHostGene - 0
    #
    # Every free-text field is a single ([^\t]+) group. A repeated group
    # such as ([^\t]+)+ matches the same text but backtracks exponentially
    # on lines that fail after it.
    bedLinePattern = re.compile(
        r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+([^\t]+)\t+([+-])\t+(\d+\.\d+)"
        r"\t+([^\t]+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+([^\t]+)\t+([+-])\t+([^\t]+)")

    linelist = []
    with open(bedtoolsfile, "r") as bedFile:
        for line in bedFile:
            m = bedLinePattern.search(line)
            if m is None:
                continue

            start_TR = int(m.group(2))
            end_TR = int(m.group(3))
            strand_TR = m.group(6)
            start_TE = int(m.group(9))
            end_TE = int(m.group(10))
            dist = int(m.group(14))
            onPlus = (strand_TR == "+")
            onMinus = (strand_TR == "-")

            # (condition, label, extra column). With extra column None the
            # whole stripped line is kept; otherwise the last input column
            # is replaced by the extra column. Cases are evaluated
            # independently and in this order.
            cases = (
                # '+' transcript, TE entirely before the transcript start, distance not 0.
                ((start_TE < start_TR) and (end_TE < start_TR) and onPlus
                 and (end_TR > end_TE) and (end_TR > start_TE) and (dist != 0),
                 "TE_closest_TSS", None),
                # '-' transcript, TE entirely after the transcript end, distance not 0.
                ((start_TE > end_TR) and (end_TE > end_TR) and onMinus
                 and (start_TR < start_TE) and (start_TR < end_TE) and (dist != 0),
                 "TE_closest_TSS", None),
                # '+' transcript, TE covers the transcript start and ends inside it.
                ((start_TE <= start_TR) and (start_TR < end_TE) and onPlus
                 and (end_TR > end_TE) and (end_TR > start_TE),
                 "TE_overlap_TSS", (end_TE - start_TR) + 1),
                # '+' transcript, TE ends exactly on the transcript start.
                ((start_TE < start_TR) and (start_TR == end_TE) and onPlus
                 and (end_TR > end_TE) and (end_TR > start_TE),
                 "TE_overlap_TSS", 0),
                # '-' transcript, TE starts inside it and covers the transcript end.
                ((start_TE < end_TR) and (end_TR <= end_TE) and onMinus
                 and (start_TR < start_TE) and (start_TR < end_TE),
                 "TE_overlap_TSS", (end_TR - start_TE) + 1),
                # '-' transcript, TE starts exactly on the transcript end.
                ((start_TE == end_TR) and (end_TR < end_TE) and onMinus
                 and (start_TR < start_TE) and (start_TR < end_TE),
                 "TE_overlap_TSS", 0),
                # TE strictly inside the transcript, distance 0 (either strand).
                ((start_TR < start_TE) and (start_TR < end_TE)
                 and (start_TE < end_TR) and (end_TE < end_TR) and (dist == 0),
                 "TE_in_gene", None),
                # Transcript strictly inside the TE (either strand); extra column is the TE length.
                ((start_TE < start_TR) and (start_TR < end_TE)
                 and (start_TE < end_TR) and (end_TR < end_TE),
                 "gene_in_TE", (end_TE - start_TE) + 1),
                # Transcript inside the TE, sharing its end coordinate. The
                # strand is '+' or '-' by construction of the pattern, so
                # one entry covers both strands.
                ((start_TR > start_TE) and (start_TR < end_TE)
                 and (start_TE < end_TR) and (end_TE == end_TR),
                 "gene_in_TE", None),
                # Transcript inside the TE, sharing its start coordinate (either strand).
                ((start_TR == start_TE) and (start_TR < end_TE)
                 and (start_TE < end_TR) and (end_TE > end_TR),
                 "gene_in_TE", None),
            )

            for matched, label, extraColumn in cases:
                if not matched:
                    continue
                if extraColumn is None:
                    linelist.append([line.strip(), label])
                else:
                    linelist.append(line.split("\t")[:-1] + [extraColumn, label])

    favorablecases = "%s_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL" % bedtoolsfile
    with open(favorablecases, 'w') as w:
        for s in linelist:
            w.write("%s\n" % "\t".join(str(item) for item in s))
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
338
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
339
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def getClassCodeCuffcompare(self, tmap_file, bedtools_file):
    """Append the cuffcompare class code and reference id to bedtools rows.

    ``tmap_file`` is read as tab-separated text whose first line is skipped
    as a header; for every other line the transcript id (5th column) is
    mapped to its class code (3rd column) and reference id (2nd column).

    Each line of ``bedtools_file`` is then split on tabs and extended with
    the class code and reference id found for the transcript id in its 4th
    column, or with ``NA``/``NA`` when that transcript is not in the tmap.

    The result is written to ``<bedtools_file>_with_Ref``: the first 17
    fields of every row, each followed by a tab, one row per line.  Blank
    lines in either input are skipped.

    Raises:
        IndexError: a tmap line has fewer than 5 columns, a bedtools line
            has fewer than 4, or an annotated row has fewer than 17 fields.
    """
    column_count = 17

    # transcript id -> [class code, reference id]
    class_code_dic = {}
    with open(tmap_file) as tmap:
        next(tmap, None)  # header line
        for raw in tmap:
            if not raw.strip():
                continue
            columns = raw.split("\t")
            cuff_id = columns[4].strip()
            class_code_dic[cuff_id] = [columns[2].strip(), columns[1].strip()]

    # All rows are collected before the output file is created, so that a
    # malformed input line does not leave an empty output file behind.
    not_found = ["NA", "NA"]
    linetowrite = []
    with open(bedtools_file) as bedtools:
        for raw in bedtools:
            if not raw.strip():
                continue
            row = raw.strip().split("\t")
            # The id is taken from the unstripped line, so that leading tabs
            # do not shift which column is read.
            transcript_bedtools = raw.split("\t")[3].strip()
            row.extend(class_code_dic.get(transcript_bedtools, not_found))
            linetowrite.append(row)

    output = "%s_with_Ref" % bedtools_file
    with open(output, 'w') as w:
        for row in linetowrite:
            # Explicit indexing (rather than a slice) keeps short rows an
            # error instead of silently writing a truncated record.
            fields = [row[j] for j in range(column_count)]
            w.write("\t".join(fields) + "\t\n")
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
382
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
def run(self):
    """Run the TEiso pipeline inside a new ``out_TEiso_<timestamp>`` directory.

    After ``checkOptions()`` the method, in order:

    1. creates the working directory and symlinks the reference and the
       transcripts annotation into it;
    2. runs the ``Bowtie_build`` launcher on the reference;
    3. stages the read files (see ``_stageReads``) and runs the ``Tophat``
       launcher for the ``single`` or ``paired`` read type;
    4. when the assembly tool is ``cufflinks``, runs the ``Cufflinks`` and
       ``Cuffcompare`` launchers;
    5. in ``bedtools_closest/``, converts the transcripts and the TE
       annotation to BED, runs the ``Bedtools_closest`` launcher, then
       ``getTEnearPromoter`` and ``getClassCodeCuffcompare`` on its output.

    The process's current directory is changed along the way and is left
    at the working directory on success.

    Any exception raised by a step is reported through ``_logAndRaise``
    with the original error text appended.
    """
    self.checkOptions()
    try:
        LoggerFactory.setLevel(self._log, self._verbosity)
        exeDir = os.getcwd()
        workingDir = "out_TEiso_%s" % time.strftime("%Y%m%d%H%M%S")
        if os.path.exists(workingDir):
            self._logAndRaise("ERROR: %s already exists." % workingDir)
        os.mkdir(workingDir)
        referencefile = os.path.abspath(self._reference)
        transcriptsfile = os.path.abspath(self._transcripts)
        TEfile = os.path.abspath(self._TE)
        # Single-argument call form: same output on Python 2 and 3.
        print("workingDir >>>>>  %s" % workingDir)
        os.symlink(referencefile, "%s/%s" % (workingDir, os.path.basename(self._reference)))
        os.symlink(transcriptsfile, "%s/%s" % (workingDir, os.path.basename(self._transcripts)))

        # Genome indexing runs from inside the working directory.
        os.chdir(workingDir)
        bowtie_build_Dir = "bowtie_build"
        prefixbowtie = os.path.basename(self._reference).split(".")[0]
        iLaunchBowtie = Bowtie_build(referencefile, prefixbowtie, bowtie_build_Dir)
        iLaunchBowtie.run()
        os.chdir(exeDir)
        self._log.info("Indexing genome is finished!!!!")

        # Reads are staged from the launch directory (so that relative read
        # paths resolve), then TopHat runs from the working directory.
        tophat_Dir = "tophat"
        bowtiePrefix = "%s/%s" % (bowtie_build_Dir, prefixbowtie)
        path = "%s/%s" % (exeDir, workingDir)
        if self._type == "single":
            l_single_reads = self._stageReads(self._single_reads, workingDir)
            os.chdir(path)
            iLaunchTophat = Tophat(tophat_Dir, bowtiePrefix, self._type, l_single_reads, self._left_reads, self._right_reads)
            iLaunchTophat.run()
        if self._type == "paired":
            l_left_reads = self._stageReads(self._left_reads, workingDir)
            l_right_reads = self._stageReads(self._right_reads, workingDir)
            os.chdir(path)
            iLaunchTophat = Tophat(tophat_Dir, bowtiePrefix, self._type, self._single_reads, l_left_reads, l_right_reads)
            iLaunchTophat.run()
        self._log.info("Mapping reads is finished!!!!")

        if self._assembly_tool == "cufflinks":
            cufflinks_Dir = "cufflinks"
            accepted_hits = "%s/accepted_hits.bam" % tophat_Dir
            iLaunchCufflinks = Cufflinks(accepted_hits, transcriptsfile, cufflinks_Dir)
            iLaunchCufflinks.run()
            self._log.info("%s is finished!!!!" % self._assembly_tool)
            os.symlink("cufflinks/transcripts.gtf", "transcripts.gtf")
            cuffcompare_Dir = "cuffcompare"
            transcripts = "transcripts.gtf"
            iLaunchCuffcompare = Cuffcompare(transcriptsfile, transcripts, outprefix="cuffcompare", workingDir=cuffcompare_Dir)
            iLaunchCuffcompare.run()
            self._log.info("Cuffcompare is finished!!!!")

        bedtools_closest_Dir = "bedtools_closest"
        os.mkdir(bedtools_closest_Dir)
        os.chdir(bedtools_closest_Dir)

        transcriptsgtf = "%s_transcripts.gtf" % self._assembly_tool
        os.symlink("../%s/transcripts.gtf" % self._assembly_tool, transcriptsgtf)
        transcriptsbed = "%s_transcripts.bed" % self._assembly_tool
        self.getTranscriptToBed(transcriptsgtf, transcriptsbed)
        TEgff = os.path.basename(os.path.splitext(TEfile)[0]) + ".gff3"
        TEbed = os.path.basename(os.path.splitext(TEfile)[0]) + ".bed"
        os.symlink("%s" % TEfile, TEgff)
        self.getTEGFFToBed(TEgff, TEbed)
        bedtoolsfile = "bedtools_closest_%s" % transcriptsgtf.split(".")[0]
        iLauncherBdc = Bedtools_closest(transcriptsbed, TEbed, bedtoolsfile)
        iLauncherBdc.run()
        self._log.info("Bedtools closest is finished!!!!")
        self.getTEnearPromoter(bedtoolsfile)
        tmap_file = "../cuffcompare/cuffcompare.transcripts.gtf.tmap"
        bedtools_file = "%s_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL" % bedtoolsfile

        self.getClassCodeCuffcompare(tmap_file, bedtools_file)
        os.chdir("..")
        self._log.info("Done!!!!")

    except Exception as error:
        # Keep the underlying cause in the message instead of hiding it.
        self._logAndRaise("ERROR in TEiso: %s" % error)

def _stageReads(self, reads, workingDir):
    """Link read files into ``workingDir`` and return the names to map.

    Every file in ``reads`` is symlinked into ``workingDir`` under its own
    basename.  A file ending in ``.gz`` or ``.bz2`` is additionally
    decompressed (``gunzip -c`` / ``bunzip2 -c``) into ``workingDir`` under
    its basename minus that suffix, and that decompressed name is the one
    returned.  Any other file is returned under its own basename, whatever
    its extension.

    Relative paths in ``reads`` and ``workingDir`` are resolved against the
    current directory at call time.

    Raises:
        OSError: a link target already exists or the decompressor is missing.
        subprocess.CalledProcessError: the decompressor exits non-zero.
    """
    import subprocess  # only this helper needs it

    decompressors = {".gz": "gunzip", ".bz2": "bunzip2"}
    staged = []
    for read in reads:
        source = os.path.abspath(read)
        name = os.path.basename(read)
        os.symlink(source, "%s/%s" % (workingDir, name))
        stem, extension = os.path.splitext(name)
        tool = decompressors.get(extension)
        if tool is not None:
            name = stem
            # Argument list + explicit stdout: no shell quoting issues, and
            # a failed decompression is reported instead of ignored.
            with open("%s/%s" % (workingDir, name), "wb") as unpacked:
                subprocess.check_call([tool, "-c", source], stdout=unpacked)
        staged.append(name)
    return staged
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
498
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
499
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: build the launcher, load its settings from the
    # command line, then run the pipeline.
    launcher = LaunchTEiso()
    launcher._setAttributesFromCmdLine()
    launcher.run()