comparison TEisotools-1.0/TEiso/LaunchTEiso.py @ 6:20ec0d14798e draft
Uploaded
| author | urgi-team |
|---|---|
| date | Wed, 20 Jul 2016 05:00:24 -0400 |
| parents | |
| children | |
| 5:4093a2fb58be | 6:20ec0d14798e |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 # Copyright INRA (Institut National de la Recherche Agronomique) | |
| 4 # http://www.inra.fr | |
| 5 # http://urgi.versailles.inra.fr | |
| 6 # | |
| 7 # This software is governed by the CeCILL license under French law and | |
| 8 # abiding by the rules of distribution of free software. You can use, | |
| 9 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 10 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 11 # "http://www.cecill.info". | |
| 12 # | |
| 13 # As a counterpart to the access to the source code and rights to copy, | |
| 14 # modify and redistribute granted by the license, users are provided only | |
| 15 # with a limited warranty and the software's author, the holder of the | |
| 16 # economic rights, and the successive licensors have only limited | |
| 17 # liability. | |
| 18 # | |
| 19 # In this respect, the user's attention is drawn to the risks associated | |
| 20 # with loading, using, modifying and/or developing or reproducing the | |
| 21 # software by the user in light of its specific status of free software, | |
| 22 # that may mean that it is complicated to manipulate, and that also | |
| 23 # therefore means that it is reserved for developers and experienced | |
| 24 # professionals having in-depth computer knowledge. Users are therefore | |
| 25 # encouraged to load and test the software's suitability as regards their | |
| 26 # requirements in conditions enabling the security of their systems and/or | |
| 27 # data to be ensured and, more generally, to use and operate it in the | |
| 28 # same conditions as regards security. | |
| 29 # | |
| 30 # The fact that you are presently reading this means that you have had | |
| 31 # knowledge of the CeCILL license and that you accept its terms. | |
| 32 | |
| 33 | |
| 34 from commons.core.parsing.GtfParser import GtfParser | |
| 35 from commons.core.parsing.GffParser import GffParser | |
| 36 from TEiso.Bowtie_build import Bowtie_build | |
| 37 from TEiso.Tophat import Tophat | |
| 38 from TEiso.Cufflinks import Cufflinks | |
| 39 from TEiso.Cuffcompare import Cuffcompare | |
| 40 from TEiso.Bedtools_closest import Bedtools_closest | |
| 41 from commons.core.LoggerFactory import LoggerFactory | |
| 42 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
| 43 from commons.core.utils.FileUtils import FileUtils | |
| 44 import os | |
| 45 import time | |
| 46 import re | |
| 47 import sys | |
| 48 LOG_NAME = "repet.TEiso" | |
| 49 | |
| 50 class LaunchTEiso(object): | |
| 51 | |
| 52 def __init__(self, reference="", input_transcripts="", single_paired="", single_reads="", left_reads="", right_reads="", transposable_element = "", assembly_tool="", verbosity=3): | |
| 53 self._reference = reference | |
| 54 self._transcripts = input_transcripts | |
| 55 self._type = single_paired | |
| 56 self._single_reads = single_reads.split(",") | |
| 57 self._left_reads = left_reads.split(",") | |
| 58 self._right_reads = right_reads.split(",") | |
| 59 self._TE = transposable_element | |
| 60 self._assembly_tool = assembly_tool | |
| 61 self._verbosity = verbosity | |
| 62 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_NAME, self.__class__.__name__), self._verbosity) | |
| 63 | |
| 64 def _setAttributesFromCmdLine(self): | |
| 65 self._toolVersion = "0.1" | |
| 66 description = "TEiso version %s" % self._toolVersion | |
| 67 epilog = "\n if reads are single:\n LaunchTEiso.py -f <genome.fa> -g <transcripts.gtf> -e single -s <single_read> -t <TE.gff> -a cufflinks \n" | |
| 68 epilog += " if reads are paired:\n LaunchTEiso.py -f <genome.fa> -g <transcripts.gtf> -e paired -l <reads_left> -r <reads_right> -t <TE.gff> -a cufflinks \n" | |
| 69 parser = RepetOptionParser(description = description, epilog = epilog, version = self._toolVersion) | |
| 70 parser.add_option('-f' , '--input_reference' , dest='input_reference' , help='file with ref sequences') | |
| 71 parser.add_option('-g' , '--input_transcripts', dest='input_transcripts', help='GTF/GFF with known transcripts' , default="") | |
| 72 parser.add_option('-e' , '--single_paired' , dest='single_paired' , help='type of input reads, single or paired end', default="paired") | |
| 73 parser.add_option('-s' , '--single_read' , dest='single_read' , help='a single input read' , default="") | |
| 74 parser.add_option('-l', '--left_read' , dest='left_read' , help='left reads' , default="") | |
| 75 parser.add_option('-r', '--right_read' , dest='right_read' , help='right reads' , default="") | |
| 76 parser.add_option('-t' , '--input_transposable_element', dest='input_transposable_element', help='GFF with known transposable_element' , default="") | |
| 77 parser.add_option('-a' , '--assembly_tool' , dest='assembly_tool' , help='type of RNA-Seq assembly tool' , default="cufflinks") | |
| 78 options = parser.parse_args()[0] | |
| 79 self.setAttributesFromOptions(options) | |
| 80 | |
| 81 def setAttributesFromOptions(self, options): | |
| 82 self._reference = options.input_reference | |
| 83 self._transcripts = options.input_transcripts | |
| 84 self._type = options.single_paired | |
| 85 self._single_reads = options.single_read.split(",") | |
| 86 self._left_reads = options.left_read.split(",") | |
| 87 self._right_reads = options.right_read.split(",") | |
| 88 self._TE = options.input_transposable_element | |
| 89 self._assembly_tool = options.assembly_tool | |
| 90 | |
| 91 def _logAndRaise(self, errorMsg): | |
| 92 self._log.error(errorMsg) | |
| 93 #raise Exception(errorMsg) | |
| 94 sys.exit(1) | |
| 95 | |
| 96 def checkOptions(self): | |
| 97 if self._type == "paired": | |
| 98 if self._single_reads != ['']: | |
| 99 self._logAndRaise("ERROR: for paired reads, you should use the left and right reads options!") | |
| 100 if self._left_reads == ['']: | |
| 101 self._logAndRaise("ERROR: for paired reads, you should use the left and right reads options!") | |
| 102 if self._right_reads == ['']: | |
| 103 self._logAndRaise("ERROR: for paired reads, you should use the left and right reads options!") | |
| 104 if self._right_reads == self._left_reads: | |
| 105 self._logAndRaise("ERROR: -l and -r options are the same!") | |
| 106 | |
| 107 if self._type == "single": | |
| 108 if self._left_reads != ['']: | |
| 109 self._logAndRaise("ERROR: for single reads, you should use the single reads option!") | |
| 110 if self._right_reads != ['']: | |
| 111 self._logAndRaise("ERROR: for single reads, you should use the single reads option!") | |
| 112 if self._single_reads == ['']: | |
| 113 self._logAndRaise("ERROR: for single reads, you should use the single reads option!") | |
| 114 | |
| 115 if self._reference != "": | |
| 116 if not FileUtils.isRessourceExists(self._reference): | |
| 117 self._logAndRaise("ERROR: reference file %s does not exist!" % self._reference) | |
| 118 else: | |
| 119 self._logAndRaise("ERROR: no -f option specified!") | |
| 120 | |
| 121 if self._transcripts != "": | |
| 122 if not FileUtils.isRessourceExists(self._transcripts): | |
| 123 self._logAndRaise("ERROR: transcripts file %s does not exist!" % self._transcripts) | |
| 124 else: | |
| 125 self._logAndRaise("ERROR: no -g option specified!") | |
| 126 | |
| 127 if self._TE != "": | |
| 128 if not FileUtils.isRessourceExists(self._TE): | |
| 129 self._logAndRaise("ERROR: transposable elements file %s does not exist!" % self._TE) | |
| 130 else: | |
| 131 self._logAndRaise("ERROR: no -t option specified!") | |
| 132 | |
| 133 | |
| 134 | |
| 135 def getTranscriptToBed(self, inputFile,outputFile): | |
| 136 try: | |
| 137 filewrite=open(outputFile, "w") | |
| 138 gtfParser = GtfParser(inputFile, assemblyTools=True) | |
| 139 for transcript in gtfParser.getIterator(): | |
| 140 if(transcript.getDirection()==1): | |
| 141 strand="+" | |
| 142 else: | |
| 143 strand="-" | |
| 144 filewrite.write("%s\t%s\t%s\t%s\t%s\t%s\t%.3f\n" % (transcript.getChromosome(),transcript.getStart(), | |
| 145 transcript.getEnd(), transcript.getTagValue("ID"), transcript.getTagValue("gene_id"), | |
| 146 strand,float(transcript.getTagValue("FPKM")) )) | |
| 147 | |
| 148 filewrite.close() | |
| 149 except: | |
| 150 self._logAndRaise("Couldn't open %s for writing" % outputFile) | |
| 151 | |
| 152 | |
| 153 def getTEGFFToBed(self, inputFile, outputFile): | |
| 154 """TODO: don't write a bed line when the strand is '.' | |
| 155 See the GtfParser option assemblyTools | |
| 156 """ | |
| 157 try: | |
| 158 filewrite=open(outputFile, "w") | |
| 159 gffParser = GffParser(inputFile) | |
| 160 for transcript in gffParser.getIterator(): | |
| 161 if(transcript.getDirection()==1): | |
| 162 strand="+" | |
| 163 else: | |
| 164 strand="-" | |
| 165 filewrite.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (transcript.getChromosome(),transcript.getStart(), | |
| 166 transcript.getEnd(), transcript.getTagValue("ID").split("_")[0]+"_", transcript.getTagValue("Target").split("_")[0], strand) ) | |
| 167 | |
| 168 filewrite.close() | |
| 169 except: | |
| 170 self._logAndRaise("Couldn't open %s for writing" % outputFile) | |
| 171 | |
| 172 | |
| 173 def getTEnearPromoter (self, bedtoolsfile): | |
| 174 #### BedParser.py in commons is not used because the format of this bed file is different. | |
| 175 # #Chrom starttr endtr transcript_id gene_ID strand fpkm chromte startte endte idte targetTE strandTE distance | |
| 176 #scaffold_1 37570 37785 GSSPFG00034586001-RA GSSPFG00034586001 + 0.0000000000 scaffold_1 33914 40164 ms162_ PotentialHostGene - 0 | |
| 177 | |
| 178 linelist = [] | |
| 179 tmplist = [] | |
| 180 with open(bedtoolsfile, "r") as bedFile: | |
| 181 for line in bedFile.readlines(): | |
| 182 m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+([^\t]+)\t+([+-])\t+(\d+\.\d+)\t+([^\t]+)+\t+(\d+)\t+(\d+)\t+([^\t]+)+\t+([^\t]+)\t+([+-])\t+([^\t]+)",line) | |
| 183 if(m != None): | |
| 184 start_TR = int(m.group(2))##F[1] | |
| 185 end_TR = int(m.group(3))##F[2] | |
| 186 strand_TR= m.group(6) ##[5] | |
| 187 start_TE = int(m.group(9))##[8] | |
| 188 end_TE = int(m.group(10))##[9] | |
| 189 dist = int(m.group(14))##[13] | |
| 190 if (start_TE < start_TR) and (end_TE < start_TR) and (strand_TR =="+") and (end_TR > end_TE) and (end_TR > start_TE) and (dist != 0): | |
| 191 tmplist.append(line.strip()) | |
| 192 tmplist.append("TE_closest_TSS") | |
| 193 linelist.append(tmplist) | |
| 194 tmplist = [] | |
| 195 # F[1] gene F[2] | |
| 196 # =========================> | |
| 197 # ------------ | |
| 198 # F[8] F[9] | |
| 199 | |
| 200 if (start_TE > end_TR) and (end_TE > end_TR) and (strand_TR =="-") and (start_TR < start_TE) and (start_TR < end_TE) and (dist != 0): | |
| 201 tmplist.append(line.strip()) | |
| 202 tmplist.append("TE_closest_TSS") | |
| 203 linelist.append(tmplist) | |
| 204 tmplist = [] | |
| 205 | |
| 206 # F[1] F[2] | |
| 207 # <====================== | |
| 208 # --------------- | |
| 209 | |
| 210 if (start_TE <= start_TR) and (start_TR < end_TE) and (strand_TR =="+") and (end_TR > end_TE) and (end_TR > start_TE): | |
| 211 for i in range(0,len(line.split("\t"))-1): | |
| 212 tmplist.append(line.split("\t")[i]) | |
| 213 overlap = (end_TE-start_TR)+1 | |
| 214 tmplist.append(overlap) | |
| 215 tmplist.append("TE_overlap_TSS") | |
| 216 linelist.append(tmplist) | |
| 217 tmplist = [] | |
| 218 | |
| 219 # F[1] gene F[2] | |
| 220 # =========================> | |
| 221 # ------------- | |
| 222 # F[8] F[9] | |
| 223 | |
| 224 # gene | |
| 225 # F[1]=========================>F[2] | |
| 226 | |
| 227 # F[8]---------------F[9] | |
| 228 | |
| 229 | |
| 230 if (start_TE < start_TR) and (start_TR == end_TE) and (strand_TR =="+") and (end_TR > end_TE) and (end_TR > start_TE): | |
| 231 for i in range(0,len(line.split("\t"))-1): | |
| 232 tmplist.append(line.split("\t")[i]) | |
| 233 tmplist.append(0) | |
| 234 tmplist.append("TE_overlap_TSS") | |
| 235 linelist.append(tmplist) | |
| 236 tmplist = [] | |
| 237 | |
| 238 ## F[1]=============================>F[2] | |
| 239 ## F[8]---------------F[9] | |
| 240 | |
| 241 | |
| 242 if (start_TE < end_TR) and (end_TR <= end_TE) and (strand_TR =="-") and (start_TR < start_TE) and (start_TR < end_TE): | |
| 243 for i in range(0,len(line.split("\t"))-1): | |
| 244 tmplist.append(line.split("\t")[i]) | |
| 245 overlap = (end_TR-start_TE)+1 | |
| 246 tmplist.append(overlap) | |
| 247 tmplist.append("TE_overlap_TSS") | |
| 248 linelist.append(tmplist) | |
| 249 tmplist = [] | |
| 250 | |
| 251 | |
| 252 # F[1]<======================F[2] | |
| 253 # --------------- | |
| 254 # F[8] F[9] | |
| 255 # | |
| 256 # | |
| 257 # F[1]<=============================F[2] | |
| 258 # F[8]---------------F[9] | |
| 259 | |
| 260 if (start_TE == end_TR) and (end_TR < end_TE) and (strand_TR =="-") and (start_TR < start_TE) and (start_TR < end_TE): | |
| 261 for i in range(0,len(line.split("\t"))-1): | |
| 262 tmplist.append(line.split("\t")[i]) | |
| 263 tmplist.append(0) | |
| 264 tmplist.append("TE_overlap_TSS") | |
| 265 linelist.append(tmplist) | |
| 266 tmplist = [] | |
| 267 | |
| 268 # F[1]<=============================F[2] | |
| 269 # F[8]---------------F[9] | |
| 270 | |
| 271 if (start_TR < start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE < end_TR) and (dist == 0): | |
| 272 tmplist.append(line.strip()) | |
| 273 tmplist.append("TE_in_gene") | |
| 274 linelist.append(tmplist) | |
| 275 tmplist = [] | |
| 276 | |
| 277 # F[1] gene F[2] | |
| 278 # ============================== | |
| 279 # ----------- | |
| 280 # F[8] F[9] | |
| 281 | |
| 282 | |
| 283 if (start_TE < start_TR) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TR < end_TE): | |
| 284 for i in range(0,len(line.split("\t"))-1): | |
| 285 tmplist.append(line.split("\t")[i]) | |
| 286 lenTE = (end_TE-start_TE)+1 | |
| 287 tmplist.append(lenTE) | |
| 288 tmplist.append("gene_in_TE") | |
| 289 linelist.append(tmplist) | |
| 290 tmplist = [] | |
| 291 | |
| 292 # F[1]======================F[2] | |
| 293 # F[8]----------------------------------------------------F[9] | |
| 294 | |
| 295 | |
| 296 if (strand_TR =="+") and (start_TR > start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE == end_TR): | |
| 297 tmplist.append(line.strip()) | |
| 298 tmplist.append("gene_in_TE") | |
| 299 linelist.append(tmplist) | |
| 300 tmplist = [] | |
| 301 | |
| 302 # F[1]==================================>F[2] | |
| 303 # F[8]----------------------------------------------------------F[9] | |
| 304 | |
| 305 if (strand_TR =="-") and (start_TR > start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE == end_TR): | |
| 306 tmplist.append(line.strip()) | |
| 307 tmplist.append("gene_in_TE") | |
| 308 linelist.append(tmplist) | |
| 309 tmplist = [] | |
| 310 | |
| 311 # F[1]<==================================F[2] | |
| 312 # F[8]----------------------------------------------------------F[9] | |
| 313 | |
| 314 if (strand_TR =="+") and (start_TR == start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE > end_TR): | |
| 315 tmplist.append(line.strip()) | |
| 316 tmplist.append("gene_in_TE") | |
| 317 linelist.append(tmplist) | |
| 318 tmplist = [] | |
| 319 | |
| 320 # F[1]==================================>F[2] | |
| 321 # F[8]----------------------------------------------------------F[9] | |
| 322 | |
| 323 if (strand_TR =="-") and (start_TR == start_TE) and (start_TR < end_TE) and (start_TE < end_TR) and (end_TE > end_TR): | |
| 324 tmplist.append(line.strip()) | |
| 325 tmplist.append("gene_in_TE") | |
| 326 linelist.append(tmplist) | |
| 327 tmplist = [] | |
| 328 | |
| 329 # F[1]<==================================F[2] | |
| 330 # F[8]----------------------------------------------------------F[9] | |
| 331 | |
| 332 favorablecases = "%s_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL" % bedtoolsfile | |
| 333 w = open(favorablecases,'w') | |
| 334 for s in linelist: | |
| 335 line= "\t".join(str(item) for item in s) | |
| 336 w.write("%s\n" % line) | |
| 337 w.close() | |
| 338 | |
| 339 | |
| 340 def getClassCodeCuffcompare(self, tmap_file, bedtools_file): | |
| 341 class_code_dic = {} | |
| 342 lcode_ref = [] | |
| 343 tmp = [] | |
| 344 linetowrite =[] | |
| 345 with open(tmap_file) as tmap: | |
| 346 tmapline = tmap.readlines() | |
| 347 for i in range(1,len(tmapline)): | |
| 348 cuff_id = tmapline[i].split("\t")[4].strip() | |
| 349 class_code = tmapline[i].split("\t")[2].strip() | |
| 350 ref_id = tmapline[i].split("\t")[1].strip() | |
| 351 lcode_ref.append(class_code) | |
| 352 lcode_ref.append(ref_id) | |
| 353 class_code_dic[cuff_id] = lcode_ref | |
| 354 lcode_ref = [] | |
| 355 | |
| 356 with open(bedtools_file) as bedtools: | |
| 357 bedtoolsline = bedtools.readlines() | |
| 358 for i in xrange(0,len(bedtoolsline)): | |
| 359 tmp = bedtoolsline[i].strip().split("\t") | |
| 360 transcript_bedtools = bedtoolsline[i].split("\t")[3].strip() | |
| 361 if transcript_bedtools in class_code_dic.keys(): | |
| 362 tmp.append(class_code_dic[transcript_bedtools][0]) | |
| 363 tmp.append(class_code_dic[transcript_bedtools][1]) | |
| 364 else: | |
| 365 tmp.append("NA") | |
| 366 tmp.append("NA") | |
| 367 | |
| 368 linetowrite.append(tmp) | |
| 369 tmp=[] | |
| 370 | |
| 371 | |
| 372 output = "%s_with_Ref" % bedtools_file | |
| 373 w = open(output,'w') | |
| 374 line = "" | |
| 375 for i in xrange(0,len(linetowrite)): | |
| 376 for j in range(0,17): | |
| 377 line = line + linetowrite[i][j] + "\t" | |
| 378 w.write(line) | |
| 379 w.write("\n") | |
| 380 line = "" | |
| 381 w.close() | |
| 382 | |
| 383 def run(self): | |
| 384 self.checkOptions() | |
| 385 try: | |
| 386 LoggerFactory.setLevel(self._log, self._verbosity) | |
| 387 exeDir = os.getcwd() | |
| 388 workingDir = "out_TEiso_%s" % time.strftime("%Y%m%d%H%M%S") | |
| 389 if os.path.exists(workingDir): | |
| 390 self._logAndRaise("ERROR: %s already exists." % workingDir) | |
| 391 os.mkdir(workingDir) | |
| 392 referencefile = os.path.abspath(self._reference) | |
| 393 transcriptsfile = os.path.abspath(self._transcripts) | |
| 394 TEfile = os.path.abspath(self._TE) | |
| 395 print "workingDir >>>>> ",workingDir | |
| 396 os.symlink("%s" % os.path.abspath(self._reference), "%s/%s" % (workingDir, os.path.basename(self._reference))) | |
| 397 os.symlink("%s" % os.path.abspath(self._transcripts), "%s/%s" % (workingDir, os.path.basename(self._transcripts))) | |
| 398 os.chdir(workingDir) | |
| 399 bowtie_build_Dir = "bowtie_build" | |
| 400 prefixbowtie = os.path.basename(self._reference).split(".")[0] | |
| 401 iLaunchBowtie = Bowtie_build(referencefile, prefixbowtie, bowtie_build_Dir) | |
| 402 iLaunchBowtie.run() | |
| 403 os.chdir(exeDir) | |
| 404 self._log.info("Indexing genome is finished!!!!") | |
| 405 tophat_Dir = "tophat" | |
| 406 if self._type == "single": | |
| 407 l_single_reads = [] | |
| 408 for reads in range(0, len(self._single_reads)): | |
| 409 os.symlink("%s" % os.path.abspath(self._single_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._single_reads[reads]))) | |
| 410 filename = os.path.splitext(self._single_reads[reads])[0] | |
| 411 filetype = os.path.splitext(self._single_reads[reads])[1] | |
| 412 if filetype == ".gz": | |
| 413 os.system("gunzip -c %s > %s/%s" % (self._single_reads[reads], workingDir, os.path.basename(filename))) | |
| 414 if filetype == ".bz2": | |
| 415 os.system("bunzip2 -c %s > %s/%s" % (os.path.abspath(self._single_reads[reads]), workingDir, os.path.basename(filename))) | |
| 416 if filetype ==".fq": | |
| 417 filename = self._single_reads[reads] | |
| 418 l_single_reads.append("%s" % os.path.basename(filename)) | |
| 419 bowtiePrefix = "%s/%s" % (bowtie_build_Dir, prefixbowtie) | |
| 420 path = ("%s/%s") % (exeDir,workingDir) | |
| 421 os.chdir(path) | |
| 422 iLaunchTophat = Tophat(tophat_Dir, bowtiePrefix, self._type, l_single_reads, self._left_reads, self._right_reads) | |
| 423 iLaunchTophat.run() | |
| 424 if self._type == "paired": | |
| 425 l_left_reads = [] | |
| 426 l_right_reads = [] | |
| 427 for reads in range(0, len(self._left_reads)): | |
| 428 os.symlink("%s" % os.path.abspath(self._left_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._left_reads[reads]))) | |
| 429 filename = os.path.splitext(self._left_reads[reads])[0] | |
| 430 filetype = os.path.splitext(self._left_reads[reads])[1] | |
| 431 ##TODO : check type input file: mimetypes.guess_type(self._single_reads[reads]) | |
| 432 if filetype == ".gz": | |
| 433 os.system("gunzip -c %s > %s/%s" % (self._left_reads[reads],workingDir, os.path.basename(filename))) | |
| 434 if filetype == ".bz2": | |
| 435 os.system("bunzip2 -c %s > %s/%s" % (self._left_reads[reads],workingDir, os.path.basename(filename))) | |
| 436 if filetype ==".fq": | |
| 437 filename = self._left_reads[reads] | |
| 438 l_left_reads.append("%s" % os.path.basename(filename)) | |
| 439 | |
| 440 for reads in range(0, len(self._right_reads)): | |
| 441 os.symlink("%s" % os.path.abspath(self._right_reads[reads]), "%s/%s" % (workingDir, os.path.basename(self._right_reads[reads]))) | |
| 442 filename = os.path.splitext(self._right_reads[reads])[0] | |
| 443 filetype = os.path.splitext(self._right_reads[reads])[1] | |
| 444 if filetype == ".gz": | |
| 445 os.system("gunzip -c %s > %s/%s" % (self._right_reads[reads],workingDir, os.path.basename(filename))) | |
| 446 if filetype == ".bz2": | |
| 447 os.system("bunzip2 -c %s > %s/%s" % (self._right_reads[reads],workingDir, os.path.basename(filename))) | |
| 448 if filetype ==".fq": | |
| 449 filename = self._right_reads[reads] | |
| 450 l_right_reads.append("%s" % os.path.basename(filename)) | |
| 451 bowtiePrefix = "%s/%s" % (bowtie_build_Dir, prefixbowtie) | |
| 452 path= ("%s/%s") % (exeDir,workingDir) | |
| 453 os.chdir(path) | |
| 454 iLaunchTophat = Tophat(tophat_Dir, bowtiePrefix, self._type, self._single_reads, l_left_reads, l_right_reads) | |
| 455 iLaunchTophat.run() | |
| 456 self._log.info("Mapping reads is finished!!!!") | |
| 457 | |
| 458 if self._assembly_tool == "cufflinks": | |
| 459 cufflinks_Dir = "cufflinks" | |
| 460 accepted_hits = "%s/accepted_hits.bam" % tophat_Dir | |
| 461 iLaunchCufflinks = Cufflinks(accepted_hits, transcriptsfile , cufflinks_Dir) | |
| 462 iLaunchCufflinks.run() | |
| 463 self._log.info("%s is finished!!!!" % self._assembly_tool) | |
| 464 os.symlink("cufflinks/transcripts.gtf", "transcripts.gtf") | |
| 465 cuffcompare_Dir = "cuffcompare" | |
| 466 transcripts = "transcripts.gtf" | |
| 467 iLaunchCuffcompare = Cuffcompare(transcriptsfile, transcripts, outprefix = "cuffcompare", workingDir = cuffcompare_Dir) | |
| 468 iLaunchCuffcompare.run() | |
| 469 self._log.info("Cuffcompare is finished!!!!") | |
| 470 | |
| 471 | |
| 472 bedtools_closest_Dir = "bedtools_closest" | |
| 473 os.mkdir(bedtools_closest_Dir) | |
| 474 os.chdir(bedtools_closest_Dir) | |
| 475 | |
| 476 transcriptsgtf = "%s_transcripts.gtf" % self._assembly_tool | |
| 477 os.symlink("../%s/transcripts.gtf" % self._assembly_tool,transcriptsgtf) | |
| 478 transcriptsbed = "%s_transcripts.bed" % self._assembly_tool | |
| 479 self.getTranscriptToBed(transcriptsgtf,transcriptsbed) | |
| 480 TEgff = os.path.basename(os.path.splitext(TEfile)[0]) + ".gff3" | |
| 481 TEbed = os.path.basename(os.path.splitext(TEfile)[0]) + ".bed" | |
| 482 os.symlink("%s" % TEfile,TEgff) | |
| 483 self.getTEGFFToBed(TEgff,TEbed) | |
| 484 iLauncherBdc= Bedtools_closest(transcriptsbed, TEbed, "bedtools_closest_%s" % transcriptsgtf.split(".")[0]) | |
| 485 iLauncherBdc.run() | |
| 486 self._log.info("Bedtools closest is finished!!!!") | |
| 487 bedtoolsfile = "bedtools_closest_%s" % transcriptsgtf.split(".")[0] | |
| 488 self.getTEnearPromoter(bedtoolsfile) | |
| 489 tmap_file = "../cuffcompare/cuffcompare.transcripts.gtf.tmap" | |
| 490 bedtools_file = "%s_TSSoverlaps_and_TE_closest_TSS_and_inclus_ALL" % bedtoolsfile | |
| 491 | |
| 492 self.getClassCodeCuffcompare(tmap_file,bedtools_file) | |
| 493 os.chdir("..") | |
| 494 self._log.info("Done!!!!") | |
| 495 | |
| 496 except Exception: | |
| 497 self._logAndRaise("ERROR in TEiso") | |
| 498 | |
| 499 | |
| 500 if __name__ == "__main__": | |
| 501 iLaunch = LaunchTEiso() | |
| 502 iLaunch._setAttributesFromCmdLine() | |
| 503 iLaunch.run() |
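
The script is written to be driven from the command line (see the epilog in `_setAttributesFromCmdLine`), but the constructor accepts the same values as keyword arguments, so the pipeline can also be launched from Python. Below is a minimal, hypothetical sketch of such a call; the file names are placeholders, and it assumes TEisotools and its `commons` package are importable.

```python
# Minimal sketch (not part of the repository): driving LaunchTEiso from Python
# rather than via LaunchTEiso.py on the command line. The keyword arguments
# mirror LaunchTEiso.__init__ above; all file names below are placeholders.
from TEiso.LaunchTEiso import LaunchTEiso

iLaunch = LaunchTEiso(reference="genome.fa",
                      input_transcripts="transcripts.gtf",
                      single_paired="paired",
                      left_reads="reads_1.fq",
                      right_reads="reads_2.fq",
                      transposable_element="TE.gff",
                      assembly_tool="cufflinks")
iLaunch.run()  # bowtie-build, tophat, cufflinks, cuffcompare, then bedtools closest
```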
