comparison TEisotools-1.0/TEiso/Tophat.py @ 6:20ec0d14798e draft

Uploaded
author urgi-team
date Wed, 20 Jul 2016 05:00:24 -0400
parents
children
comparison
equal deleted inserted replaced
5:4093a2fb58be 6:20ec0d14798e
1 #!/usr/bin/env python
2
3 # Copyright INRA (Institut National de la Recherche Agronomique)
4 # http://www.inra.fr
5 # http://urgi.versailles.inra.fr
6 #
7 # This software is governed by the CeCILL license under French law and
8 # abiding by the rules of distribution of free software. You can use,
9 # modify and/ or redistribute the software under the terms of the CeCILL
10 # license as circulated by CEA, CNRS and INRIA at the following URL
11 # "http://www.cecill.info".
12 #
13 # As a counterpart to the access to the source code and rights to copy,
14 # modify and redistribute granted by the license, users are provided only
15 # with a limited warranty and the software's author, the holder of the
16 # economic rights, and the successive licensors have only limited
17 # liability.
18 #
19 # In this respect, the user's attention is drawn to the risks associated
20 # with loading, using, modifying and/or developing or reproducing the
21 # software by the user in light of its specific status of free software,
22 # that may mean that it is complicated to manipulate, and that also
23 # therefore means that it is reserved for developers and experienced
24 # professionals having in-depth computer knowledge. Users are therefore
25 # encouraged to load and test the software's suitability as regards their
26 # requirements in conditions enabling the security of their systems and/or
27 # data to be ensured and, more generally, to use and operate it in the
28 # same conditions as regards security.
29 #
30 # The fact that you are presently reading this means that you have had
31 # knowledge of the CeCILL license and that you accept its terms.
32
33 import os, glob
34 import subprocess
35 import time
36 from commons.core.checker.CheckerUtils import CheckerUtils
37 from commons.core.utils.RepetOptionParser import RepetOptionParser
38 from commons.core.LoggerFactory import LoggerFactory
39 from commons.core.utils.FileUtils import FileUtils
40
41 LOG_DEPTH = "repet.RNAseq_pipe"
42
43
44 class Tophat(object):
45
46 def __init__(self, workingDir = "", index_genome = "", single_paired = "", single_read = "", left_read ="", right_read = "", verbosity = 3):
47 #self._transcripts = input_transcripts
48 self._outputDir = workingDir
49 self._bowtie_index = index_genome
50 self._type = single_paired
51 self._single_read = single_read
52 self._left_read = left_read
53 self._right_read = right_read
54 self._verbosity = verbosity
55 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
56 def setAttributesFromCmdLine(self):
57 description = "TopHat maps short sequences from spliced transcripts to whole genomes..\n"
58 usage = "if reads are single:/n Tophat.py -G <GTF/GFF with known transcripts> -b <bowtie_index> -t single -r <single_read> \n"
59 usage +="if reads are paired:/n Tophat.py -G <GTF/GFF with known transcripts> -b <bowtie_index> -t paired -r1 <reads_left> -r2 <reads_right>\n"
60 parser = RepetOptionParser(description = description, usage = usage)
61 # parser.add_option( '-G', '--input_transcripts', dest='input_transcripts', help='GTF/GFF with known transcripts', default = "")
62 parser.add_option( '-o', '--outputDir', dest='outputDir', help='write all output files to this directory', default = "")
63 parser.add_option( '-b', '--index_genome', dest='index_genome', help='Indexing reference genome', default = "")
64 parser.add_option( '-e', '--single_paired', dest='single_paired', help='types of input reads', default = "paired")
65 parser.add_option( '-s', '--single_read', dest = 'single_read', help='a single input read', default = "" )
66 parser.add_option( '-l', '--left_read', dest='left_read', help='left reads', default = "" )
67 parser.add_option( '-r', '--right_read', dest='right_read', help='right reads', default = "" )
68 options, args = parser.parse_args()
69 self.setAttributesFromOptions(options)
70
71 def setAttributesFromOptions(self, options):
72 ## self._transcripts = options.input_transcripts
73 self._outputDir = options.outputDir
74 self._bowtie_index = options.index_genome
75 self._type = options.single_paired
76 self._single_read = options.single_read
77 self._left_read = options.left_read
78 self._right_read = options.right_read
79
80
81 def checkExecutables(self):
82
83 if not CheckerUtils.isExecutableInUserPath("tophat2"):
84 raise Exception("ERROR: tophat must be in your path")
85
86 def checkOptions(self):
87 if self._bowtie_index == "":
88 raise Exception("ERROR: No specified -b option!")
89
90 ## if self._transcripts != "":
91 ## if not FileUtils.isRessourceExists(self._transcripts):
92 ## raise Exception("ERROR: %s does not exist!" % self._transcripts)
93
94 if self._type == "paired":
95 for f in self._left_read:
96 if not FileUtils.isRessourceExists(f):
97 raise Exception("ERROR: %s does not exist!" % f)
98 for f in self._right_read:
99 if not FileUtils.isRessourceExists(f):
100 raise Exception("ERROR: %s does not exist!" % f)
101 elif self._type == "single":
102 for f in self._single_read:
103 if not FileUtils.isRessourceExists(f):
104 raise Exception("ERROR: %s does not exist!" % f)
105 else:
106 raise Exception("ERROR: No specified -t option!")
107
108 def getTophatCmd_single(self, out_tophat, BowtiePrefix, single_read):
109 cmd = "tophat2 -p 8 -o %s %s %s" % (out_tophat, BowtiePrefix, ",".join(single_read))
110 return cmd
111
112 def getTophatCmd_paired(self, out_tophat, BowtiePrefix, left_read, right_read):
113 ####sur SGE comme saruman
114 #cmd = "echo " + "'tophat -p 8 -o %s ../%s %s %s'" % (out_tophat, prefix, ",".join(left_Read), ",".join(right_Read))+ "|qsub -V -cwd -pe multithread 8"
115 cmd = "tophat2 -p 8 -o %s %s %s %s" % (out_tophat, BowtiePrefix, ",".join(left_read), ",".join(right_read))
116 #print cmd
117 return cmd
118
119 def run(self):
120 self.checkExecutables()
121 self.checkOptions()
122 try:
123 if os.path.exists(self._outputDir):
124 raise Exception("ERROR: %s already exists." % self._outputDir)
125 if self._type == "single":
126 cmd_tophat = self.getTophatCmd_single(self._outputDir, self._bowtie_index, self._single_read)
127 if self._type == "paired":
128 cmd_tophat = self.getTophatCmd_paired(self._outputDir, self._bowtie_index, self._left_read, self._right_read)
129 #print cmd_tophat
130 ## hide output of subprocess: stdout = index_dir_stderr
131 fstdout = open( "tophat.log" , 'w' )
132 process = subprocess.Popen(cmd_tophat, shell = True, stdout = fstdout, stderr=subprocess.STDOUT)
133 returncode = process.wait()
134 fstdout.close()
135 # get stderr, allowing for case where it's very large
136 fstdout = open("tophat.log", 'rb' )
137 stderr = ''
138 buffsize = 1048576
139 try:
140 while True:
141 stderr += fstdout.read( buffsize )
142 if not stderr or len( stderr ) % buffsize != 0:
143 break
144 except OverflowError:
145 pass
146 fstdout.close()
147 if returncode != 0:
148 raise Exception, stderr
149
150 os.system("mv tophat.log %s/tophat.log " % self._outputDir)
151 except Exception:
152 raise Exception("ERROR in %s " % cmd_tophat)
153
154
155
156
157 if __name__ == "__main__":
158 iLaunch = Tophat()
159 iLaunch.setAttributesFromCmdLine()
160 iLaunch.run()
161