comparison util/subtools.py @ 29:7e8a8b732db3 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1a81ebd0ddea950b84af3fc830e9267a4814b29f
author yating-l
date Wed, 16 May 2018 18:04:20 -0400
parents df42241d3731
children
comparison
equal deleted inserted replaced
28:6aa28a85cc38 29:7e8a8b732db3
11 import subprocess 11 import subprocess
12 import sys 12 import sys
13 import string 13 import string
14 import tempfile 14 import tempfile
15 15
16
16 class PopenError(Exception): 17 class PopenError(Exception):
17 def __init__(self, cmd, error, return_code): 18 def __init__(self, cmd, error, return_code):
18 self.cmd = cmd 19 self.cmd = cmd
19 self.error = error 20 self.error = error
20 self.return_code = return_code 21 self.return_code = return_code
21 22
22 def __str__(self): 23 def __str__(self):
23 message = "The subprocess {0} has returned the error: {1}.".format(self.cmd, self.return_code) 24 message = "The subprocess {0} has returned the error: {1}.".format(
24 message = ','.join((message, "Its error message is: {0}".format(self.error))) 25 self.cmd, self.return_code)
26 message = ','.join(
27 (message, "Its error message is: {0}".format(self.error)))
25 return repr(message) 28 return repr(message)
29
26 30
27 def _handleExceptionAndCheckCall(array_call, **kwargs): 31 def _handleExceptionAndCheckCall(array_call, **kwargs):
28 """ 32 """
29 This class handle exceptions and call the tool. 33 This class handle exceptions and call the tool.
30 It maps the signature of subprocess.check_call: 34 It maps the signature of subprocess.check_call:
39 output = None 43 output = None
40 error = None 44 error = None
41 45
42 # TODO: Check the value of array_call and <=[0] 46 # TODO: Check the value of array_call and <=[0]
43 logging.debug("Calling {0}:".format(cmd)) 47 logging.debug("Calling {0}:".format(cmd))
44 48 logging.debug("%s", array_call)
45 logging.debug("---------") 49 logging.debug("---------")
46 50
47 # TODO: Use universal_newlines option from Popen? 51 # TODO: Use universal_newlines option from Popen?
48 try: 52 try:
49 p = subprocess.Popen(array_call, stdout=stdout, stderr=stderr, shell=shell) 53 p = subprocess.Popen(array_call, stdout=stdout,
54 stderr=stderr, shell=shell)
50 55
51 # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate 56 # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate
52 57
53 output, error = p.communicate() 58 output, error = p.communicate()
54 59
62 if p.returncode: 67 if p.returncode:
63 if stderr == subprocess.PIPE: 68 if stderr == subprocess.PIPE:
64 raise PopenError(cmd, error, p.returncode) 69 raise PopenError(cmd, error, p.returncode)
65 else: 70 else:
66 # TODO: To Handle properly with a design behind, if we received a option as a file for the error 71 # TODO: To Handle properly with a design behind, if we received a option as a file for the error
67 raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"\ 72 raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"
68 .format(cmd, stderr.name, p.returncode)) 73 .format(cmd, stderr.name, p.returncode))
69 74
70 except OSError as e: 75 except OSError as e:
71 message = "The subprocess {0} has encountered an OSError: {1}".format(cmd, e.strerror) 76 message = "The subprocess {0} has encountered an OSError: {1}".format(
77 cmd, e.strerror)
72 if e.filename: 78 if e.filename:
73 message = '\n'.join((message, ", against this file: {0}".format(e.filename))) 79 message = '\n'.join(
80 (message, ", against this file: {0}".format(e.filename)))
74 logging.error(message) 81 logging.error(message)
75 sys.exit(-1) 82 sys.exit(-1)
76 except PopenError as p: 83 except PopenError as p:
77 message = "The subprocess {0} has returned the error: {1}.".format(p.cmd, p.return_code) 84 message = "The subprocess {0} has returned the error: {1}.".format(
78 message = '\n'.join((message, "Its error message is: {0}".format(p.error))) 85 p.cmd, p.return_code)
86 message = '\n'.join(
87 (message, "Its error message is: {0}".format(p.error)))
79 88
80 logging.exception(message) 89 logging.exception(message)
81 90
82 sys.exit(p.return_code) 91 sys.exit(p.return_code)
83 except Exception as e: 92 except Exception as e:
84 message = "The subprocess {0} has encountered an unknown error: {1}".format(cmd, e) 93 message = "The subprocess {0} has encountered an unknown error: {1}".format(
94 cmd, e)
85 logging.exception(message) 95 logging.exception(message)
86 96
87 sys.exit(-1) 97 sys.exit(-1)
88 return p 98 return p
99
89 100
90 def twoBitInfo(two_bit_file_name, two_bit_info_file): 101 def twoBitInfo(two_bit_file_name, two_bit_info_file):
91 """ 102 """
92 Call twoBitInfo and write the result into twoBit_info_file 103 Call twoBitInfo and write the result into twoBit_info_file
93 :param two_bit_file_name: 104 :param two_bit_file_name:
96 """ 107 """
97 array_call = ['twoBitInfo', two_bit_file_name, two_bit_info_file] 108 array_call = ['twoBitInfo', two_bit_file_name, two_bit_info_file]
98 p = _handleExceptionAndCheckCall(array_call) 109 p = _handleExceptionAndCheckCall(array_call)
99 return p 110 return p
100 111
112
101 def faToTwoBit(fasta_file_name, twoBitFile): 113 def faToTwoBit(fasta_file_name, twoBitFile):
102 """ 114 """
103 This function call faToTwoBit UCSC tool, and return the twoBitFile 115 This function call faToTwoBit UCSC tool, and return the twoBitFile
104 :param fasta_file_name: 116 :param fasta_file_name:
105 :param mySpecieFolder: 117 :param mySpecieFolder:
109 array_call = ['faToTwoBit', fasta_file_name, twoBitFile] 121 array_call = ['faToTwoBit', fasta_file_name, twoBitFile]
110 _handleExceptionAndCheckCall(array_call) 122 _handleExceptionAndCheckCall(array_call)
111 123
112 return twoBitFile 124 return twoBitFile
113 125
126
114 def gtfToGenePred(input_gtf_file_name, gene_pred_file_name): 127 def gtfToGenePred(input_gtf_file_name, gene_pred_file_name):
115 """ 128 """
116 Call gtfToGenePred and write the result into gene_pred_file_name 129 Call gtfToGenePred and write the result into gene_pred_file_name
117 :param input_gtf_file_name: 130 :param input_gtf_file_name:
118 :param gene_pred_file_name: 131 :param gene_pred_file_name:
120 """ 133 """
121 array_call = ['gtfToGenePred', input_gtf_file_name, gene_pred_file_name] 134 array_call = ['gtfToGenePred', input_gtf_file_name, gene_pred_file_name]
122 p = _handleExceptionAndCheckCall(array_call) 135 p = _handleExceptionAndCheckCall(array_call)
123 return p 136 return p
124 137
138
125 def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name): 139 def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name):
126 """ 140 """
127 Call gff3ToGenePred and write the result into gene_pred_file_name 141 Call gff3ToGenePred and write the result into gene_pred_file_name
128 :param input_gff3_file_name: 142 :param input_gff3_file_name:
129 :param gene_pred_file_name: 143 :param gene_pred_file_name:
130 :return: 144 :return:
131 """ 145 """
132 valid_gff3_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".gff3") 146 array_call = ['gff3ToGenePred', input_gff3_file_name, gene_pred_file_name]
133 validateGff(input_gff3_file_name, valid_gff3_file.name) 147 p = _handleExceptionAndCheckCall(array_call)
134 array_call = ['gff3ToGenePred', valid_gff3_file.name, gene_pred_file_name] 148 return p
135 p = _handleExceptionAndCheckCall(array_call) 149
136 return p
137 150
138 def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name): 151 def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name):
139 """ 152 """
140 Call genePredToBigGenePred and write the result into unsorted_bigGenePred_file_name 153 Call genePredToBigGenePred and write the result into unsorted_bigGenePred_file_name
141 :param gene_pred_file_name: 154 :param gene_pred_file_name:
146 gene_pred_file_name, 159 gene_pred_file_name,
147 unsorted_bigGenePred_file_name] 160 unsorted_bigGenePred_file_name]
148 p = _handleExceptionAndCheckCall(array_call) 161 p = _handleExceptionAndCheckCall(array_call)
149 return p 162 return p
150 163
164
151 def genePredToBed(gene_pred_file_name, unsorted_bed_file_name): 165 def genePredToBed(gene_pred_file_name, unsorted_bed_file_name):
152 """ 166 """
153 Call genePredToBed and write the result into unsorted_bed_file_name 167 Call genePredToBed and write the result into unsorted_bed_file_name
154 :param gene_pred_file_name: 168 :param gene_pred_file_name:
155 :param unsorted_bed_file_name: 169 :param unsorted_bed_file_name:
157 """ 171 """
158 array_call = ['genePredToBed', gene_pred_file_name, unsorted_bed_file_name] 172 array_call = ['genePredToBed', gene_pred_file_name, unsorted_bed_file_name]
159 p = _handleExceptionAndCheckCall(array_call) 173 p = _handleExceptionAndCheckCall(array_call)
160 return p 174 return p
161 175
176
162 def sort(unsorted_bed_file_name, sorted_bed_file_name): 177 def sort(unsorted_bed_file_name, sorted_bed_file_name):
163 """ 178 """
164 Call sort with -k1,1 -k2,2n on unsorted_bed_file_name and write the result into sorted_bed_file_name 179 Call sort with -k1,1 -k2,2n on unsorted_bed_file_name and write the result into sorted_bed_file_name
165 :param unsorted_bed_file_name: 180 :param unsorted_bed_file_name:
166 :param sorted_bed_file_name: 181 :param sorted_bed_file_name:
167 :return: 182 :return:
168 """ 183 """
169 array_call = ['sort', '-k', '1,1', '-k', '2,2n', unsorted_bed_file_name, '-o', sorted_bed_file_name] 184 array_call = ['sort', '-k', '1,1', '-k', '2,2n',
170 p = _handleExceptionAndCheckCall(array_call) 185 unsorted_bed_file_name, '-o', sorted_bed_file_name]
171 return p 186 p = _handleExceptionAndCheckCall(array_call)
187 return p
188
172 189
173 def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name): 190 def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
174 """ 191 """
175 Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name 192 Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name
176 :param two_bit_info_file_name: 193 :param two_bit_info_file_name:
177 :param chrom_sizes_file_name: 194 :param chrom_sizes_file_name:
178 :return: 195 :return:
179 """ 196 """
180 array_call = ['sort', '-k2rn', two_bit_info_file_name, '-o', chrom_sizes_file_name] 197 array_call = ['sort', '-k2rn', two_bit_info_file_name,
181 p = _handleExceptionAndCheckCall(array_call) 198 '-o', chrom_sizes_file_name]
182 return p 199 p = _handleExceptionAndCheckCall(array_call)
183 200 return p
184 def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, 201
185 typeOption=None, autoSql=None, tab=False, extraIndex=None): 202
203 def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, options=None):
186 """ 204 """
187 Call bedToBigBed on sorted_bed_file_name, using chrom_sizes_file_name and write the result into big_bed_file_name 205 Call bedToBigBed on sorted_bed_file_name, using chrom_sizes_file_name and write the result into big_bed_file_name
188 :param sorted_bed_file_name: 206 :param sorted_bed_file_name:
189 :param chrom_sizes_file_name: 207 :param chrom_sizes_file_name:
190 :param big_bed_file_name: 208 :param big_bed_file_name:
191 :return: 209 :return:
192 """ 210 """
193 211
194 # TODO: Move this into the _handleExceptionAndCheckCall function 212 array_call = ['bedToBigBed', sorted_bed_file_name,
195 # Parse the array 213 chrom_sizes_file_name, big_bed_file_name]
196 logging.debug("sorted_bed_file_name: {0}".format(sorted_bed_file_name)) 214 if options:
197 logging.debug("chrom_sizes_file_name: {0}".format(chrom_sizes_file_name)) 215 typeOption = options.get("typeOption")
198 logging.debug("big_bed_file_name: {0}".format(big_bed_file_name)) 216 autoSql = options.get("autoSql")
199 logging.debug("typeOption: {0}".format(typeOption)) 217 tab = options.get("tab")
200 logging.debug("autoSql: {0}".format(autoSql)) 218 extraIndex = options.get("extraIndex")
201 logging.debug("tab option: {0}".format(tab)) 219 if typeOption:
202 220 typeOption = ''.join(['-type=', typeOption])
203 array_call = ['bedToBigBed', sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name] 221 array_call.append(typeOption)
204 if typeOption: 222 if autoSql:
205 typeOption = ''.join(['-type=', typeOption]) 223 autoSql = ''.join(['-as=', autoSql])
206 array_call.append(typeOption) 224 array_call.append(autoSql)
207 if autoSql: 225 if tab:
208 autoSql = ''.join(['-as=', autoSql]) 226 array_call.append('-tab')
209 array_call.append(autoSql) 227 if extraIndex:
210 if tab: 228 index = ''.join(['-extraIndex=', extraIndex])
211 array_call.append('-tab') 229 array_call.append(index)
212 if extraIndex:
213 index = ''.join(['-extraIndex=', extraIndex])
214 array_call.append(index)
215
216 p = _handleExceptionAndCheckCall(array_call) 230 p = _handleExceptionAndCheckCall(array_call)
217 return p 231 return p
218 232
219 def sortBam(input_bam_file_name, output_sorted_bam_name): 233 def sortBam(input_bam_file_name, output_sorted_bam_name):
220 """ 234 """
221 Call samtools on input_bam_file_name and output the result in output_sorted_bam_name 235 Call samtools on input_bam_file_name and output the result in output_sorted_bam_name
222 :param input_bam_file_name: 236 :param input_bam_file_name:
223 :param output_sorted_bam_name: 237 :param output_sorted_bam_name:
224 :return: 238 :return:
225 """ 239 """
226 array_call = ['samtools', 'sort', input_bam_file_name, '-o', output_sorted_bam_name] 240 array_call = ['samtools', 'sort',
227 p = _handleExceptionAndCheckCall(array_call) 241 input_bam_file_name, '-o', output_sorted_bam_name]
228 return p 242 p = _handleExceptionAndCheckCall(array_call)
243 return p
244
229 245
230 def createBamIndex(input_sorted_bam_file_name, output_name_index_name): 246 def createBamIndex(input_sorted_bam_file_name, output_name_index_name):
231 """ 247 """
232 Call `samtools index` on imput_sorted_bam_file_name and output the result in output_name_index_name 248 Call `samtools index` on imput_sorted_bam_file_name and output the result in output_name_index_name
233 :param input_sorted_bam_file_name: 249 :param input_sorted_bam_file_name:
234 :param output_name_index_name: 250 :param output_name_index_name:
235 :return: 251 :return:
236 """ 252 """
237 array_call = ['samtools', 'index', input_sorted_bam_file_name, output_name_index_name] 253 array_call = ['samtools', 'index',
238 p = _handleExceptionAndCheckCall(array_call) 254 input_sorted_bam_file_name, output_name_index_name]
239 return p 255 p = _handleExceptionAndCheckCall(array_call)
256 return p
257
240 258
241 def pslToBigPsl(input_psl_file_name, output_bed12_file_name): 259 def pslToBigPsl(input_psl_file_name, output_bed12_file_name):
242 """ 260 """
243 Call `pslToBigPsl` on input_psl_file_name and output the result in output_bed12_file_name 261 Call `pslToBigPsl` on input_psl_file_name and output the result in output_bed12_file_name
244 :param input_psl_file_name: Name of the psl input file 262 :param input_psl_file_name: Name of the psl input file
249 array_call = ['pslToBigPsl', input_psl_file_name, output_bed12_file_name] 267 array_call = ['pslToBigPsl', input_psl_file_name, output_bed12_file_name]
250 268
251 p = _handleExceptionAndCheckCall(array_call) 269 p = _handleExceptionAndCheckCall(array_call)
252 return p 270 return p
253 271
254 #santitize trackName. Because track name must begin with a letter and 272 # santitize trackName. Because track name must begin with a letter and
255 # contain only the following chars: [a-zA-Z0-9_]. 273 # contain only the following chars: [a-zA-Z0-9_].
256 # See the "track" Common settings at: 274 # See the "track" Common settings at:
257 #https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments 275 # https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments
258 def fixName(filename): 276
259 if filename == 'cytoBandIdeo': 277 def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None):
260 return filename 278 """
261 valid_chars = "_%s%s" % (string.ascii_letters, string.digits) 279 Call validateFiles on input_file, using chrom_sizes_file_name and file_type
262 sanitize_name = ''.join([c if c in valid_chars else '_' for c in filename]) 280 :param input_file:
263 sanitize_name = "gonramp_" + sanitize_name 281 :param chrom_sizes_file_name:
264 return sanitize_name 282 :param file_type:
265 283 :return:
266 def validateGff(orig_gff3, valid_gff3): 284 """
267 """ 285
268 Remove extra meta line: ##gff-version 3 286 array_call = ['validateFiles', '-chromInfo=' + chrom_sizes_file_name, '-type='+ file_type, input_file]
269 """ 287 if options:
270 valid = open(valid_gff3, 'w') 288 tab = options.get("tab")
271 num = 0 289 autoSql = options.get("autoSql")
272 with open(orig_gff3, 'r') as f: 290 logging.debug("tab: {0}".format(tab))
273 for line in f: 291 logging.debug("autoSql: {0}".format(autoSql))
274 if '##gff-version 3' in line: 292 if autoSql:
275 if num == 0: 293 autoSql = ''.join(['-as=', autoSql])
276 num += 1 294 array_call.append(autoSql)
277 else: 295 if tab:
278 continue 296 array_call.append('-tab')
279 valid.write(line) 297 p = _handleExceptionAndCheckCall(array_call)
280 298 return p
299
300 def pslCheck(input_file, options=None):
301 """
302 Call pslCheck on input_file
303 :param input_file:
304 :return:
305 """
306
307 array_call = ['pslCheck', input_file]
308 p = _handleExceptionAndCheckCall(array_call)
309 return p
310
311
312