3
|
1 import os
|
|
2 import sys
|
|
3 import logging
|
|
4 import subprocess
|
|
5
|
|
6 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir,'jared')))
|
|
7
|
|
8 from vcf_reader_func import checkFormat
|
|
9 from bcftools import check_bcftools_for_errors
|
|
10
|
|
def check_bgzip_for_errors (bgzip_stderr):
    '''
    Checks the bgzip stderr for errors

    bgzip is considered to have failed whenever it wrote anything to its
    stderr. An empty value (or None) is treated as success.

    Parameters
    ----------
    bgzip_stderr : str or bytes
        bgzip stderr

    Raises
    ------
    IOError
        If bgzip stderr is not empty. The stderr text is appended to the
        message so the cause is not lost.
    '''

    # Nothing on stderr: bgzip finished without complaint
    if not bgzip_stderr:
        return

    # subprocess pipes yield bytes in python 3, decode them for the message
    # (in python 2 bytes is str, so this branch is skipped)
    if isinstance(bgzip_stderr, bytes) and not isinstance(bgzip_stderr, str):
        bgzip_stderr = bgzip_stderr.decode(errors = 'replace')

    raise IOError('Error occured while compressing the vcf file: %s' % str(bgzip_stderr).strip())
|
|
28
|
|
def bgzip_decompress_vcfgz (vcfgz_filename, out_prefix = '', keep_original = False):
    '''
    Converts a vcf.gz to vcf

    The function automates bgzip to decompress a vcf.gz file into a vcf.
    When an output prefix is given, or the original is to be kept, bgzip
    writes to stdout (`bgzip -dc`) and that stream is redirected into the
    new vcf file. Otherwise bgzip is run as `bgzip -d` on the input.

    Parameters
    ----------
    vcfgz_filename : str
        The file name of the vcf.gz file to be decompressed
    out_prefix : str
        Output file prefix (i.e. filename without extension). If empty and
        keep_original is set, the output name is the input name with
        everything after the first extension separator replaced by `.vcf`
    keep_original : bool
        Specifies if the original file should be kept

    Raises
    ------
    IOError
        If bgzip wrote anything to stderr while decompressing
    '''

    # Run bgzip with stdout piped to file
    if keep_original or out_prefix:

        if out_prefix:

            # Assign the vcf filename from the prefix
            vcf_filename = out_prefix + '.vcf'

        else:

            # Separate into path and filename
            split_path, split_filename = os.path.split(vcfgz_filename)

            # Remove any file extensions (text after the first separator)
            vcf_basename = split_filename.split(os.extsep)[0] + '.vcf'

            # Join path and filename
            vcf_filename = os.path.join(split_path, vcf_basename)

        # Create the output file; the context manager guarantees the handle
        # is closed once bgzip has finished (or failed to start)
        with open(vcf_filename, 'w') as vcf_file:

            # bgzip subprocess call
            bgzip_call = subprocess.Popen(['bgzip', '-dc', vcfgz_filename], stdout = vcf_file, stderr = subprocess.PIPE)

            # Save the stdout and stderr from bgzip
            bgzip_out, bgzip_err = bgzip_call.communicate()

    # Run bgzip normally
    else:

        # bgzip subprocess call
        bgzip_call = subprocess.Popen(['bgzip', '-d', vcfgz_filename], stdout = subprocess.PIPE, stderr = subprocess.PIPE)

        # Save the stdout and stderr from bgzip
        bgzip_out, bgzip_err = bgzip_call.communicate()

    # Check that the file was decompressed without error
    check_bgzip_for_errors(bgzip_err)

    # `bgzip -dc` leaves the input in place, so delete it here when an
    # output prefix was used and the original is not wanted
    if out_prefix and not keep_original:
        os.remove(vcfgz_filename)
|
|
90
|
|
def bgzip_compress_vcf (vcf_filename, out_prefix = '', keep_original = False):
    '''
    Converts a vcf to vcf.gz

    The function automates bgzip to compress a vcf file into a vcf.gz.
    When an output prefix is given, or the original is to be kept, bgzip
    writes to stdout (`bgzip -c`) and that stream is redirected into the
    new vcf.gz file. Otherwise bgzip is run directly on the input file.

    Parameters
    ----------
    vcf_filename : str
        The file name of the vcf file to be compressed
    out_prefix : str
        Output file prefix (i.e. filename without extension). If empty and
        keep_original is set, the output name is the input name with
        everything after the first extension separator replaced by
        `.vcf.gz`
    keep_original : bool
        Specifies if the original file should be kept

    Raises
    ------
    IOError
        If bgzip wrote anything to stderr while compressing
    '''

    # Compress via stdout, which leaves the original file in place
    if keep_original or out_prefix:

        if out_prefix:

            # Assign the filename from the prefix
            vcfgz_filename = out_prefix + '.vcf.gz'

        else:

            # Separate the *input* name into path and filename (the output
            # name does not exist yet at this point)
            split_path, split_filename = os.path.split(vcf_filename)

            # Remove any file extensions (text after the first separator)
            vcfgz_basename = split_filename.split(os.extsep)[0] + '.vcf.gz'

            # Join path and filename
            vcfgz_filename = os.path.join(split_path, vcfgz_basename)

        # Create the output file; the context manager guarantees the handle
        # is closed once bgzip has finished (or failed to start)
        with open(vcfgz_filename, 'w') as vcfgz_file:

            # bgzip subprocess call
            bgzip_call = subprocess.Popen(['bgzip', '-c', vcf_filename], stdout = vcfgz_file, stderr = subprocess.PIPE)

            # Save the stdout and stderr from bgzip
            bgzip_out, bgzip_err = bgzip_call.communicate()

    # Compress and do not keep the original file
    else:

        # bgzip subprocess call
        bgzip_call = subprocess.Popen(['bgzip', vcf_filename], stdout = subprocess.PIPE, stderr = subprocess.PIPE)

        # Save the stdout and stderr from bgzip
        bgzip_out, bgzip_err = bgzip_call.communicate()

    # Check that output file was compressed correctly
    check_bgzip_for_errors(bgzip_err)
|
|
147
|
|
def cvt_vcftools_site_to_bed (vcftools_out_str):
    '''
    Converts one line of vcftools site output into a bed-style line

    Header lines (lines containing both `CHROM` and `POS`) and blank lines
    yield an empty string. For any other line, the second tab-delimited
    column is read as an integer and that value plus one is appended as a
    new last column.

    NOTE(review): the position is used unchanged as the start column. BED
    starts are conventionally 0-based while VCF positions are 1-based;
    confirm this offset is what downstream consumers expect.

    Parameters
    ----------
    vcftools_out_str : str or bytes
        A single line of vcftools site output

    Returns
    -------
    str
        The tab-delimited line with the end column appended and a trailing
        newline, or an empty string for header and blank lines

    Raises
    ------
    IndexError
        If a data line has fewer than two tab-delimited columns
    ValueError
        If the second column is not an integer
    '''

    # subprocess pipes yield bytes in python 3 (in python 2 bytes is str,
    # so this branch is skipped)
    if isinstance(vcftools_out_str, bytes) and not isinstance(vcftools_out_str, str):
        vcftools_out_str = vcftools_out_str.decode()

    # Remove the header
    if 'CHROM' in vcftools_out_str and 'POS' in vcftools_out_str:
        return ''

    # Nothing to convert on a blank line
    stripped_line = vcftools_out_str.strip()
    if not stripped_line:
        return ''

    # Split the line into a list
    site_fields = stripped_line.split('\t')

    # Convert the chromStart to int
    chrom_start = int(site_fields[1])
    site_fields[1] = chrom_start

    # Calc chromEnd and add it to the list
    site_fields.append(chrom_start + 1)

    # Return the list as a string (with newline element)
    return '\t'.join(map(str, site_fields)) + '\n'
|
|
164
|
|
def pipe_vcftools (vcftools_call_args):
    '''
    Starts vcftools with its results sent to a pipe

    vcftools is launched with `--stdout` followed by the given arguments
    (each converted to a string), with both stdout and stderr captured as
    pipes. The process is returned immediately; this function neither
    waits for it nor inspects its stderr. Callers are expected to consume
    the pipes and then run check_vcftools_for_errors on the stderr.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments

    Returns
    -------
    vcftools_call : subprocess.Popen
        The running vcftools process. Its `stdout` attribute is the
        results pipe and its `stderr` attribute is the log pipe.
    '''

    # Assemble the full command line, forcing results onto stdout
    vcftools_command = ['vcftools', '--stdout']
    vcftools_command += [str(call_arg) for call_arg in vcftools_call_args]

    # Launch vcftools with both output streams captured
    return subprocess.Popen(vcftools_command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
|
|
195
|
|
def pipe_vcftools_to_bed_file (vcftools_call_args, output_filename):
    '''
    Pipes site-file output of vcftools to a bed formatted file

    Each line of the vcftools stdout is passed through
    cvt_vcftools_site_to_bed and the result is written to the output
    file, so no intermediate site file is created.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the bed file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If writing or converting the vcftools output fails (the partial
        bed file is deleted first), or if check_vcftools_for_errors
        rejects the vcftools stderr
    '''

    # Open vcftools pipe
    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Create the bed file
    bed_output = open(output_filename, 'w')

    # subprocess pipes yield bytes in python 3
    decode_stdout = sys.version_info[0] == 3

    try:
        # Iterate the vcftools stdout unless error occurs
        for vcftools_stdout_line in iter(vcftools_call.stdout.readline, b''):

            # Convert bytes to string before the text-based conversion
            if decode_stdout:
                vcftools_stdout_line = vcftools_stdout_line.decode()

            bed_output.write(cvt_vcftools_site_to_bed(vcftools_stdout_line))

        # Close the bed file
        bed_output.close()

    except BaseException as pipe_error:
        # Close the bed file
        bed_output.close()
        # Delete the incomplete file
        os.remove(output_filename)

        # Let KeyboardInterrupt / SystemExit through untouched
        if not isinstance(pipe_error, Exception):
            raise

        # Report the failure rather than continuing with a deleted output
        raise Exception('vcftools to python pipe error: %r' % pipe_error)

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Close the vcftools stdout
    vcftools_call.stdout.close()

    # Read the vcftools stderr
    vcftools_stderr = vcftools_call.stderr.read()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()

    # Check that vcftools finished without error
    check_vcftools_for_errors(vcftools_stderr)

    logging.info('vcftools call complete')

    return vcftools_stderr
|
|
251
|
|
def pipe_vcftools_bgzip (vcftools_call_args, output_filename):
    '''
    Pipes the output of vcftools to bgzip

    The purpose of this function is to avoid creating large uncompressed
    vcf files by directly piping the output of vcftools to bgzip. This
    results in creating a vcf.gz file without any intermediates.

    Both processes are always waited on and the output file is always
    closed before either stderr is checked, so a vcftools failure does not
    leave bgzip running or the file handle open.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the compressed vcf file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If check_vcftools_for_errors rejects the vcftools stderr
    IOError
        If bgzip wrote anything to stderr
    '''

    # Open vcftools pipe
    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Create bgzip output file (closed automatically on leaving the block)
    with open(output_filename, 'wb') as bgzip_output:

        # bgzip subprocess call, reading directly from the vcftools stdout
        bgzip_call = subprocess.Popen(['bgzip'], stdin = vcftools_call.stdout, stdout = bgzip_output, stderr = subprocess.PIPE)

        # Close the parent's copy of the vcftools stdout. bgzip holds its
        # own copy, and this lets vcftools receive SIGPIPE should bgzip
        # exit early
        vcftools_call.stdout.close()

        # Read the vcftools stderr (returns once vcftools closes it)
        vcftools_stderr = vcftools_call.stderr.read()
        vcftools_call.stderr.close()

        # Wait for vcftools to finish
        vcftools_call.wait()

        # Wait for bgzip to finish and save its stderr, stdout = None
        bgzip_stdout, bgzip_stderr = bgzip_call.communicate()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()
        bgzip_stderr = bgzip_stderr.decode()

    # Check that vcftools finished without error
    check_vcftools_for_errors(vcftools_stderr)

    # Check that output file was compressed correctly
    check_bgzip_for_errors(bgzip_stderr)

    logging.info('vcftools and bgzip calls complete')

    return vcftools_stderr
|
|
314
|
|
def pipe_vcftools_bcftools (vcftools_call_args, output_filename):
    '''
    Pipes the output of vcftools to bcftools

    The purpose of this function is to avoid the vcftools command
    --recode-bcf that may result in malformed BCF files. To avoid large
    uncompressed intermediates, this function pipes the stdout of vcftools
    to `bcftools view -O b -o <output_filename>`.

    Both processes are always waited on before either stderr is checked,
    so a vcftools failure does not leave bcftools running.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the BCF file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If check_vcftools_for_errors rejects the vcftools stderr. Errors
        raised by check_bcftools_for_errors also propagate.
    '''

    # Open vcftools pipe
    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Holds the arguments to convert to BCF format
    convert_args = ['view', '-O', 'b']

    # Assigns the output file to the arguments
    convert_args.extend(['-o', output_filename])

    # bcftools subprocess call, reading directly from the vcftools stdout
    bcftools_call = subprocess.Popen(['bcftools'] + convert_args, stdin = vcftools_call.stdout, stdout = subprocess.PIPE, stderr = subprocess.PIPE)

    # Close the parent's copy of the vcftools stdout. bcftools holds its
    # own copy, and this lets vcftools receive SIGPIPE should bcftools exit
    # early
    vcftools_call.stdout.close()

    # Read the vcftools stderr (returns once vcftools closes it)
    vcftools_stderr = vcftools_call.stderr.read()
    vcftools_call.stderr.close()

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Wait for bcftools to finish and save its stdout and stderr
    bcftools_stdout, bcftools_stderr = bcftools_call.communicate()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()
        bcftools_stderr = bcftools_stderr.decode()

    # Check that vcftools finished without error
    check_vcftools_for_errors(vcftools_stderr)

    # Check that the BCF file was created correctly
    check_bcftools_for_errors(bcftools_stderr)

    logging.info('vcftools and bcftools calls complete')

    return vcftools_stderr
|
|
378
|
|
def pipe_vcftools_to_file (vcftools_call_args, output_filename, append_output = False):
    '''
    Pipes file output of vcftools to a standard file

    The function calls vcftools and copies its stdout, line by line, into
    the output file. It returns the stderr of vcftools so a log file of
    the call can be created. The function may be used to append multiple
    calls to vcftools to a single file: when appending to a file that
    already has content, the first line of the vcftools stdout (the
    header) is skipped.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the output file
    append_output : bool
        The output file should be written in append mode

    Returns
    -------
    vcftools_err : str
        vcftools log output

    Raises
    ------
    Exception
        If copying the vcftools stdout fails (the output file is deleted
        first), or if vcftools stderr returns an error
    '''

    # Open vcftools pipe
    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Create the output file, in append or write mode as requested
    output_file = open(output_filename, 'a' if append_output else 'w')

    # subprocess pipes yield bytes in python 3; decide once, not per line
    decode_stdout = sys.version_info[0] == 3

    try:
        # Create iterator of the vcftools stdout
        stdout_iter = iter(vcftools_call.stdout.readline, b'')

        # Check if the output is being appended to a non-empty file
        if append_output and os.stat(output_filename).st_size != 0:
            # Skip the header, the file already has one
            next(stdout_iter)

        # Iterate the vcftools stdout
        for vcftools_stdout_line in stdout_iter:

            # Convert bytes to string
            if decode_stdout:
                vcftools_stdout_line = vcftools_stdout_line.decode()

            output_file.write(vcftools_stdout_line)

        # Close the output file
        output_file.close()

    except BaseException as pipe_error:
        # Close the output file
        output_file.close()
        # Delete the file
        os.remove(output_filename)

        # Let KeyboardInterrupt / SystemExit through untouched
        if not isinstance(pipe_error, Exception):
            raise

        # Keep the underlying cause visible in the message
        raise Exception('vcftools to python pipe error: %r' % pipe_error)

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Close the vcftools stdout
    vcftools_call.stdout.close()

    # Read the vcftools stderr
    vcftools_stderr = vcftools_call.stderr.read()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()

    # Check that vcftools finished without error
    check_vcftools_for_errors(vcftools_stderr)

    logging.info('vcftools call complete')

    return vcftools_stderr
|
|
467
|
|
def standard_vcftools_call (vcftools_call_args):
    '''
    Runs vcftools and returns its log

    vcftools is started with the given arguments (each converted to a
    string) and run to completion. Its stdout is captured but discarded;
    only the stderr is kept, so that a log file of the call can be
    created.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments

    Returns
    -------
    vcftools_err : str
        vcftools log output (the stderr of the call)

    Raises
    ------
    Exception
        If vcftools stderr returns an error
    '''

    # Assemble the command line; no --stdout here, vcftools writes its own files
    vcftools_command = ['vcftools']
    vcftools_command.extend(str(call_arg) for call_arg in vcftools_call_args)

    # Start vcftools with both streams captured
    vcftools_process = subprocess.Popen(vcftools_command,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)

    # Block until vcftools exits; index 1 of the result is the stderr
    vcftools_log = vcftools_process.communicate()[1]

    # In python 3 the pipe yields bytes, so turn the log into a string
    running_python3 = sys.version_info[0] == 3
    if running_python3:
        vcftools_log = vcftools_log.decode()

    logging.info('vcftools call complete')

    # Raise if the log shows that vcftools did not finish cleanly
    check_vcftools_for_errors(vcftools_log)

    return vcftools_log
|
|
510
|
|
def call_vcftools (vcftools_call_args, output_format = None, output_filename = None):
    '''
    Runs vcftools using the strategy suited to the output format

    The output format selects how vcftools is run:

    - `vcf.gz`: stdout is piped to bgzip
    - `bcf`: stdout is piped to bcftools
    - `removed_bed` / `kept_bed`: stdout is converted to a bed file
    - `het-fis`: stdout is appended to the output file
    - anything else: vcftools is called directly

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_format : str
        The output format
    output_filename : str
        The output filename (used by the piped calls only)

    Returns
    -------
    vcftools_err : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr returns an error
    '''

    # bgzipped vcf: compress the vcftools stdout on the fly
    if output_format == 'vcf.gz':
        return pipe_vcftools_bgzip(vcftools_call_args, output_filename)

    # bcf: let bcftools convert the vcftools stdout
    if output_format == 'bcf':
        return pipe_vcftools_bcftools(vcftools_call_args, output_filename)

    # bed output: convert the site lines of the vcftools stdout
    if output_format in ('removed_bed', 'kept_bed'):
        return pipe_vcftools_to_bed_file(vcftools_call_args, output_filename)

    # het-fis: results of successive calls accumulate in a single file
    if output_format == 'het-fis':
        return pipe_vcftools_to_file(vcftools_call_args, output_filename, append_output = True)

    # Every other format: vcftools creates its own output
    return standard_vcftools_call(vcftools_call_args)
|
|
559
|
|
def check_for_vcftools_output (vcftools_output):
    '''
    Checks that no previous vcftools output is present

    Verifies that neither the output file nor its log file
    (`<vcftools_output>.log`) already exists. The output file is checked
    first.

    Parameters
    ----------
    vcftools_output : str
        Specifies the output filename to be checked

    Raises
    ------
    IOError
        If the vcftools output file exists
    IOError
        If the vcftools log file exists
    '''

    # The log shares the output name, with a .log suffix
    log_filename = vcftools_output + '.log'

    # Refuse to continue if the output file is already there
    output_exists = os.path.isfile(vcftools_output)
    if output_exists:
        raise IOError('VCF output file already exists')

    logging.info('Output file assigned')

    # Refuse to continue if the log file is already there
    log_exists = os.path.isfile(log_filename)
    if log_exists:
        raise IOError('Log file already exists')

    logging.info('Log file assigned')
|
|
590
|
|
def delete_vcftools_output (vcftools_output):
    '''
    Deletes previous vcftools output

    Confirms if previous vcftools output exists, and if so, deletes it.
    The same is done for the log file (`<vcftools_output>.log`). Files
    that do not exist are ignored.

    Parameters
    ----------
    vcftools_output : str
        Specifies the output filename to be deleted

    Raises
    ------
    IOError
        If the vcftools output cannot be deleted
    IOError
        If the vcftools log cannot be deleted
    '''

    # Check if output file already exists
    if os.path.isfile(vcftools_output):
        try:
            # Delete the output
            os.remove(vcftools_output)
        # Only filesystem failures are translated; interrupts and
        # programming errors propagate unchanged
        except OSError:
            raise IOError('VCF output file cannot be deleted')

    logging.info('Output file assigned')

    # The log shares the output name, with a .log suffix
    vcftools_log = vcftools_output + '.log'

    # Check if log file already exists
    if os.path.isfile(vcftools_log):
        try:
            # Delete the log
            os.remove(vcftools_log)
        except OSError:
            raise IOError('Log file cannot be deleted')

    logging.info('Log file assigned')
|
|
629
|
|
def check_vcftools_for_errors (vcftools_stderr):
    '''
    Checks the vcftools stderr for errors

    A log containing `Run Time` is accepted as a completed run. Otherwise
    an exception is raised: with the lines that start with `Error` when
    there are any, or with the whole stderr when there are none.

    Parameters
    ----------
    vcftools_stderr : str or bytes
        vcftools stderr

    Raises
    ------
    Exception
        If vcftools stderr does not show a completed run
    '''

    # subprocess pipes yield bytes in python 3 (in python 2 bytes is str,
    # so this branch is skipped)
    if isinstance(vcftools_stderr, bytes) and not isinstance(vcftools_stderr, str):
        vcftools_stderr = vcftools_stderr.decode(errors = 'replace')

    stderr_text = str(vcftools_stderr)

    # The job completed without error
    if 'Run Time' in stderr_text:
        return

    # Report the vcftools error line(s) if any are detected
    if 'Error' in stderr_text:
        error_lines = [output_line for output_line in stderr_text.splitlines() if output_line.startswith('Error')]

        # "Error" may appear mid-line only; then fall through and report
        # the whole log rather than raising an empty message
        if error_lines:
            raise Exception('\n'.join(error_lines))

    # Not completed and no error line found: report the whole stderr
    raise Exception(vcftools_stderr)
|
|
659
|
|
def produce_vcftools_output (output, filename, append_mode = False, strip_header = False):
    '''
    Creates the vcftools output file

    This function will create an output file from the vcftools stdout.
    Please run `check_vcftools_for_errors` prior to check that vcftools
    finished without error. Nothing is returned; the result is the file
    written to disk.

    Parameters
    ----------
    output : str
        vcftools stdout
    filename : str
        Specifies the filename for the output file
    append_mode : bool
        Used to create a single output file from multiple calls
    strip_header : bool
        Used to remove the header (the first line) if not needed
    '''

    # Check if the header should be stripped
    if strip_header:
        # Keep the line endings, drop only the first line
        output = ''.join(output.splitlines(True)[1:])

    # Append when a single output file is required from multiple calls
    file_mode = 'a' if append_mode else 'w'

    # The context manager closes the file even if the write fails
    with open(filename, file_mode) as vcftools_output_file:
        vcftools_output_file.write(str(output))
|
|
698
|
|
def produce_vcftools_log (output, filename, append_mode = False):
    '''
    Creates the vcftools log file

    This function will create a log file from the vcftools stderr. Please
    run `check_vcftools_for_errors` prior to check that vcftools finished
    without error. The log is written to `<filename>.log`; nothing is
    returned.

    Parameters
    ----------
    output : str
        vcftools stderr
    filename : str
        Specifies the filename for the log file (without the `.log`
        suffix, which is added here)
    append_mode : bool
        Used to create a single log file from multiple calls
    '''

    # Append when a single log file is required from multiple calls
    file_mode = 'a' if append_mode else 'w'

    # The context manager closes the file even if the write fails
    with open(filename + '.log', file_mode) as vcftools_log_file:
        vcftools_log_file.write(str(output))
|
|
730
|
|
def assign_vcftools_input_arg (filename):
    '''
    Selects the vcftools input argument for a file

    The file extension decides the argument when it is one of `.vcf`,
    `.vcf.gz` or `.bcf`. For any other name the format reported by
    `checkFormat` is used instead.

    Parameters
    ----------
    filename : str
        Specifies the input filename of unknown format

    Returns
    -------
    list
        Returns vcftools input command for `filename`, i.e. the input
        flag followed by the filename

    Raises
    ------
    Exception
        If filename is an unknown file format
    '''

    # Extensions recognized by vcftools, paired with their input flag
    extension_flags = (('.vcf', '--vcf'),
                       ('.vcf.gz', '--gzvcf'),
                       ('.bcf', '--bcf'))

    for extension, input_flag in extension_flags:
        if filename.endswith(extension):
            return [input_flag, filename]

    # Unknown extension: ask checkFormat what kind of file this is
    detected_format = checkFormat(filename)

    # Formats reported by checkFormat, paired with their input flag
    format_flags = (('vcf', '--vcf'),
                    ('bgzip', '--gzvcf'),
                    ('bcf', '--bcf'))

    for format_name, input_flag in format_flags:
        if detected_format == format_name:
            return [input_flag, filename]

    # Neither the extension nor the detected format is usable
    raise Exception('Unknown VCF file format')
|