Mercurial > repos > jaredgk > ppp_vcfphase
comparison vcftools.py @ 3:d1e3db7f6521 draft
Uploaded
author | jaredgk |
---|---|
date | Wed, 17 Oct 2018 17:28:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:54c84f7dcb2c | 3:d1e3db7f6521 |
---|---|
1 import os | |
2 import sys | |
3 import logging | |
4 import subprocess | |
5 | |
6 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir,'jared'))) | |
7 | |
8 from vcf_reader_func import checkFormat | |
9 from bcftools import check_bcftools_for_errors | |
10 | |
def check_bgzip_for_errors(bgzip_stderr):
    '''
    Checks the bgzip stderr for errors

    Any non-empty stderr is treated as a failure. The stderr text is
    included in the raised message so the cause is not lost.

    Parameters
    ----------
    bgzip_stderr : str or bytes
        bgzip stderr (bytes are decoded before being reported)

    Raises
    ------
    IOError
        If bgzip wrote anything to stderr
    '''

    # Empty (or None) stderr: nothing to report
    if not bgzip_stderr:
        return

    # subprocess pipes yield bytes under python 3; decode for a readable message
    if isinstance(bgzip_stderr, bytes) and not isinstance(bgzip_stderr, str):
        bgzip_stderr = bgzip_stderr.decode('utf-8', 'replace')

    # The same check is used for compression and decompression, so the
    # message names the tool rather than the operation
    raise IOError('Error occurred while running bgzip: %s' % bgzip_stderr.strip())
28 | |
def bgzip_decompress_vcfgz(vcfgz_filename, out_prefix = '', keep_original = False):
    '''
    Converts a vcf.gz to vcf

    The function automates bgzip to decompress a vcf.gz file into a vcf.
    When `keep_original` is set or `out_prefix` is given, bgzip writes to
    stdout (`-dc`) and the stream is saved to a new file; otherwise bgzip
    decompresses the file itself (`-d`).

    Parameters
    ----------
    vcfgz_filename : str
        The file name of the vcf.gz file to be decompressed
    out_prefix : str
        Output file prefix (i.e. filename without extension)
    keep_original : bool
        Specifies if the original file should be kept

    Raises
    ------
    IOError
        Error in creating the decompressed file
    '''

    # Run bgzip with stdout piped to file
    if keep_original or out_prefix:

        if out_prefix:

            # Assign the output filename from the prefix
            vcf_filename = out_prefix + '.vcf'

        else:

            # Separate into path and filename
            split_path, split_filename = os.path.split(vcfgz_filename)

            # Everything after the first extension separator is dropped
            vcf_basename = split_filename.split(os.extsep)[0] + '.vcf'

            # Join path and filename
            vcf_filename = os.path.join(split_path, vcf_basename)

        # The with-block guarantees the output handle is closed once bgzip
        # is done, even if the call fails
        with open(vcf_filename, 'wb') as vcf_file:

            # bgzip subprocess call
            bgzip_call = subprocess.Popen(['bgzip', '-dc', vcfgz_filename],
                                          stdout=vcf_file,
                                          stderr=subprocess.PIPE)

            # Wait for bgzip and save its stderr (stdout went to the file)
            _, bgzip_err = bgzip_call.communicate()

    # Run bgzip normally
    else:

        # bgzip subprocess call
        bgzip_call = subprocess.Popen(['bgzip', '-d', vcfgz_filename],
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)

        # Wait for bgzip and save its stderr
        _, bgzip_err = bgzip_call.communicate()

    # Check that the file was decompressed correctly
    check_bgzip_for_errors(bgzip_err)

    # Delete input when also using an output prefix (only reached when
    # the error check above did not raise)
    if out_prefix and not keep_original:
        os.remove(vcfgz_filename)
90 | |
def bgzip_compress_vcf(vcf_filename, out_prefix = '', keep_original = False):
    '''
    Converts a vcf to vcf.gz

    The function automates bgzip to compress a vcf file into a vcf.gz.
    When `keep_original` is set or `out_prefix` is given, bgzip writes to
    stdout (`-c`) and the stream is saved to a new file, leaving the input
    file in place; otherwise bgzip compresses the file itself.

    Parameters
    ----------
    vcf_filename : str
        The file name of the vcf file to be compressed
    out_prefix : str
        Output file prefix (i.e. filename without extension)
    keep_original : bool
        Specifies if the original file should be kept

    Raises
    ------
    IOError
        Error in creating the compressed file
    '''

    # Compress and keep the original file
    if keep_original or out_prefix:

        if out_prefix:

            # Assign the output filename from the prefix
            vcfgz_filename = out_prefix + '.vcf.gz'

        else:

            # Derive the output name from the *input* filename; the output
            # name does not exist yet at this point
            split_path, split_filename = os.path.split(vcf_filename)

            # Everything after the first extension separator is dropped
            vcfgz_basename = split_filename.split(os.extsep)[0] + '.vcf.gz'

            # Join path and filename
            vcfgz_filename = os.path.join(split_path, vcfgz_basename)

        # The with-block guarantees the output handle is closed once bgzip
        # is done, even if the call fails
        with open(vcfgz_filename, 'wb') as vcfgz_file:

            # bgzip subprocess call
            bgzip_call = subprocess.Popen(['bgzip', '-c', vcf_filename],
                                          stdout=vcfgz_file,
                                          stderr=subprocess.PIPE)

            # Wait for bgzip and save its stderr (stdout went to the file)
            _, bgzip_err = bgzip_call.communicate()

    # Compress and do not keep the original file
    else:

        # bgzip subprocess call
        bgzip_call = subprocess.Popen(['bgzip', vcf_filename],
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)

        # Wait for bgzip and save its stderr
        _, bgzip_err = bgzip_call.communicate()

    # Check that output file was compressed correctly
    check_bgzip_for_errors(bgzip_err)
147 | |
def cvt_vcftools_site_to_bed(vcftools_out_str):
    '''
    Converts one line of vcftools site output into a bed-style line

    Parameters
    ----------
    vcftools_out_str : str or bytes
        A single tab-delimited line whose second column is an integer
        position. Bytes (as read from a subprocess pipe under python 3)
        are decoded first.

    Returns
    -------
    str
        The input columns followed by an extra column holding
        position + 1, tab-joined and newline terminated. An empty string
        is returned for the header line and for blank lines.
    '''

    # Lines read straight from a pipe are bytes under python 3
    if isinstance(vcftools_out_str, bytes) and not isinstance(vcftools_out_str, str):
        vcftools_out_str = vcftools_out_str.decode()

    # Remove the header (the only line naming both columns)
    if 'CHROM' in vcftools_out_str and 'POS' in vcftools_out_str:
        return ''

    stripped_line = vcftools_out_str.strip()

    # Nothing to convert on a blank line
    if not stripped_line:
        return ''

    # Split the line into a list
    site_fields = stripped_line.split('\t')

    # Convert the chromStart to int
    chrom_start = int(site_fields[1])
    site_fields[1] = chrom_start

    # Add chromEnd (chromStart + 1) as the last column
    site_fields.append(chrom_start + 1)

    # Return the list as a string (with newline element)
    return '\t'.join(map(str, site_fields)) + '\n'
164 | |
def pipe_vcftools(vcftools_call_args):
    '''
    Starts vcftools with its results sent to a pipe

    vcftools is launched with `--stdout` and both of its output streams
    are captured as pipes. Nothing is waited on or validated here: the
    caller consumes the pipes and is expected to run
    check_vcftools_for_errors on the stderr once the call is finished.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments (each one is converted to str)

    Returns
    -------
    vcftools_call : subprocess.Popen
        The running vcftools process; `.stdout` holds the results pipe
        and `.stderr` holds the log pipe

    '''

    # Build the full command line, stringifying every argument
    vcftools_command = ['vcftools', '--stdout']
    vcftools_command.extend(str(call_arg) for call_arg in vcftools_call_args)

    # Launch vcftools with both streams piped
    return subprocess.Popen(vcftools_command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
195 | |
def pipe_vcftools_to_bed_file(vcftools_call_args, output_filename):
    '''
    Pipes site-file output of vcftools to a bed formatted file

    Each line of the vcftools stdout is converted with
    cvt_vcftools_site_to_bed and written to `output_filename`, so no
    intermediate site file is created.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the bed file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If a line cannot be converted or written (the partial bed file is
        deleted first), or if vcftools stderr reports an error
    '''

    # Open vcftools pipe
    vcftools_call = pipe_vcftools(vcftools_call_args)

    try:
        # Create the bed file; the with-block closes it on any outcome
        with open(output_filename, 'w') as bed_output:

            # Iterate the vcftools stdout until the pipe is exhausted
            for vcftools_stdout_line in iter(vcftools_call.stdout.readline, b''):

                # Check if code is running in python 3
                if sys.version_info[0] == 3:
                    # Convert bytes to string
                    vcftools_stdout_line = vcftools_stdout_line.decode()

                bed_output.write(cvt_vcftools_site_to_bed(vcftools_stdout_line))

    except Exception:
        # Shut the pipes so vcftools cannot block, then reap the process
        vcftools_call.stdout.close()
        vcftools_call.stderr.close()
        vcftools_call.wait()

        # Delete the incomplete bed file (it may not exist if open failed)
        if os.path.isfile(output_filename):
            os.remove(output_filename)

        # Surface the failure instead of silently returning without a file
        raise

    # Close the vcftools stdout
    vcftools_call.stdout.close()

    # Read the vcftools stderr before waiting, so a full stderr pipe
    # cannot stall the process
    vcftools_stderr = vcftools_call.stderr.read()
    vcftools_call.stderr.close()

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()

    # Check that the log file was created correctly
    check_vcftools_for_errors(vcftools_stderr)

    logging.info('vcftools call complete')

    return vcftools_stderr
251 | |
def pipe_vcftools_bgzip(vcftools_call_args, output_filename):
    '''
    Pipes the output of vcftools to bgzip

    The purpose of this function is to avoid creating large uncompressed
    vcf files by directly piping the output of vcftools to bgzip. This
    results in creating a vcf.gz file without any intermediates.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the compressed vcf file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr reports an error (checked first)
    IOError
        If bgzip wrote anything to stderr
    '''

    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Create bgzip output file; the with-block closes it on any outcome
    with open(output_filename, 'wb') as bgzip_output:

        # bgzip subprocess call, reading directly from the vcftools stdout
        bgzip_call = subprocess.Popen(['bgzip'],
                                      stdin=vcftools_call.stdout,
                                      stdout=bgzip_output,
                                      stderr=subprocess.PIPE)

        # Close this process's copy of the vcftools stdout so bgzip is the
        # only reader of the pipe
        vcftools_call.stdout.close()

        # Read the vcftools stderr before waiting, so a full stderr pipe
        # cannot stall the process
        vcftools_stderr = vcftools_call.stderr.read()
        vcftools_call.stderr.close()

        # Wait for vcftools to finish
        vcftools_call.wait()

        # Wait for bgzip to finish and save its stderr (stdout = None).
        # Done before any error check so bgzip is always reaped.
        bgzip_stdout, bgzip_stderr = bgzip_call.communicate()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()
        bgzip_stderr = bgzip_stderr.decode()

    # Check that the log file was created correctly
    check_vcftools_for_errors(vcftools_stderr)

    # Check that output file was compressed correctly
    check_bgzip_for_errors(bgzip_stderr)

    logging.info('vcftools and bgzip calls complete')

    return vcftools_stderr
314 | |
def pipe_vcftools_bcftools(vcftools_call_args, output_filename):
    '''
    Pipes the output of vcftools to bcftools

    The purpose of this function is to avoid the vcftools command
    --recode-bcf that may result in malformed BCF files. To avoid large
    uncompressed intermediates, this function pipes the stdout of vcftools
    to bcftools.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the BCF file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr reports an error (checked first). Errors found
        by check_bcftools_for_errors are raised by that function.
    '''

    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Arguments to convert to BCF format and write to the output file
    convert_args = ['view', '-O', 'b', '-o', output_filename]

    # bcftools subprocess call, reading directly from the vcftools stdout
    bcftools_call = subprocess.Popen(['bcftools'] + convert_args,
                                     stdin=vcftools_call.stdout,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

    # Close this process's copy of the vcftools stdout so bcftools is the
    # only reader of the pipe
    vcftools_call.stdout.close()

    # Read the vcftools stderr before waiting, so a full stderr pipe
    # cannot stall the process
    vcftools_stderr = vcftools_call.stderr.read()
    vcftools_call.stderr.close()

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Wait for bcftools to finish and save its stderr. Done before any
    # error check so bcftools is always reaped.
    bcftools_stdout, bcftools_stderr = bcftools_call.communicate()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()
        bcftools_stderr = bcftools_stderr.decode()

    # Check that the log file was created correctly
    check_vcftools_for_errors(vcftools_stderr)

    # Check that the BCF file was created correctly
    check_bcftools_for_errors(bcftools_stderr)

    logging.info('vcftools and bcftools calls complete')

    return vcftools_stderr
378 | |
def pipe_vcftools_to_file(vcftools_call_args, output_filename, append_output = False):
    '''
    Pipes file output of vcftools to a standard file

    The function calls vcftools. Returns the stderr of vcftools to
    create log file of the call. The function may be used to append multiple
    calls to vcftools to a single file; when appending to a file that
    already has content, the first line of the vcftools output (the
    header) is skipped.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the output file
    append_output : bool
        The output file should be written in append mode

    Returns
    -------
    vcftools_err : str
        vcftools log output

    Raises
    ------
    Exception
        If the vcftools stdout cannot be copied to the file (the output
        file is deleted first), or if vcftools stderr returns an error
    '''

    # Open vcftools pipe
    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Create the output file in append or write mode
    output_file = open(output_filename, 'a' if append_output else 'w')

    try:
        # Create iterator of the vcftools stdout
        stdout_iter = iter(vcftools_call.stdout.readline, b'')

        # Check if the output is being appended and the file isn't empty
        if append_output and os.stat(output_filename).st_size != 0:
            # Skip the header. The default keeps an empty vcftools stdout
            # (e.g. a failed call) from raising StopIteration here, so the
            # stderr check below can report the real error.
            next(stdout_iter, None)

        # Iterate the vcftools stdout
        for vcftools_stdout_line in stdout_iter:

            # Check if code is running in python 3
            if sys.version_info[0] == 3:
                # Convert bytes to string
                vcftools_stdout_line = vcftools_stdout_line.decode()

            output_file.write(vcftools_stdout_line)

        # Close the output file
        output_file.close()

    except Exception:
        # Close the output file
        output_file.close()

        # Shut the pipes so vcftools cannot block, then reap the process
        vcftools_call.stdout.close()
        vcftools_call.stderr.close()
        vcftools_call.wait()

        # Delete the file
        os.remove(output_filename)

        raise Exception('vcftools to python pipe error')

    # Close the vcftools stdout
    vcftools_call.stdout.close()

    # Read the vcftools stderr before waiting, so a full stderr pipe
    # cannot stall the process
    vcftools_stderr = vcftools_call.stderr.read()
    vcftools_call.stderr.close()

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()

    # Check that the log file was created correctly
    check_vcftools_for_errors(vcftools_stderr)

    logging.info('vcftools call complete')

    return vcftools_stderr
467 | |
def standard_vcftools_call(vcftools_call_args):
    '''
    Runs vcftools and returns its log

    vcftools is run to completion with both streams captured. Only the
    stderr (the vcftools log) is inspected and returned; the captured
    stdout is discarded.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments (each one is converted to str)

    Returns
    -------
    vcftools_err : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr returns an error
    '''

    # Assemble the command without --stdout; arguments are stringified
    vcftools_command = ['vcftools'] + [str(call_arg) for call_arg in vcftools_call_args]

    # Run vcftools and block until it has finished
    vcftools_process = subprocess.Popen(vcftools_command,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
    vcftools_stdout, vcftools_stderr = vcftools_process.communicate()

    # Pipes yield bytes under python 3; convert the log to a string
    if sys.version_info[0] == 3:
        vcftools_stderr = vcftools_stderr.decode()

    logging.info('vcftools call complete')

    # Raise if the log shows the run did not complete
    check_vcftools_for_errors(vcftools_stderr)

    return vcftools_stderr
510 | |
def call_vcftools(vcftools_call_args, output_format = None, output_filename = None):
    '''
    Dispatches a vcftools call based on the requested output format

    The output format selects how the vcftools results are handled:
    'vcf.gz' is piped through bgzip, 'bcf' is piped through bcftools,
    'removed_bed' and 'kept_bed' are converted to a bed file, 'het-fis'
    is appended to a plain file, and anything else is run as a standard
    vcftools call.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_format : str
        The output format
    output_filename : str
        The output filename (used by the piped calls only)

    Returns
    -------
    vcftools_err : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr returns an error
    '''

    # bgzipped vcf: pipe vcftools stdout to bgzip
    if output_format == 'vcf.gz':
        return pipe_vcftools_bgzip(vcftools_call_args, output_filename)

    # bcf: pipe vcftools stdout to bcftools
    if output_format == 'bcf':
        return pipe_vcftools_bcftools(vcftools_call_args, output_filename)

    # Site output: pipe vcftools stdout to a bed file
    if output_format == 'removed_bed' or output_format == 'kept_bed':
        return pipe_vcftools_to_bed_file(vcftools_call_args, output_filename)

    # het-fis: results are collected in a single file, opened in append mode
    if output_format == 'het-fis':
        return pipe_vcftools_to_file(vcftools_call_args, output_filename, append_output = True)

    # Any other format: call vcftools under standard conditions
    return standard_vcftools_call(vcftools_call_args)
559 | |
def check_for_vcftools_output(vcftools_output):
    '''
    Guards against overwriting previous vcftools results

    Verifies that neither the output file nor its log
    (`vcftools_output` + '.log') already exists. The output file is
    checked first.

    Parameters
    ----------
    vcftools_output : str
        Specifies the output filename to be checked

    Raises
    ------
    IOError
        If the vcftools output file exists
    IOError
        If the vcftools log file exists

    '''

    # (path to test, error if it exists, message logged if it does not)
    planned_files = (
        (vcftools_output, 'VCF output file already exists', 'Output file assigned'),
        (vcftools_output + '.log', 'Log file already exists', 'Log file assigned'),
    )

    for planned_path, exists_error, assigned_message in planned_files:

        # Refuse to continue if the file is already present
        if os.path.isfile(planned_path):
            raise IOError(exists_error)

        logging.info(assigned_message)
590 | |
def delete_vcftools_output(vcftools_output):
    '''
    Deletes previous vcftools output

    Confirms if previous vcftools output exists, and if so, deletes it.
    The same is done for the log file (`vcftools_output` + '.log').
    Files that do not exist are skipped.

    Parameters
    ----------
    vcftools_output : str
        Specifies the output filename to be deleted

    Raises
    ------
    IOError
        If the vcftools output cannot be deleted
    IOError
        If the vcftools log cannot be deleted
    '''

    # (path to delete, error if deletion fails, message logged afterwards)
    previous_files = (
        (vcftools_output, 'VCF output file cannot be deleted', 'Output file assigned'),
        (vcftools_output + '.log', 'Log file cannot be deleted', 'Log file assigned'),
    )

    for previous_path, delete_error, assigned_message in previous_files:

        # Check if the file already exists
        if os.path.isfile(previous_path):
            try:
                # Delete the file
                os.remove(previous_path)
            # Only filesystem failures are translated; interrupts and
            # other unrelated exceptions propagate unchanged
            except OSError:
                raise IOError(delete_error)

        logging.info(assigned_message)
629 | |
def check_vcftools_for_errors(vcftools_stderr):
    '''
    Checks the vcftools stderr for errors

    A log containing 'Run Time' is accepted as a completed run. Otherwise
    the log lines starting with 'Error' are raised; if there are none,
    the whole log is raised.

    Parameters
    ----------
    vcftools_stderr : str or bytes
        vcftools stderr (bytes are decoded before being inspected)

    Raises
    ------
    Exception
        If vcftools stderr does not show a completed run
    '''

    # Pipes yield bytes under python 3; decode so the line checks below
    # compare text with text
    if isinstance(vcftools_stderr, bytes) and not isinstance(vcftools_stderr, str):
        vcftools_stderr = vcftools_stderr.decode('utf-8', 'replace')

    stderr_text = str(vcftools_stderr)

    # The job completed without error
    if 'Run Time' in stderr_text:
        return

    # Report only the error line(s) when vcftools printed any
    if 'Error' in stderr_text:
        error_lines = [output_line for output_line in stderr_text.splitlines()
                       if output_line.startswith('Error')]

        # 'Error' may appear mid-line only; fall through to the full log
        # rather than raising an empty message
        if error_lines:
            raise Exception('\n'.join(error_lines))

    # Not completed and no error line found: report the whole log
    raise Exception(vcftools_stderr)
659 | |
def produce_vcftools_output(output, filename, append_mode = False, strip_header = False):
    '''
    Writes the vcftools stdout to a file

    The text is written to `filename`, optionally without its first line
    and optionally appended to existing content. Nothing is returned.
    Please run `check_vcftools_for_errors` beforehand to confirm that
    vcftools finished without error.

    Parameters
    ----------
    output : str
        vcftools stdout
    filename : str
        Specifies the filename for the output file
    append_mode : bool
        Used to create a single output file from multiple calls
    strip_header : bool
        Used to remove the header (the first line) if not needed

    '''

    # Drop the first line, keeping the line endings of the rest intact
    if strip_header:
        output_lines = output.splitlines(True)
        del output_lines[:1]
        output = ''.join(output_lines)

    # Append when a single file is built from multiple calls
    file_mode = 'a' if append_mode else 'w'

    vcftools_output_file = open(filename, file_mode)
    vcftools_output_file.write(str(output))
    vcftools_output_file.close()
698 | |
def produce_vcftools_log(output, filename, append_mode = False):
    '''
    Writes the vcftools stderr to a log file

    The log is written to `filename` + '.log', optionally appended to
    existing content. Nothing is returned. Please run
    `check_vcftools_for_errors` beforehand to confirm that vcftools
    finished without error.

    Parameters
    ----------
    output : str
        vcftools stderr
    filename : str
        Specifies the filename for the log file (without the '.log' suffix)
    append_mode : bool
        Used to create a single log file from multiple calls

    '''

    log_filename = filename + '.log'

    # Append when a single log is built from multiple calls
    file_mode = 'a' if append_mode else 'w'

    vcftools_log_file = open(log_filename, file_mode)
    vcftools_log_file.write(str(output))
    vcftools_log_file.close()
730 | |
def assign_vcftools_input_arg(filename):
    '''
    Selects the vcftools input option for a file

    The file extension is tried first ('.vcf', '.vcf.gz', '.bcf'). If
    none matches, the format reported by checkFormat decides.

    Parameters
    ----------
    filename : str
        Specifies the input filename of unknown format

    Returns
    -------
    list
        Returns vcftools input command for `filename`

    Raises
    ------
    Exception
        If filename is an unknown file format
    '''

    # File extensions recognized by vcftools and their input option
    extension_options = (('.vcf', '--vcf'), ('.vcf.gz', '--gzvcf'), ('.bcf', '--bcf'))

    for file_extension, input_option in extension_options:
        if filename.endswith(file_extension):
            return [input_option, filename]

    # Extension unknown: ask checkFormat for the format
    # (presumably inspects the file contents; verify against vcf_reader_func)
    vcfname_format = checkFormat(filename)

    # Formats reported by checkFormat and their input option
    format_options = (('vcf', '--vcf'), ('bgzip', '--gzvcf'), ('bcf', '--bcf'))

    for format_name, input_option in format_options:
        if vcfname_format == format_name:
            return [input_option, filename]

    raise Exception('Unknown VCF file format')