Mercurial > repos > jaredgk > ppp_vcfphase
comparison shapeit.py @ 0:3830d29fca6a draft
Uploaded
author | jaredgk |
---|---|
date | Mon, 15 Oct 2018 18:15:47 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3830d29fca6a |
---|---|
1 import os | |
2 import sys | |
3 import subprocess | |
4 import shutil | |
5 import argparse | |
6 import glob | |
7 import logging | |
8 | |
9 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) | |
10 | |
11 from vcf_reader_func import checkFormat | |
12 from logging_module import initLogger, logArgs | |
13 from plink import convert_haps_to_vcf | |
14 #from vcftools import bgzip_decompress_vcfgz | |
15 #from bcftools import convert_to_bcf, check_for_index, create_index | |
16 | |
def check_shapeit_for_errors (shapeit_stdout, output_prefix):
    '''
    Checks the shapeit stdout for errors

    shapeit is treated as having completed when its stdout contains the
    text 'Running time:'. If that text is absent, the error-state
    intermediate files are removed and the captured stdout is raised as
    the error message.

    Parameters
    ----------
    shapeit_stdout : str
        shapeit stdout
    output_prefix : str
        Output filename prefix

    Raises
    ------
    Exception
        If the shapeit stdout does not contain 'Running time:'
    '''

    # Nothing to report (the function returns None) once the completion
    # summary has been printed
    if 'Running time:' in str(shapeit_stdout):
        return

    # Cleanup is best-effort: a failure to delete an intermediate file must
    # not replace the shapeit output, which is the error the user needs to see
    try:
        remove_intermediate_files(output_prefix, error_intermediates = True)
    except OSError as cleanup_error:
        logging.warning('Unable to remove shapeit intermediate files: %s', cleanup_error)

    raise Exception(str(shapeit_stdout))
43 | |
def remove_intermediate_files (output_prefix, error_intermediates = False):
    '''
    Removes shapeit intermediate files

    The files to remove are selected by the error-state of shapeit: after
    an error only the '.phase.log' file is removed, otherwise the
    '.phase.ind.mm', '.phase.snp.mm', '.haps' and '.sample' files are
    removed. A warning is logged for each expected file that is not found.

    Parameters
    ----------
    output_prefix : str
        Output filename prefix
    error_intermediates : bool, optional
        Defines if shapeit encountered an error

    '''

    # NOTE(review): assumes the .haps and .sample removals belong to the
    # non-error branch together with the two .mm files - confirm against the
    # indentation of the original file
    if error_intermediates:
        intermediate_suffixes = ('.phase.log',)
    else:
        intermediate_suffixes = ('.phase.ind.mm', '.phase.snp.mm', '.haps', '.sample')

    for suffix in intermediate_suffixes:
        intermediate_file = output_prefix + suffix

        if os.path.isfile(intermediate_file):
            os.remove(intermediate_file)
        else:
            # A missing intermediate is reported but never treated as fatal
            logging.warning('shapeit intermediate file %s does not exist', intermediate_file)

    logging.info('shapeit-related files removed')
101 | |
def standard_shapeit_call (shapeit_call_args, output_prefix):
    '''
    Calls shapeit using subprocess

    Runs the 'shapeit' executable with the given arguments, captures its
    stdout and stderr, and passes the stdout to check_shapeit_for_errors.
    output_prefix is forwarded as well so that shapeit intermediate files
    can be deleted if shapeit results in an error. Anything shapeit wrote
    to stderr is logged as a warning so it is not lost.

    Parameters
    ----------
    shapeit_call_args : list
        Argument list for shapeit
    output_prefix : str
        Output filename prefix

    '''

    logging.info('shapeit phasing parameters assigned')

    # Phasing subprocess call
    phase_call = subprocess.Popen(['shapeit'] + list(shapeit_call_args), stdout = subprocess.PIPE, stderr = subprocess.PIPE)
    phase_stdout, phase_stderr = phase_call.communicate()

    # The pipes yield bytes under python 3 and str under python 2; only the
    # former needs converting. Undecodable bytes are replaced rather than
    # allowed to raise and hide the shapeit result
    if not isinstance(phase_stdout, str):
        phase_stdout = phase_stdout.decode(errors = 'replace')

    # stderr is not inspected by check_shapeit_for_errors, so report it here;
    # otherwise a crash that only writes to stderr gives an empty error message
    if phase_stderr:
        if not isinstance(phase_stderr, str):
            phase_stderr = phase_stderr.decode(errors = 'replace')
        logging.warning('shapeit stderr: %s', phase_stderr.strip())

    # Check shapeit call for errors
    check_shapeit_for_errors(phase_stdout, output_prefix)

    logging.info('shapeit phasing complete (HAPS format)')
135 | |
def call_shapeit (shapeit_call_args, output_prefix, output_format):
    '''
    Runs shapeit and converts its output

    Phasing is delegated to standard_shapeit_call, after which
    convert_haps_to_vcf (imported from the plink module) is called with
    the output prefix and the requested output format.

    Parameters
    ----------
    shapeit_call_args : list
        Argument list for shapeit
    output_prefix : str
        Output filename prefix
    output_format : str
        Output file format

    '''

    # Phase with shapeit; this raises if shapeit did not complete
    standard_shapeit_call(shapeit_call_args, output_prefix)

    # Turn the HAPS-format result into the requested format
    convert_haps_to_vcf(output_prefix, output_format)

    logging.info('HAPS conversion to VCF complete')