comparison shapeit.py @ 0:3830d29fca6a draft

Uploaded
author jaredgk
date Mon, 15 Oct 2018 18:15:47 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3830d29fca6a
1 import os
2 import sys
3 import subprocess
4 import shutil
5 import argparse
6 import glob
7 import logging
8
9 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))
10
11 from vcf_reader_func import checkFormat
12 from logging_module import initLogger, logArgs
13 from plink import convert_haps_to_vcf
14 #from vcftools import bgzip_decompress_vcfgz
15 #from bcftools import convert_to_bcf, check_for_index, create_index
16
def check_shapeit_for_errors(shapeit_stdout, output_prefix):
    '''
    Verifies from the captured stdout that a shapeit run completed

    The presence of the text 'Running time:' in the stdout is used as the
    completion marker. When the marker is absent, the error-state
    intermediate files are removed and the stdout itself is raised as the
    error message.

    Parameters
    ----------
    shapeit_stdout : str
        Captured shapeit stdout
    output_prefix : str
        Output filename prefix, used to locate the intermediate files

    Raises
    ------
    Exception
        If the completion marker is not found in the shapeit stdout
    '''

    stdout_text = str(shapeit_stdout)

    # Nothing to do (and nothing returned) when the completion marker is found
    if 'Running time:' in stdout_text:
        return

    # Clean up the intermediate files before reporting the failure
    remove_intermediate_files(output_prefix, error_intermediates = True)
    raise Exception(stdout_text)
43
def remove_intermediate_files(output_prefix, error_intermediates = False):
    '''
    Deletes the intermediate files left behind by shapeit

    Which files are targeted depends on whether shapeit ended in an error:
    after an error only the '.phase.log' file is removed, otherwise the
    '.phase.ind.mm', '.phase.snp.mm', '.haps' and '.sample' files are
    removed. A warning is logged for every expected file that is missing.

    Parameters
    ----------
    output_prefix : str
        Output filename prefix shared by the intermediate files
    error_intermediates : bool, optional
        True if shapeit encountered an error (default: False)

    '''

    # Select the suffixes to clean up from the error-state of shapeit
    if error_intermediates:
        suffixes = ('.phase.log',)
    else:
        suffixes = ('.phase.ind.mm', '.phase.snp.mm', '.haps', '.sample')

    for suffix in suffixes:
        intermediate_path = output_prefix + suffix

        if os.path.isfile(intermediate_path):
            # Remove the shapeit intermediate file
            os.remove(intermediate_path)
        else:
            # The expected file was never created, warn rather than fail
            logging.warning('shapeit intermediate file %s%s does not exist' % (output_prefix, suffix))

    logging.info('shapeit-related files removed')
101
def standard_shapeit_call(shapeit_call_args, output_prefix):
    '''
    Calls shapeit using subprocess

    Runs the 'shapeit' executable with the given arguments, captures its
    stdout and stderr, and passes the stdout to check_shapeit_for_errors.
    The output_prefix is forwarded as well so that the intermediate files
    can be removed if shapeit results in an error. If the check fails,
    any text shapeit wrote to stderr is logged before the error is
    re-raised, so that the diagnostic output is not lost.

    Parameters
    ----------
    shapeit_call_args : list
        Argument list for shapeit
    output_prefix : str
        Output filename prefix

    Raises
    ------
    Exception
        Propagated from check_shapeit_for_errors if the run failed

    '''

    logging.info('shapeit phasing parameters assigned')

    # Phasing subprocess call
    phase_call = subprocess.Popen(['shapeit'] + shapeit_call_args, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
    phase_stdout, phase_stderr = phase_call.communicate()

    # Python 3 returns bytes from the pipes, python 2 already returns str
    if sys.version_info[0] >= 3:
        # Undecodable bytes are replaced rather than raising, so that a
        # decoding problem cannot hide the actual shapeit output
        phase_stdout = phase_stdout.decode(errors = 'replace')
        phase_stderr = phase_stderr.decode(errors = 'replace')

    # Check shapeit call for errors
    try:
        check_shapeit_for_errors(phase_stdout, output_prefix)
    except Exception:
        # The raised error only carries stdout; report stderr as well
        # before passing the original error on unchanged
        if phase_stderr.strip():
            logging.error('shapeit stderr: %s', phase_stderr.strip())
        raise

    logging.info('shapeit phasing complete (HAPS format)')
135
def call_shapeit(shapeit_call_args, output_prefix, output_format):
    '''
    Runs shapeit and converts its HAPS output

    Phasing is delegated to standard_shapeit_call. The resulting
    HAPS-format output is then handed to convert_haps_to_vcf together
    with the requested output format.

    Parameters
    ----------
    shapeit_call_args : list
        Argument list for shapeit
    output_prefix : str
        Output filename prefix
    output_format : str
        Output file format, passed through to convert_haps_to_vcf

    '''

    # Phase with shapeit (errors are raised by the standard call)
    standard_shapeit_call(shapeit_call_args, output_prefix)

    # Convert the phased haps-format output to the requested format
    convert_haps_to_vcf(output_prefix, output_format)

    logging.info('HAPS conversion to VCF complete')