0
|
1 import os
|
|
2 import sys
|
|
3 import subprocess
|
|
4 import shutil
|
|
5 import argparse
|
|
6 import glob
|
|
7 import logging
|
|
8
|
|
9 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))
|
|
10
|
|
11 from vcf_reader_func import checkFormat
|
|
12 from logging_module import initLogger, logArgs
|
|
13 from vcftools import bgzip_decompress_vcfgz
|
|
14 from bcftools import convert_to_bcf, check_for_index, create_index
|
|
15
|
|
def delete_beagle_log(output_prefix):
    '''
    Delete beagle log file

    This function is used to delete beagle's log file if an error is
    encountered. A warning is produced if the log file cannot be found.

    Parameters
    ----------
    output_prefix : str
        Output file prefix

    Raises
    ------
    OSError
        If the log file exists but cannot be removed
    '''

    log_path = output_prefix + '.log'

    # Attempt the removal directly rather than checking first, so a log
    # that disappears between a check and the removal only yields a warning
    try:
        os.remove(log_path)
    except OSError:
        # A regular file that could not be removed is a genuine failure
        if os.path.isfile(log_path):
            raise
        # Otherwise there was no log file to delete
        logging.warning('beagle log file %s.log does not exist', output_prefix)
|
|
34
|
|
def check_beagle_for_errors(beagle_stderr, output_prefix):
    '''
    Checks the beagle stderr for errors

    An empty (or whitespace-only) stderr is treated as a successful run.
    Any other content is treated as a failure: the beagle log file is
    deleted and an exception describing the problem is raised.

    Parameters
    ----------
    beagle_stderr : str
        beagle stderr
    output_prefix : str
        Output file prefix

    Raises
    ------
    Exception
        If beagle stderr is not empty
    '''

    # Nothing on stderr: beagle completed without an error
    if not beagle_stderr.strip():
        return

    # Every remaining path is a failure, so delete the beagle log file
    delete_beagle_log(output_prefix)

    stderr_text = str(beagle_stderr)

    # Report the missing data message if that is the likely cause
    if 'ERROR: genotype is missing allele separator:' in stderr_text:
        # Store reported error
        error_reported = 'ERROR: genotype is missing allele separator'
        # Store message for user about error
        user_message = 'Please confirm the input has no missing data.'
        raise Exception(error_reported + '\n' + user_message)

    # Report only the error line(s) if beagle flagged any
    if 'ERROR:' in stderr_text:
        error_lines = [output_line for output_line in beagle_stderr.splitlines()
                       if output_line.startswith('ERROR:')]
        # 'ERROR:' may appear mid-line (e.g. after a prefix); in that case
        # fall through and report the full stderr instead of an empty message
        if error_lines:
            raise Exception('\n'.join(error_lines))

    # No recognisable error line: report the complete stderr
    raise Exception(beagle_stderr)
|
|
84
|
|
85
|
|
def standard_beagle_call(beagle_path, beagle_call_args, output_prefix):
    '''
    Calls beagle using subprocess

    This function is used to call beagle under standard conditions. The
    function then passes the stderr to check_beagle_for_errors to check
    for errors. A non-zero exit status with an empty stderr is also
    reported as an error.

    Parameters
    ----------
    beagle_path : str
        Path to the directory containing beagle.jar
    beagle_call_args : list
        Argument list for beagle
    output_prefix : str
        Output file prefix

    Raises
    ------
    IOError
        If beagle.jar is not found within beagle_path
    '''

    # Assign location of beagle jar file
    beagle_jar = os.path.join(beagle_path, 'beagle.jar')

    # Check that beagle.jar exists
    if not os.path.isfile(beagle_jar):
        raise IOError('beagle.jar not found. Path specified: %s' % beagle_path)

    logging.info('beagle phasing parameters assigned')

    # Phasing subprocess call; list() allows any argument sequence
    phase_call = subprocess.Popen(['java', '-jar', beagle_jar] + list(beagle_call_args),
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)

    # stdout is drained by communicate() but not used
    _, phase_stderr = phase_call.communicate()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string; undecodable bytes are replaced so that a
        # decoding failure cannot hide the message reported by beagle
        phase_stderr = phase_stderr.decode(errors='replace')

    # A failed run that wrote nothing to stderr would otherwise be treated
    # as a success, so report the exit status through the same error check
    if phase_call.returncode != 0 and not phase_stderr.strip():
        phase_stderr = 'beagle exited with status %s without reporting an error' % phase_call.returncode

    # Check beagle call for errors
    check_beagle_for_errors(phase_stderr, output_prefix)

    logging.info('beagle phasing complete')
|
|
126
|
|
def call_beagle(beagle_path, beagle_call_args, output_prefix, output_format):
    '''
    Automates beagle calls

    This function passes the argument list to standard_beagle_call. Once the
    beagle call has finished, the function will automatically convert the
    bgzip compressed output of beagle to BCF or VCF, if either format is
    specified. Any other output format leaves the beagle output untouched.

    Parameters
    ----------
    beagle_path : str
        Path to beagle.jar
    beagle_call_args : list
        Argument list for beagle
    output_prefix : str
        Output file prefix
    output_format : str
        Output file format
    '''

    # Beagle writes its output to this bgzip compressed file
    vcfgz_filename = output_prefix + '.vcf.gz'

    # Record the arguments in the log rather than printing to stdout
    logging.info('beagle call arguments: %s', beagle_call_args)

    # Standard call to beagle
    standard_beagle_call(beagle_path, beagle_call_args, output_prefix)

    # Decompress if a VCF file is requested
    if output_format == 'vcf':
        bgzip_decompress_vcfgz(vcfgz_filename)

    # Convert to BCF if requested
    elif output_format == 'bcf':

        # Check if there is an index file. The explicit comparison is kept:
        # only a result equal to False triggers the creation of an index
        if check_for_index(vcfgz_filename) == False:
            # Create an index if not found
            create_index(vcfgz_filename)

        # Convert vcf.gz to bcf
        convert_to_bcf(vcfgz_filename, output_prefix)
|