0
|
1 import os
|
|
2 import sys
|
|
3 import copy
|
|
4 import shutil
|
|
5 import argparse
|
|
6 import glob
|
|
7 import logging
|
|
8
|
|
9 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))
|
|
10
|
|
11 from vcf_reader_func import checkFormat
|
|
12 from logging_module import initLogger, logArgs
|
|
13 from model import read_model_file
|
|
14 from beagle import call_beagle
|
|
15 from shapeit import call_shapeit, remove_intermediate_files
|
|
16 from bcftools import pipe_bcftools_to_chr, chr_subset_file, concatenate
|
|
17
|
|
18 def phase_argument_parser(passed_arguments):
|
|
19 '''Phase Argument Parser - Assigns arguments for vcftools from command line.
|
|
20 Depending on the argument in question, a default value may be specified'''
|
|
21
|
|
22 def parser_confirm_file ():
|
|
23 '''Custom action to confirm file exists'''
|
|
24 class customAction(argparse.Action):
|
|
25 def __call__(self, parser, args, value, option_string=None):
|
|
26 if not os.path.isfile(value):
|
|
27 raise IOError('%s not found' % value)
|
|
28 setattr(args, self.dest, value)
|
|
29 return customAction
|
|
30
|
|
31 def metavar_list (var_list):
|
|
32 '''Create a formmated metavar list for the help output'''
|
|
33 return '{' + ', '.join(var_list) + '}'
|
|
34
|
|
35 phase_parser = argparse.ArgumentParser()
|
|
36
|
|
37 # Input arguments
|
|
38 phase_parser.add_argument('--vcf', help = "Input VCF filename", type = str, required = True, action = parser_confirm_file())
|
|
39
|
|
40 # Model file arguments
|
|
41 phase_parser.add_argument('--model-file', help = 'Defines the model file', type = str, action = parser_confirm_file())
|
|
42 phase_parser.add_argument('--model', help = 'Defines the model to analyze', type = str)
|
|
43
|
|
44 # General arguments
|
|
45 phase_parser.add_argument('--overwrite', help = "Overwrite previous output files", action = 'store_true')
|
|
46 phase_parser.add_argument('--beagle-path', help = "Defines path to locate beagle.jar", type = str, default = 'bin/')
|
|
47
|
|
48 # Phase algorithm argument
|
|
49 phasing_list = ['beagle', 'shapeit']
|
|
50 phasing_default = 'beagle'
|
|
51 phase_parser.add_argument('--phase-algorithm', metavar = metavar_list(phasing_list), help = 'Specifies the phase algorithm to be used', type = str, choices = phasing_list, default = phasing_default)
|
|
52
|
|
53 # Common phasing arguments
|
|
54 phase_parser.add_argument('--Ne', help = 'Defines the effective population size', type = int)
|
|
55 phase_parser.add_argument('--random-seed', help="Defines the random seed value for the random number generator", type = int)
|
|
56 phase_parser.add_argument('--genetic-map', help = 'Genetic map filename', type = str, action = parser_confirm_file())
|
|
57 phase_parser.add_argument('--phase-chr', help = 'Selects a single chromosome to phase', type = str)
|
|
58 phase_parser.add_argument('--phase-from-bp', help = 'Lower bound of sites to include (Only usable with a single chromosome)', type = int)
|
|
59 phase_parser.add_argument('--phase-to-bp', help = 'Upper bound of sites to include (Only usable with a single chromosome)', type = int)
|
|
60
|
|
61 # Shapeit-specific options
|
|
62 phase_parser.add_argument('--shapeit-burn-iter', help = 'Number of the burn-in iterations (shapeit)', type = int)
|
|
63 phase_parser.add_argument('--shapeit-prune-iter', help = 'Number of pruning iterations (shapeit)', type = int)
|
|
64 phase_parser.add_argument('--shapeit-main-iter', help = 'Number of main iterations (shapeit)', type = int)
|
|
65 phase_parser.add_argument('--shapeit-states', help = 'Number of conditioning states for haplotype estimation (shapeit)', type = int)
|
|
66 phase_parser.add_argument('--shapeit-window', help = 'Model window size in Mb (shapeit)', type = float)
|
|
67
|
|
68 # Beagle-specific options
|
|
69 phase_parser.add_argument('--beagle-burn-iter', help = 'Number of the burn-in iterations (beagle)', type = int)
|
|
70 phase_parser.add_argument('--beagle-iter', help = 'Number of iterations after burn-in (beagle)', type = int)
|
|
71 phase_parser.add_argument('--beagle-states', help = 'Number of model states for genotype estimation (beagle)', type = int)
|
|
72 phase_parser.add_argument('--beagle-window', help = 'Sliding window size in cM (beagle)', type = float)
|
|
73 phase_parser.add_argument('--beagle-overlap', help = 'Overlap between neighboring sliding windows in cM (beagle)', type = float)
|
|
74 phase_parser.add_argument('--beagle-error', help = 'HMM allele mismatch probability (beagle)', type = float)
|
|
75 phase_parser.add_argument('--beagle-step', help = 'Step length in cM used for identifying short IBS segments (beagle)', type = float)
|
|
76 phase_parser.add_argument('--beagle-nsteps', help = 'Number of consecutive --beagle-steps used for identifying long IBS segments (beagle)', type = int)
|
|
77
|
|
78 # Output arguments
|
|
79 phase_parser.add_argument('--out', help = 'Defines the output filename')
|
|
80 phase_parser.add_argument('--out-prefix', help = 'Defines the output prefix (used by phasing algorithms)', default = 'out')
|
|
81
|
|
82 out_format_list = ['vcf', 'vcf.gz', 'bcf']
|
|
83 out_format_default = 'vcf.gz'
|
|
84
|
|
85 phase_parser.add_argument('--out-format', metavar = metavar_list(out_format_list), help = 'Specifies the output format.', type = str, choices = out_format_list, default = out_format_default)
|
|
86
|
|
87 if passed_arguments:
|
|
88 return phase_parser.parse_args(passed_arguments)
|
|
89 else:
|
|
90 return phase_parser.parse_args()
|
|
91
|
|
92 def concatenate_logs (log_files, new_log_filename):
|
|
93 '''
|
|
94 Concatenates log files
|
|
95
|
|
96 This function is used to concatenate log files. If a single chromosome
|
|
97 is being analyzed, the function is used to concatenate the log files
|
|
98 from multiple wrappers. If multiple chromosomes are being analyzed the
|
|
99 function is used to concatenate the logs of each chromosome.
|
|
100
|
|
101 Raises
|
|
102 ------
|
|
103 IOError
|
|
104 If a log file does not exist
|
|
105 '''
|
|
106
|
|
107 # Open the new log file
|
|
108 with open(new_log_filename, 'wb') as out_file:
|
|
109 # Loop the logs to be combined
|
|
110 for log_file in log_files:
|
|
111 # Confirm the log file exists, raise an error if not
|
|
112 if not os.path.isfile(log_file):
|
|
113 raise IOError('%s not found' % log_file)
|
|
114 # Open the log file
|
|
115 with open(log_file, 'rb') as in_file:
|
|
116 # Copy the contents of the file
|
|
117 shutil.copyfileobj(in_file, out_file)
|
|
118
|
|
119 # Remove old log files
|
|
120 for log_file in log_files:
|
|
121 os.remove(log_file)
|
|
122
|
|
123 def assign_vcf_extension (filename):
|
|
124 # Checks if the file is unzipped, bgzipped, or gzipped
|
|
125 vcfname_format = checkFormat(filename)
|
|
126
|
|
127 if vcfname_format == 'vcf':
|
|
128 return '.vcf'
|
|
129
|
|
130 elif vcfname_format == 'gzip' or vcfname_format == 'bgzip':
|
|
131 return '.vcf.gz'
|
|
132
|
|
133 elif vcfname_format == 'bcf':
|
|
134 raise Exception('BCF not supported in current version')
|
|
135
|
|
136 else:
|
|
137 raise Exception('Unknown file format')
|
|
138
|
|
139 def run (passed_arguments = []):
|
|
140 '''
|
|
141 Phaser for VCF files.
|
|
142
|
|
143 Automates the phasing process for a specified VCF file. The function
|
|
144 allows users to select between multiple phasing algorithms: beagle
|
|
145 (default) and shapit.
|
|
146
|
|
147 Parameters
|
|
148 ----------
|
|
149 --vcf : str
|
|
150 Specifies the input VCF filename
|
|
151 --phase-algorithm : str
|
|
152 Specifies the algorithm to be used. Choices: beagle (default) and
|
|
153 shapit
|
|
154 --out : str
|
|
155 Specifies the output filename
|
|
156
|
|
157 Returns
|
|
158 -------
|
|
159 output : file
|
|
160 Phased VCF file
|
|
161
|
|
162 Raises
|
|
163 ------
|
|
164 IOError
|
|
165 Input VCF file does not exist
|
|
166 IOError
|
|
167 Output file already exists
|
|
168
|
|
169 '''
|
|
170
|
|
171 # Grab VCF arguments from command line
|
|
172 phase_args = phase_argument_parser(passed_arguments)
|
|
173
|
|
174 # Adds the arguments (i.e. parameters) to the log file
|
|
175 logArgs(phase_args, func_name = 'vcf_phase')
|
|
176
|
|
177 # Assign file extension for VCF input file
|
|
178 vcfname_ext = assign_vcf_extension(phase_args.vcf)
|
|
179
|
|
180 logging.info('Input file assigned')
|
|
181
|
|
182 # Used to confirm if the VCF input was renamed
|
|
183 vcfname_renamed = False
|
|
184
|
|
185 # Confirm input has correct file extension
|
|
186 if vcfname_ext not in phase_args.vcf:
|
|
187 vcfname_renamed = True
|
|
188 os.rename(phase_args.vcf, phase_args.vcf + vcfname_ext)
|
|
189 phase_args.vcf += vcfname_ext
|
|
190
|
|
191 # List to hold beagle/shapeit arguments
|
|
192 phase_call_args = []
|
|
193
|
|
194 # bool to check if an eclude file was created
|
|
195 exclude_file_created = False
|
|
196
|
|
197 # bool to check if an include file was created
|
|
198 include_file_created = False
|
|
199
|
|
200 # Performs actions related to --model-file and argument assignment
|
|
201 if phase_args.model_file:
|
|
202
|
|
203 # Check if a model has been specified
|
|
204 if not phase_args.model:
|
|
205 raise IOError('No selected model. Please use --model to select a model')
|
|
206
|
|
207 # Read in the model file
|
|
208 models_file = read_model_file(phase_args.model_file)
|
|
209
|
|
210 # Check that the selected model was found in the file
|
|
211 if phase_args.model not in models_file:
|
|
212 raise IOError('Selected model "%s" not found in: %s' % (phase_args.model, phase_args.model_file))
|
|
213
|
|
214 logging.info('Model assigned')
|
|
215
|
|
216 if phase_args.phase_algorithm == 'beagle':
|
|
217
|
|
218 # Get list of individuals to include
|
|
219 inds_in_model = models_file[phase_args.model].inds
|
|
220
|
|
221 # Create file with individuals to exclude (beagle has no include option)
|
|
222 models_file.create_exclude_ind_file(inds_to_include = inds_in_model, overwrite = phase_args.overwrite)
|
|
223
|
|
224 # Assign exclude file
|
|
225 phase_call_args.append('excludesamples=' + models_file.exclude_file)
|
|
226
|
|
227 # Confirm that the exclude file was created
|
|
228 exclude_file_created = True
|
|
229
|
|
230 elif phase_args.phase_algorithm == 'shapeit':
|
|
231
|
|
232 # Assign the model
|
|
233 selected_model = models_file[phase_args.model]
|
|
234
|
|
235 # Create file with individuals to exclude (beagle has no include option)
|
|
236 selected_model.create_ind_file(overwrite = phase_args.overwrite)
|
|
237
|
|
238 # Assign exclude file
|
|
239 phase_call_args.extend(['--include-ind', selected_model.ind_file])
|
|
240
|
|
241 # Confirm that the include file was created
|
|
242 include_file_created = True
|
|
243
|
|
244 # Get the list of chromosomes within the VCF
|
|
245 chrs_in_vcf = pipe_bcftools_to_chr(phase_args.vcf)
|
|
246
|
|
247 # Check if the user specified a specific chromosome
|
|
248 if phase_args.phase_chr:
|
|
249 # Check if the chromosome is within the file
|
|
250 if phase_args.phase_chr not in chrs_in_vcf:
|
|
251 raise Exception('--phase-chr %s not found in %s' % (phase_args.phase_chr, phase_args.vcf))
|
|
252
|
|
253 # Check that a chr was specified if a bp-based argument was specified
|
|
254 if (phase_args.phase_from_bp or phase_args.phase_to_bp) and not phase_args.phase_chr:
|
|
255 # Check if the file has a single chromosome
|
|
256 if len(chrs_in_vcf) == 1:
|
|
257 # Check if the beagle algorithm is being used
|
|
258 if phase_args.phase_algorithm == 'beagle':
|
|
259 # Assign the chromosome, as beagle requires it to be assigned
|
|
260 phase_args.phase_chr = chrs_in_vcf[0]
|
|
261
|
|
262 logging.info('Assigned chr %s for beagle' % chrs_in_vcf[0])
|
|
263 # Report error if multiple chromosomes are within the file
|
|
264 else:
|
|
265 raise Exception('The --phase-from-bp and --phase-to-bp arguments '
|
|
266 'require the --phase-chr argument to function if '
|
|
267 'multiple chrs are within %s' % phase_args.vcf)
|
|
268
|
|
269 # Assign general arguments and call beagle
|
|
270 if phase_args.phase_algorithm == 'beagle':
|
|
271
|
|
272 # Assigns the input and output arguments
|
|
273 phase_call_args.extend(['gt=' + phase_args.vcf,
|
|
274 'out=' + phase_args.out_prefix])
|
|
275
|
|
276 # Assign the burn-in iter, if specified
|
|
277 if phase_args.beagle_burn_iter:
|
|
278 phase_call_args.append('burnin=' + phase_args.beagle_burn_iter)
|
|
279
|
|
280 # Assign the iter, if specified
|
|
281 if phase_args.beagle_iter:
|
|
282 phase_call_args.append('iterations=' + phase_args.beagle_iter)
|
|
283
|
|
284 # Assign the state count, if specified
|
|
285 if phase_args.beagle_states:
|
|
286 phase_call_args.append('phase-states=' + phase_args.beagle_states)
|
|
287
|
|
288 # Assign the window length, if specified
|
|
289 if phase_args.beagle_window:
|
|
290 phase_call_args.append('window=' + phase_args.beagle_window)
|
|
291
|
|
292 # Assign the window overlap, if specified
|
|
293 if phase_args.beagle_overlap:
|
|
294 phase_call_args.append('overlap=' + phase_args.beagle_overlap)
|
|
295
|
|
296 # Assign the HMM error, if specified
|
|
297 if phase_args.beagle_error:
|
|
298 phase_call_args.append('err=' + phase_args.beagle_error)
|
|
299
|
|
300 # Assign the step length, if specified
|
|
301 if phase_args.beagle_step:
|
|
302 phase_call_args.append('step=' + phase_args.beagle_step)
|
|
303
|
|
304 # Assign the number of steps, if specified
|
|
305 if phase_args.beagle_nsteps:
|
|
306 phase_call_args.append('nsteps=' + phase_args.beagle_nsteps)
|
|
307
|
|
308 # Assign the genetic map, if specified
|
|
309 if phase_args.genetic_map:
|
|
310 phase_call_args.append('map=' + phase_args.genetic_map)
|
|
311
|
|
312 # Assign the effective pop size, if specified
|
|
313 if phase_args.Ne:
|
|
314 phase_call_args.append('ne=' + phase_args.Ne)
|
|
315
|
|
316 # Assign the random seed, if specified
|
|
317 if phase_args.random_seed:
|
|
318 phase_call_args.append('seed=' + phase_args.random_seed)
|
|
319
|
|
320 # Assign the chromosome to phase, if specified
|
|
321 if phase_args.phase_chr:
|
|
322
|
|
323 # Store the chromosome argument
|
|
324 chr_arg = 'chrom=%s' % phase_args.phase_chr
|
|
325
|
|
326 # Check if either bp position arguments were specified
|
|
327 if phase_args.phase_from_bp or phase_args.phase_to_bp:
|
|
328
|
|
329 # List of the position arguments, in their required order
|
|
330 position_args = [':', phase_args.phase_from_bp, '-', phase_args.phase_to_bp]
|
|
331
|
|
332 # Filter the position arguments to remove empty values
|
|
333 filttered_position_args = filter(None, position_args)
|
|
334
|
|
335 # Map the arguments to str and add them to the chromosome argument
|
|
336 chr_arg += ''.join(map(str, filttered_position_args))
|
|
337
|
|
338 # Add the chromosome argument the argument list
|
|
339 phase_call_args.append(chr_arg)
|
|
340
|
|
341 # Call beagle wrapper
|
|
342 call_beagle(phase_args.beagle_path, list(map(str, phase_call_args)), phase_args.out_prefix, phase_args.out_format)
|
|
343
|
|
344 # Assign expected phased output filename
|
|
345 phased_output = '%s.%s' % (phase_args.out_prefix, phase_args.out_format)
|
|
346
|
|
347 # Rename output to phase_args.out, if specified
|
|
348 if phase_args.out:
|
|
349 shutil.move(phased_output, phase_args.out)
|
|
350 shutil.move(phase_args.out_prefix + '.log', phase_args.out + '.log')
|
|
351 # Rename log using phased_output
|
|
352 else:
|
|
353 shutil.move(phase_args.out_prefix + '.log', phased_output + '.log')
|
|
354
|
|
355 logging.info('beagle log file created')
|
|
356
|
|
357 # Assign general arguments and call shapeit
|
|
358 elif phase_args.phase_algorithm == 'shapeit':
|
|
359
|
|
360 # Assign the shapeit burn in iter, if specified
|
|
361 if phase_args.shapeit_burn_iter:
|
|
362 phase_call_args.extend(['--burn', phase_args.shapeit_burn_iter])
|
|
363
|
|
364 # Assign the shapeit prune iter, if specified
|
|
365 if phase_args.shapeit_prune_iter:
|
|
366 phase_call_args.extend(['--prune', phase_args.shapeit_prune_iter])
|
|
367
|
|
368 # Assign the shapeit main iter, if specified
|
|
369 if phase_args.shapeit_main_iter:
|
|
370 phase_call_args.extend(['--main', phase_args.shapeit_main_iter])
|
|
371
|
|
372 # Assign the number of shapeit states if specified
|
|
373 if phase_args.shapeit_states:
|
|
374 phase_call_args.extend(['--states', phase_args.shapeit_states])
|
|
375
|
|
376 # Assign the shapeit window size, if specified
|
|
377 if phase_args.shapeit_window:
|
|
378 phase_call_args.extend(['--window', phase_args.shapeit_window])
|
|
379
|
|
380 # Assign the genetic map, if specified
|
|
381 if phase_args.genetic_map:
|
|
382 phase_call_args.extend(['--input-map', phase_args.genetic_map])
|
|
383
|
|
384 # Assign the effective pop size, if specified
|
|
385 if phase_args.Ne:
|
|
386 phase_call_args.extend(['--effective-size', phase_args.Ne])
|
|
387
|
|
388 # Assign the random seed, if specified
|
|
389 if phase_args.random_seed:
|
|
390 phase_call_args.extend(['--seed', phase_args.random_seed])
|
|
391
|
|
392 # Check if only a single shapeit run is required
|
|
393 if phase_args.phase_chr or len(chrs_in_vcf) == 1:
|
|
394
|
|
395 # Holds shapeit input filename, as a temp file may be required
|
|
396 shapeit_input_vcf = None
|
|
397
|
|
398 # Check if a single chromosome is selected
|
|
399 if phase_args.phase_chr and len(chrs_in_vcf) > 1:
|
|
400 # Assign the chromosome-specific input prefix
|
|
401 chr_input_prefix = phase_args.vcf + '.' + phase_args.phase_chr
|
|
402
|
|
403 # Assign the chr-specific file as the shapeit input filename
|
|
404 shapeit_input_vcf = chr_input_prefix + '.vcf.gz'
|
|
405
|
|
406 # Create the chromosome-specific input
|
|
407 chr_subset_file(phase_args.vcf,
|
|
408 phase_args.phase_chr,
|
|
409 chr_input_prefix,
|
|
410 'vcf.gz',
|
|
411 from_bp = phase_args.phase_from_bp,
|
|
412 to_bp = phase_args.phase_to_bp)
|
|
413
|
|
414 logging.info('Chr %s subset created' % phase_args.phase_chr)
|
|
415
|
|
416 else:
|
|
417 # Assign the shapeit input filename
|
|
418 shapeit_input_vcf = phase_args.vcf
|
|
419
|
|
420 # Assigns the input and output arguments for shapeit
|
|
421 phase_call_args.extend(['--input-vcf', shapeit_input_vcf,
|
|
422 '--output-max', phase_args.out_prefix,
|
|
423 '--output-log', phase_args.out_prefix + '.phase.log'])
|
|
424
|
|
425 # Assign the from bp position argument, if specified
|
|
426 if phase_args.phase_from_bp:
|
|
427 phase_call_args.extend(['--input-from', phase_args.phase_from_bp])
|
|
428
|
|
429 # Assign the to bp position argument, if specified
|
|
430 if phase_args.phase_to_bp:
|
|
431 phase_call_args.extend(['--input-to', phase_args.phase_to_bp])
|
|
432
|
|
433 # Call shapeit wrapper
|
|
434 call_shapeit(list(map(str, phase_call_args)), phase_args.out_prefix, phase_args.out_format)
|
|
435
|
|
436 # Assign expected phased output filename
|
|
437 phased_output = '%s.%s' % (phase_args.out_prefix, phase_args.out_format)
|
|
438
|
|
439 # Combine the log files
|
|
440 concatenate_logs([phase_args.out_prefix + '.phase.log', phase_args.out_prefix + '.log'], phased_output + '.log')
|
|
441
|
|
442 # Rename output to phase_args.out, if specified
|
|
443 if phase_args.out:
|
|
444 shutil.move(phased_output, phase_args.out)
|
|
445 shutil.move(phased_output + '.log', phase_args.out + '.log')
|
|
446
|
|
447 logging.info('shapeit log file created')
|
|
448
|
|
449 # Check if a chr subset file was created
|
|
450 if phase_args.phase_chr and len(chrs_in_vcf) > 1:
|
|
451 # Delete the chromosome-specific input
|
|
452 os.remove(shapeit_input_vcf)
|
|
453
|
|
454 logging.info('Chr %s subset deleted' % phase_args.phase_chr)
|
|
455
|
|
456 # Remove intermediate files created by shapeit
|
|
457 remove_intermediate_files(phase_args.out_prefix)
|
|
458
|
|
459 # Check if multiple shapeit runs are required
|
|
460 else:
|
|
461
|
|
462 # List to store the phased filenames
|
|
463 phased_filename_list = []
|
|
464
|
|
465 # List to store the phased logs
|
|
466 phased_log_list = []
|
|
467
|
|
468 logging.info('Multi-chr shapeit phasing assigned')
|
|
469
|
|
470 for chr_in_vcf in chrs_in_vcf:
|
|
471
|
|
472 logging.info('Chr %s assigned' % chr_in_vcf)
|
|
473
|
|
474 # Copy the arguments for this run
|
|
475 chr_call_args = copy.deepcopy(phase_call_args)
|
|
476
|
|
477 # Assign the chromosome-specific output prefix
|
|
478 chr_out_prefix = phase_args.out_prefix + '.' + chr_in_vcf
|
|
479
|
|
480 # Assign the expected chromosome-specific output filename
|
|
481 chr_out_filename = '%s.%s' % (chr_out_prefix, phase_args.out_format)
|
|
482
|
|
483 # Store the output filename
|
|
484 phased_filename_list.append(chr_out_filename)
|
|
485
|
|
486 # Assign the chromosome-specific input prefix
|
|
487 chr_input_prefix = phase_args.vcf + '.' + chr_in_vcf
|
|
488
|
|
489 # Assign the expected chromosome-specific input filename
|
|
490 chr_in_filename = chr_input_prefix + '.vcf.gz'
|
|
491
|
|
492 # Create the chromosome-specific input
|
|
493 chr_subset_file(phase_args.vcf,
|
|
494 chr_in_vcf,
|
|
495 chr_input_prefix,
|
|
496 'vcf.gz')
|
|
497
|
|
498 # Assigns the input and output arguments for shapeit
|
|
499 chr_call_args.extend(['--input-vcf', chr_in_filename,
|
|
500 '--output-max', chr_out_prefix,
|
|
501 '--output-log', chr_out_prefix + '.phase.log'])
|
|
502
|
|
503 # Call shapeit wrapper
|
|
504 call_shapeit(list(map(str, chr_call_args)), chr_out_prefix, phase_args.out_format)
|
|
505
|
|
506 # Combine log files
|
|
507 concatenate_logs([chr_out_prefix + '.phase.log', chr_out_prefix + '.log'], chr_out_filename + '.log')
|
|
508
|
|
509 # Store the filename of the combined logs
|
|
510 phased_log_list.append(chr_out_filename + '.log')
|
|
511
|
|
512 # Delete the chromosome-specific input
|
|
513 os.remove(chr_in_filename)
|
|
514
|
|
515 # Remove intermediate files created by shapeit
|
|
516 remove_intermediate_files(chr_out_prefix)
|
|
517
|
|
518 # Concatenate the vcf files
|
|
519 concatenate(phased_filename_list, phase_args.out_prefix, phase_args.out_format)
|
|
520
|
|
521 logging.info('Concatenated chromosomes')
|
|
522
|
|
523 # Assign expected concatenated output filename
|
|
524 phased_output = '%s.%s' % (phase_args.out_prefix, phase_args.out_format)
|
|
525
|
|
526 # Combine the log files
|
|
527 concatenate_logs(phased_log_list, phased_output + '.log')
|
|
528
|
|
529 # Rename output to phase_args.out, if specified
|
|
530 if phase_args.out:
|
|
531 shutil.move(phased_output, phase_args.out)
|
|
532 shutil.move(phased_output + '.log', phase_args.out + '.log')
|
|
533
|
|
534 logging.info('Multi-chr shapeit log created')
|
|
535
|
|
536 # Reverts the VCF input file
|
|
537 if vcfname_renamed:
|
|
538 os.rename(phase_args.vcf, phase_args.vcf[:-len(vcfname_ext)])
|
|
539
|
|
540 if __name__ == "__main__":
|
|
541 initLogger()
|
|
542 run()
|