Mercurial > repos > jaredgk > ppp_vcfphase
comparison shapeit.py @ 0:3830d29fca6a draft
Uploaded
| author | jaredgk |
|---|---|
| date | Mon, 15 Oct 2018 18:15:47 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3830d29fca6a |
|---|---|
| 1 import os | |
| 2 import sys | |
| 3 import subprocess | |
| 4 import shutil | |
| 5 import argparse | |
| 6 import glob | |
| 7 import logging | |
| 8 | |
| 9 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) | |
| 10 | |
| 11 from vcf_reader_func import checkFormat | |
| 12 from logging_module import initLogger, logArgs | |
| 13 from plink import convert_haps_to_vcf | |
| 14 #from vcftools import bgzip_decompress_vcfgz | |
| 15 #from bcftools import convert_to_bcf, check_for_index, create_index | |
| 16 | |
def check_shapeit_for_errors (shapeit_stdout, output_prefix):
    '''
    Verify that a shapeit run finished, raising otherwise

    A run is treated as complete when the captured stdout contains the
    text 'Running time:'. When that text is missing, the error-state
    intermediate files are removed via remove_intermediate_files and the
    captured stdout is raised as the error message.

    Parameters
    ----------
    shapeit_stdout : str
        Captured stdout of the shapeit call
    output_prefix : str
        Output filename prefix

    Raises
    ------
    Exception
        If 'Running time:' is not found in the shapeit stdout
    '''

    stdout_text = str(shapeit_stdout)

    # Completion text found: nothing to report, the function returns None
    if 'Running time:' in stdout_text:
        return

    # Clean up the error-state intermediate files before reporting
    remove_intermediate_files(output_prefix, error_intermediates = True)
    raise Exception(stdout_text)
| 43 | |
def remove_intermediate_files (output_prefix, error_intermediates = False):
    '''
    Delete the intermediate files left behind by shapeit

    Which files are targeted depends on whether shapeit failed. After an
    error only the log file (.phase.log) is targeted; otherwise the
    .phase.ind.mm, .phase.snp.mm, .haps, and .sample files are targeted.
    Each targeted file is removed if it exists, and a warning is logged
    for each one that does not.

    Parameters
    ----------
    output_prefix : str
        Output filename prefix
    error_intermediates : bool, optional
        Defines if shapeit encountered an error

    '''

    # Select the file suffixes to clean up for the current error-state
    if error_intermediates:
        target_suffixes = ('.phase.log',)
    else:
        target_suffixes = ('.phase.ind.mm', '.phase.snp.mm', '.haps', '.sample')

    for suffix in target_suffixes:
        target_path = output_prefix + suffix

        if os.path.isfile(target_path):
            # Remove the shapeit intermediate file
            os.remove(target_path)
        else:
            # The file was never created, give a warning
            logging.warning('shapeit intermediate file %s does not exist' % target_path)

    logging.info('shapeit-related files removed')
| 101 | |
def standard_shapeit_call (shapeit_call_args, output_prefix):
    '''
    Run shapeit as a subprocess and validate its output

    The shapeit executable is invoked with the supplied arguments and
    its stdout is captured. The stdout is then handed, together with
    output_prefix, to check_shapeit_for_errors, which raises (after
    removing the error-state intermediate files) if the run did not
    complete.

    Parameters
    ----------
    shapeit_call_args : list
        Argument list for shapeit
    output_prefix : str
        Output filename prefix

    '''

    logging.info('shapeit phasing parameters assigned')

    # Full command line: the executable followed by its arguments
    shapeit_command = ['shapeit'] + shapeit_call_args

    # Run shapeit and wait for it to finish; stderr is captured but not inspected
    shapeit_process = subprocess.Popen(shapeit_command, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
    shapeit_output = shapeit_process.communicate()[0]

    # Under python 3 the captured stdout is bytes, convert it to a string
    if sys.version_info.major == 3:
        shapeit_output = shapeit_output.decode()

    # Raise if shapeit did not report a completed run
    check_shapeit_for_errors(shapeit_output, output_prefix)

    logging.info('shapeit phasing complete (HAPS format)')
| 135 | |
def call_shapeit (shapeit_call_args, output_prefix, output_format):
    '''
    Phase with shapeit, then convert the HAPS output

    Runs shapeit through standard_shapeit_call and then passes the
    output prefix and the requested output format to
    convert_haps_to_vcf for conversion.

    Parameters
    ----------
    shapeit_call_args : list
        Argument list for shapeit
    output_prefix : str
        Output filename prefix
    output_format : str
        Output file format

    '''

    # Standard call to shapeit
    standard_shapeit_call(shapeit_call_args, output_prefix)

    # Convert the haps-format output using the requested output format
    convert_haps_to_vcf(output_prefix, output_format)

    logging.info('HAPS conversion to VCF complete')
