Mercurial > repos > jaredgk > ppp_vcfphase
diff shapeit.py @ 0:3830d29fca6a draft
Uploaded
author | jaredgk |
---|---|
date | Mon, 15 Oct 2018 18:15:47 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shapeit.py Mon Oct 15 18:15:47 2018 -0400 @@ -0,0 +1,160 @@ +import os +import sys +import subprocess +import shutil +import argparse +import glob +import logging + +sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) + +from vcf_reader_func import checkFormat +from logging_module import initLogger, logArgs +from plink import convert_haps_to_vcf +#from vcftools import bgzip_decompress_vcfgz +#from bcftools import convert_to_bcf, check_for_index, create_index + +def check_shapeit_for_errors (shapeit_stdout, output_prefix): + ''' + Checks the shapeit stdout for errors + + Parameters + ---------- + shapeit_stdout : str + shapeit stdout + output_prefix : str + Output filename prefix + + Raises + ------ + Exception + If shapeit stdout returns an error + ''' + + # Returns True if the job completed without error + if 'Running time:' in str(shapeit_stdout): + pass + + # Print output if not completed and no error found. Unlikely to be used, but included. + else: + # Remove intermediate files before reporting the error + remove_intermediate_files(output_prefix, error_intermediates = True) + raise Exception(str(shapeit_stdout)) + +def remove_intermediate_files (output_prefix, error_intermediates = False): + ''' + Removes shapeit intermediate files + + This function is used to remove the various intermediate files created + by shapeit. The exact intermediate files to be removed are defined by + the error-state of shapeit. The function will also return warnings if + the intermediate files were not found. + + Parameters + ---------- + output_prefix : str + Output filename prefix + error_intermediates : bool, optional + Defines if shapeit encountered an error + + ''' + if error_intermediates: + + # Check that the log file was created, give a warning otherwise + if not os.path.isfile(output_prefix + '.phase.log'): + logging.warning('shapeit intermediate file %s.phase.log does not exist' % output_prefix) + else: + # Remove shapeit log file + os.remove(output_prefix + '.phase.log') + + else: + + # Check that the phase.ind.mm file was created, give a warning otherwise + if not os.path.isfile(output_prefix + '.phase.ind.mm'): + logging.warning('shapeit intermediate file %s.phase.ind.mm does not exist' % output_prefix) + else: + # Remove shapeit phase.ind.mm file + os.remove(output_prefix + '.phase.ind.mm') + + # Check that the phase.snp.mm file was created, give a warning otherwise + if not os.path.isfile(output_prefix + '.phase.snp.mm'): + logging.warning('shapeit intermediate file %s.phase.snp.mm does not exist' % output_prefix) + else: + # Remove shapeit phase.snp.mm file + os.remove(output_prefix + '.phase.snp.mm') + + # Check that the haps file was created, give a warning otherwise + if not os.path.isfile(output_prefix + '.haps'): + logging.warning('shapeit intermediate file %s.haps does not exist' % output_prefix) + else: + # Remove shapeit haps file + os.remove(output_prefix + '.haps') + + # Check that the sample file was created, give a warning otherwise + if not os.path.isfile(output_prefix + '.sample'): + logging.warning('shapeit intermediate file %s.sample does not exist' % output_prefix) + else: + # Remove shapeit sample file + os.remove(output_prefix + '.sample') + + logging.info('shapeit-related files removed') + +def standard_shapeit_call (shapeit_call_args, output_prefix): + ''' + Calls shapeit using subprocess + + This function is used to call shapeit and passes the resulting stdout + to check_shapeit_for_errors to check for errors. The function also + passes output_prefix to check_shapeit_for_errors to delete shapeit + intermediate files if shapeit results in an error. + + Parameters + ---------- + shapeit_call_args : list + Argument list for shapeit + output_prefix : str + Output filename prefix + + ''' + + logging.info('shapeit phasing parameters assigned') + + # Phasing subprocess call + phase_call = subprocess.Popen(['shapeit'] + shapeit_call_args, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + phase_stdout, phase_stderr = phase_call.communicate() + + # Check if code is running in python 3 + if sys.version_info[0] == 3: + # Convert bytes to string + phase_stdout = phase_stdout.decode() + + # Check shapeit call for errors + check_shapeit_for_errors(phase_stdout, output_prefix) + + logging.info('shapeit phasing complete (HAPS format)') + +def call_shapeit (shapeit_call_args, output_prefix, output_format): + ''' + Calls shapeit and automates file conversions + + The function is used to call shapeit and also automates conversion to + VCF, VCF.GZ, and BCF using plink2 + + Parameters + ---------- + shapeit_call_args : list + Argument list for shapeit + output_prefix : str + Output filename prefix + output_format : str + Output file format + + ''' + + # Standard call to beagle + standard_shapeit_call(shapeit_call_args, output_prefix) + + # Convert haps-format to vcf + convert_haps_to_vcf(output_prefix, output_format) + + logging.info('HAPS conversion to VCF complete')