Previous changeset 4:901857c9b24f (2018-10-17) |
Commit message:
Uploaded |
modified:
bcftools.py |
b |
diff -r 901857c9b24f -r 86a9d8d5b291 bcftools.py --- a/bcftools.py Wed Oct 17 17:30:37 2018 -0400 +++ b/bcftools.py Wed Oct 17 17:34:34 2018 -0400 |
[ |
b"@@ -7,6 +7,31 @@\n \n from vcf_reader_func import checkFormat\n \n+def return_output_format_args (output_format):\n+ '''\n+ Return bcftools arguments for output format\n+\n+ Parameters\n+ ----------\n+ output_format : str\n+ The specified output format\n+\n+ Raises\n+ ------\n+ Exception\n+ If output format is unsupported by bcftools\n+ '''\n+\n+ # Return the output format arguments\n+ if output_format == 'vcf':\n+ return ['-O', 'v']\n+ elif output_format == 'bcf':\n+ return ['-O', 'b']\n+ elif output_format == 'vcf.gz':\n+ return ['-O', 'z']\n+ else:\n+ raise Exception('Unsupported file format')\n+\n def check_bcftools_for_errors (bcftools_stderr):\n '''\n Checks the bgzip stderr for errors\n@@ -18,28 +43,176 @@\n \n Raises\n ------\n- IOError\n+ Exception\n If bcftools stderr returns an error\n '''\n \n # Expand as errors are discovered\n- if bcftools_stderr:\n- logging.error(vcftools_stderr)\n- raise Exception(vcftools_stderr)\n+\n+ # Log warning messages\n+ if 'W::' in bcftools_stderr:\n+ logging.warning(bcftools_stderr.strip())\n+\n+ # Report errors that are not warnings\n+ elif bcftools_stderr:\n+ raise Exception(bcftools_stderr)\n+\n+def pipe_bcftools (bcftools_call_args):\n+ '''\n+ Calls bcftools with pipe output\n+\n+ The output of this function is the stdout and stderr of bcftools. This\n+ function should only be used if bcftools is being used as the stdin of\n+ another function. Please note that this function does not check the for\n+ errors in the bcftools call. Please check for errors after the call is\n+ closed using check_bcftools_for_errors.\n+\n+ Parameters\n+ ----------\n+ bcftools_stderr : str\n+ bcftools stderr\n+\n+ Returns\n+ -------\n+ bcftools_call : PIPE\n+ Pipe of subprocess call, including both stdout and stderr\n+\n+ '''\n+\n+ # bcftools subprocess call\n+ bcftools_call = subprocess.Popen(['bcftools'] + list(map(str, bcftools_call_args)), stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n+\n+ return bcftools_call\n+\n+def pipe_bcftools_to_chr (vcf_filename):\n+ '''\n+ Pipes chromosome and/or contig output of bcftools to a list of unique\n+ entries\n+\n+ The purpose of this function is to return a list of the unique\n+ chromosomes and/or contigs for use in other functions.\n+\n+ Parameters\n+ ----------\n+ vcf_filename : str\n+ VCF input\n+\n+ Returns\n+ -------\n+ chromosomes_to_return : list\n+ Unique chromosomes and/or contigs within VCF input\n+ '''\n+\n+ # Open bcftools pipe\n+ bcftools_call = pipe_bcftools(['query', '-f', '%CHROM\\n', vcf_filename])\n+\n+ # Create a set to hold unique chromosome\n+ chromosomes_to_return = set()\n+\n+ try:\n+\n+ # Current chromosomes/contigs, reduces duplicates if VCF is sorted\n+ previous_chr = None\n+\n+ # Iterate the bcftools stdout unless error occurs\n+ for bcftools_stdout_line in iter(bcftools_call.stdout.readline, b''):\n+ # Remove the newline character\n+ bcftools_line_chr = bcftools_stdout_line.strip()\n+ # Check if the bcftools bcftools chr is different from stored chr\n+ if bcftools_line_chr != previous_chr:\n+ # Store the new chr for comparisons to reduce duplicates\n+ previous_chr = bcftools_line_chr\n+ # Save the chr\n+ chromosomes_to_return.add(bcftools_line_chr)\n+\n+ except:\n+ raise Exception('bcftools call error')\n+\n+ # Close the bcftools stdout\n+ bcftools_call.stdout.close()\n+\n+ # Wait for bctools to finish\n+ bcftools_call.wait()\n+\n+ # Read the bcftools stderr\n+ bcftools_stderr = bcftools_call.stderr.read()\n+\n+ # Check if code is running in python 3\n+ "..b"ents\n+ output_format_args = return_output_format_args(output_format)\n+\n+ # Store the output format arguments\n+ concat_args.extend(output_format_args)\n+\n+ # Stores the specified output filename\n+ vcf_output = '%s.%s' % (output_prefix, output_format)\n+\n+ # Assigns the output file to the arguments\n+ concat_args.extend(['-o', vcf_output])\n+\n+ # Assigns the input files to merge\n+ concat_args.extend(filenames)\n+\n+ # Call bcftools\n+ call_bcftools(concat_args)\n+\n+ # Delete the original files once the merged file is created\n+ if not keep_original:\n+ for filename in filenames:\n+ if check_for_index(filename) == True:\n+ delete_index(filename)\n+ os.remove(filename)\n+\n+def convert_to_bcf (filename, output_prefix, keep_original = False):\n+ '''\n+ Converts a VCF-formatted file to BCF\n+\n+ This function will convert a VCF-formatted file to BCF with the\n+ specified filename prefix. The function also has the option to keep or\n+ delete the input file once the BCF file has been created.\n+\n+ Parameters\n+ ----------\n+ filename : str\n+ Filename of VCF-formatted input\n+ output_prefix : str\n+ Prefix of the BCF output (i.e. without file extension)\n+ keep_original : bool, optional\n+ If the input file should be kept once converted\n+ '''\n \n # Holds the arguments to convert to BCF format\n convert_args = ['convert', '-O', 'b']\n@@ -95,8 +478,29 @@\n # Call bcftools\n call_bcftools(convert_args)\n \n+ # Delete the original file once the bcf file is created\n+ if not keep_original:\n+ if check_for_index(filename) == True:\n+ delete_index(filename)\n+ os.remove(filename)\n \n-def convert_to_vcf (filename, output_prefix):\n+def convert_to_vcf (filename, output_prefix, keep_original = False):\n+ '''\n+ Converts a VCF-formatted file to VCF\n+\n+ This function will convert a VCF-formatted file to VCF with the\n+ specified filename prefix. The function also has the option to keep or\n+ delete the input file once the VCF file has been created.\n+\n+ Parameters\n+ ----------\n+ filename : str\n+ Filename of VCF-formatted input\n+ output_prefix : str\n+ Prefix of the VCF output (i.e. without file extension)\n+ keep_original : bool, optional\n+ If the input file should be kept once converted\n+ '''\n \n # Holds the arguments to convert to VCF format\n convert_args = ['view', '-O', 'v']\n@@ -113,7 +517,29 @@\n # Call bcftools\n call_bcftools(convert_args)\n \n-def convert_to_vcfgz (filename, output_prefix):\n+ # Delete the original file once the vcf file is created\n+ if not keep_original:\n+ if check_for_index(filename) == True:\n+ delete_index(filename)\n+ os.remove(filename)\n+\n+def convert_to_vcfgz (filename, output_prefix, keep_original = False):\n+ '''\n+ Converts a VCF-formatted file to bgzipped-VCF\n+\n+ This function will convert a VCF-formatted file to bgzipped-VCF with the\n+ specified filename prefix. The function also has the option to keep or\n+ delete the input file once the bgzipped-VCF file has been created.\n+\n+ Parameters\n+ ----------\n+ filename : str\n+ Filename of VCF-formatted input\n+ output_prefix : str\n+ Prefix of the bgzipped-VCF output (i.e. without file extension)\n+ keep_original : bool, optional\n+ If the input file should be kept once converted\n+ '''\n \n # Holds the arguments to convert to VCFGZ format\n convert_args = ['view', '-O', 'z']\n@@ -129,3 +555,9 @@\n \n # Call bcftools\n call_bcftools(convert_args)\n+\n+ # Delete the original file once the vcfgz file is created\n+ if not keep_original:\n+ if check_for_index(filename) == True:\n+ delete_index(filename)\n+ os.remove(filename)\n" |