view beagle.py @ 4:901857c9b24f draft

Uploaded
author jaredgk
date Wed, 17 Oct 2018 17:30:37 -0400
parents 54c84f7dcb2c
children
line wrap: on
line source

import os
import sys
import subprocess
import shutil
import argparse
import glob
import logging

sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))

from vcf_reader_func import checkFormat
from logging_module import initLogger, logArgs
from vcftools import bgzip_decompress_vcfgz
from bcftools import convert_to_bcf, check_for_index, create_index

def delete_beagle_log (output_prefix):
    '''
        Remove the log file written by beagle

        Used to clean up after a failed beagle run. The log is looked for at
        the output prefix with '.log' appended. If no such file exists, a
        warning is logged and nothing is removed.

        Parameters
        ----------
        output_prefix : str
            Output file prefix
    '''

    # Location at which the log file is expected
    beagle_log = output_prefix + '.log'

    # No log file to remove: warn the user and stop here
    if not os.path.isfile(beagle_log):
        missing_log_message = 'beagle log file %s.log does not exist' % output_prefix
        logging.warning(missing_log_message)
        return

    # Log file found, remove it
    os.remove(beagle_log)

def check_beagle_for_errors (beagle_stderr, output_prefix):
    '''
        Checks the beagle stderr for errors

        Empty (or whitespace-only) stderr is treated as a successful run and
        the function returns without doing anything. Any other content is
        treated as a failure: the beagle log file is deleted and an exception
        describing the problem is raised.

        Parameters
        ----------
        beagle_stderr : str
            beagle stderr
        output_prefix : str
            Output file prefix

        Raises
        ------
        Exception
            If beagle stderr is not empty. The message is, in order of
            priority: a missing-data hint, the reported 'ERROR:' lines, or
            the complete stderr.
    '''

    # Check if beagle completed without an error
    if not beagle_stderr.strip():
        return

    # Text form of the stderr used for the substring checks
    stderr_text = str(beagle_stderr)

    # Every remaining case is a failure, so the log file is always deleted
    delete_beagle_log(output_prefix)

    # Print missing data message if that is likely
    if 'ERROR: genotype is missing allele separator:' in stderr_text:
        # Store reported error
        error_reported = 'ERROR: genotype is missing allele separator'
        # Store message for user about error
        user_message = 'Please confirm the input has no missing data.'
        # Report on the error
        raise Exception(error_reported + '\n' + user_message)

    # Report the error line(s) if beagle flagged an error
    if 'ERROR:' in stderr_text:
        # Splits stderr into list of lines
        beagle_stderr_lines = beagle_stderr.splitlines()

        # Lines that are themselves error reports
        error_lines = [output_line for output_line in beagle_stderr_lines
                       if output_line.startswith('ERROR:')]

        # 'ERROR:' may only occur within a line (e.g. indented, or after
        # other text on the same line). Fall back to those lines so the
        # exception never carries an empty message.
        if not error_lines:
            error_lines = [output_line for output_line in beagle_stderr_lines
                           if 'ERROR:' in output_line]

        raise Exception('\n'.join(error_lines))

    # Output present but no error found. Unlikely to be used, but included.
    raise Exception(beagle_stderr)


def standard_beagle_call (beagle_path, beagle_call_args, output_prefix):
    '''
        Calls beagle using subprocess

        This function is used to call beagle under standard conditions. The
        function runs 'java -jar <beagle_path>/beagle.jar' with the given
        arguments, waits for it to finish, and then passes the stderr to
        check_beagle_for_errors to check for errors.

        Parameters
        ----------
        beagle_path : str
            Path to the directory that contains beagle.jar
        beagle_call_args : list
            Argument list for beagle
        output_prefix : str
            Output file prefix

        Raises
        ------
        IOError
            If beagle.jar cannot be found within beagle_path
    '''

    # Assign location of beagle jar file
    beagle_jar = os.path.join(beagle_path, 'beagle.jar')

    # Check that beagle.jar exists
    if not os.path.isfile(beagle_jar):
        raise IOError('beagle.jar not found. Path specified: %s' % beagle_path)

    logging.info('beagle phasing parameters assigned')

    # Phasing subprocess call
    phase_call = subprocess.Popen(['java', '-jar', beagle_jar] + beagle_call_args, stdout = subprocess.PIPE, stderr = subprocess.PIPE)

    # stdout is captured (so it is not echoed) but is not used afterwards
    _, phase_stderr = phase_call.communicate()

    # Under python 3 the pipe returns bytes, convert them to a string. The
    # type is checked rather than the interpreter version, and undecodable
    # bytes are replaced so that a decoding failure cannot mask the error
    # reported by beagle.
    if not isinstance(phase_stderr, str):
        phase_stderr = phase_stderr.decode(errors = 'replace')

    # Check beagle call for errors
    check_beagle_for_errors(phase_stderr, output_prefix)

    logging.info('beagle phasing complete')

def call_beagle (beagle_path, beagle_call_args, output_prefix, output_format):
    '''
        Runs beagle and converts the output to the requested format

        The argument list is printed and then handed to standard_beagle_call.
        Once beagle has finished, the bgzip compressed output
        (output_prefix + '.vcf.gz') is decompressed if 'vcf' was requested,
        or converted to BCF if 'bcf' was requested (creating an index first
        when none is found). Any other format leaves the output untouched.

        Parameters
        ----------
        beagle_path : str
            Path to beagle.jar
        beagle_call_args : list
            Argument list for beagle
        output_prefix : str
            Output file prefix
        output_format : str
            Output file format
    '''

    # Echo the beagle arguments to stdout
    print (beagle_call_args)

    # Run beagle, an exception is raised if the run fails
    standard_beagle_call(beagle_path, beagle_call_args, output_prefix)

    # Compressed VCF file produced by beagle
    beagle_vcfgz = output_prefix + '.vcf.gz'

    # VCF requested: only decompression is needed
    if output_format == 'vcf':
        bgzip_decompress_vcfgz(beagle_vcfgz)
        return

    # Neither VCF nor BCF requested: keep the compressed output as it is
    if output_format != 'bcf':
        return

    # BCF requested: make sure an index exists before converting
    index_found = check_for_index(beagle_vcfgz)
    if index_found == False:
        create_index(beagle_vcfgz)

    # Convert vcf.gz to bcf
    convert_to_bcf(beagle_vcfgz, output_prefix)