view phe/variant/GATKVariantCaller.py @ 19:0b1f17ef4317 draft

Uploaded
author ulfschaefer
date Fri, 18 Dec 2015 06:06:14 -0500
parents f72039c5faa4
children
line wrap: on
line source

'''
Created on 22 Sep 2015

@author: alex
'''
from collections import OrderedDict
import logging
import os
import subprocess

from phe.variant import VariantCaller


class GATKVariantCaller(VariantCaller):
    """Implemetation of the Broad institute's variant caller."""

    name = "gatk"
    """Plain text name of the variant caller."""

    _default_options = "--sample_ploidy 2 --genotype_likelihoods_model BOTH -rf BadCigar -out_mode EMIT_ALL_SITES -nt 1"
    """Default options for the variant caller."""

    def __init__(self, cmd_options=None):
        """Constructor"""
        if cmd_options is None:
            cmd_options = self._default_options

        super(GATKVariantCaller, self).__init__(cmd_options=cmd_options)

        self.last_command = None

    def get_info(self, plain=False):
        d = {"name": "gatk", "version": self.get_version(), "command": self.last_command}

        if plain:
            result = "GATK(%(version)s): %(command)s" % d
        else:
            result = OrderedDict(d)

        return result

    def get_version(self):

        p = subprocess.Popen(["java", "-jar", os.environ["GATK_JAR"], "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        (output, _) = p.communicate()

        # last character is EOL.
        version = output.split("\n")[-2]

        return version

    def make_vcf(self, *args, **kwargs):
        ref = kwargs.get("ref")
        bam = kwargs.get("bam")

        if kwargs.get("vcf_file") is None:
            kwargs["vcf_file"] = "variants.vcf"

        opts = {"ref": os.path.abspath(ref),
                "bam": os.path.abspath(bam),
                "gatk_jar": os.environ["GATK_JAR"],
                "all_variants_file": os.path.abspath(kwargs.get("vcf_file")),
                "extra_cmd_options": self.cmd_options}

#         if not self.create_aux_files(ref):
#             logging.warn("Auxiliary files were not created.")
#             return False

        # Call variants
        # FIXME: Sample ploidy = 2?
        os.environ["GATK_JAR"]
        cmd = "java -XX:+UseSerialGC -jar %(gatk_jar)s -T UnifiedGenotyper -R %(ref)s -I %(bam)s -o %(all_variants_file)s %(extra_cmd_options)s" % opts
        success = os.system(cmd)

        if success != 0:
            logging.warn("Calling variants returned non-zero exit status.")
            return False

        self.last_command = cmd

        return True

    def create_aux_files(self, ref):
        """Create auxiliary files needed for this variant.

        Tools needed: samtools and picard tools. Picard tools is a Java
        library that can be defined using environment variable: PICARD_JAR
        specifying path to picard.jar or PICARD_TOOLS_PATH specifying path
        to the directory where separate jars are (older version before jars
        were merged into a single picard.jar).
        Parameters:
        -----------
        ref: str
            Path to the reference file.
        
        Returns:
        --------
        bool:
            True if auxiliary files were created, False otherwise.
        """

        ref_name, _ = os.path.splitext(ref)

        success = os.system("samtools faidx %s" % ref)

        if success != 0:
            logging.warn("Fasta index could not be created.")
            return False

        d = {"ref": ref, "ref_name": ref_name}

        if os.environ.get("PICARD_TOOLS_PATH"):
            d["picard_tools_path"] = os.path.join(os.environ["PICARD_TOOLS_PATH"], "CreateSequenceDictionary.jar")
        elif os.environ.get("PICARD_JAR"):
            # This is used in newer version of PICARD tool where multiple
            #    jars were merged into a single jar file.
            d["picard_tools_path"] = "%s %s" % (os.environ["PICARD_JAR"], "CreateSequenceDictionary")
        else:
            logging.error("Picard tools are not present in the path.")
            return False

        success = os.system("java -jar %(picard_tools_path)s R=%(ref)s O=%(ref_name)s.dict" % d)

        if success != 0:
            logging.warn("Dictionary for the %s reference could not be created", ref)
            return False