| 22 | 1 ''' | 
|  | 2 Created on 22 Sep 2015 | 
|  | 3 | 
|  | 4 @author: alex | 
|  | 5 ''' | 
|  | 6 from collections import OrderedDict | 
|  | 7 import logging | 
|  | 8 import os | 
|  | 9 import subprocess | 
|  | 10 | 
|  | 11 from phe.variant import VariantCaller | 
|  | 12 | 
|  | 13 | 
|  | 14 class GATKVariantCaller(VariantCaller): | 
|  | 15     """Implemetation of the Broad institute's variant caller.""" | 
|  | 16 | 
|  | 17     name = "gatk" | 
|  | 18     """Plain text name of the variant caller.""" | 
|  | 19 | 
|  | 20     _default_options = "--sample_ploidy 2 --genotype_likelihoods_model BOTH -rf BadCigar -out_mode EMIT_ALL_SITES -nt 1" | 
|  | 21     """Default options for the variant caller.""" | 
|  | 22 | 
|  | 23     def __init__(self, cmd_options=None): | 
|  | 24         """Constructor""" | 
|  | 25         if cmd_options is None: | 
|  | 26             cmd_options = self._default_options | 
|  | 27 | 
|  | 28         super(GATKVariantCaller, self).__init__(cmd_options=cmd_options) | 
|  | 29 | 
|  | 30         self.last_command = None | 
|  | 31 | 
|  | 32     def get_info(self, plain=False): | 
|  | 33         d = {"name": "gatk", "version": self.get_version(), "command": self.last_command} | 
|  | 34 | 
|  | 35         if plain: | 
|  | 36             result = "GATK(%(version)s): %(command)s" % d | 
|  | 37         else: | 
|  | 38             result = OrderedDict(d) | 
|  | 39 | 
|  | 40         return result | 
|  | 41 | 
|  | 42     def get_version(self): | 
|  | 43 | 
|  | 44         p = subprocess.Popen(["java", "-jar", os.environ["GATK_JAR"], "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 
|  | 45         (output, _) = p.communicate() | 
|  | 46 | 
|  | 47         # last character is EOL. | 
|  | 48         version = output.split("\n")[-2] | 
|  | 49 | 
|  | 50         return version | 
|  | 51 | 
|  | 52     def make_vcf(self, *args, **kwargs): | 
|  | 53         ref = kwargs.get("ref") | 
|  | 54         bam = kwargs.get("bam") | 
|  | 55 | 
|  | 56         make_aux = kwargs.get("make_aux", False) | 
|  | 57 | 
|  | 58         if kwargs.get("vcf_file") is None: | 
|  | 59             kwargs["vcf_file"] = "variants.vcf" | 
|  | 60 | 
|  | 61         opts = {"ref": os.path.abspath(ref), | 
|  | 62                 "bam": os.path.abspath(bam), | 
|  | 63                 "gatk_jar": os.environ["GATK_JAR"], | 
|  | 64                 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")), | 
|  | 65                 "extra_cmd_options": self.cmd_options} | 
|  | 66         if make_aux: | 
|  | 67             if not self.create_aux_files(ref): | 
|  | 68                 logging.warn("Auxiliary files were not created.") | 
|  | 69                 return False | 
|  | 70 | 
|  | 71         # Call variants | 
|  | 72         # FIXME: Sample ploidy = 2? | 
|  | 73         os.environ["GATK_JAR"] | 
|  | 74         cmd = "java -XX:+UseSerialGC -jar %(gatk_jar)s -T UnifiedGenotyper -R %(ref)s -I %(bam)s -o %(all_variants_file)s %(extra_cmd_options)s" % opts | 
|  | 75         success = os.system(cmd) | 
|  | 76 | 
|  | 77         if success != 0: | 
|  | 78             logging.warn("Calling variants returned non-zero exit status.") | 
|  | 79             return False | 
|  | 80 | 
|  | 81         self.last_command = cmd | 
|  | 82 | 
|  | 83         return True | 
|  | 84 | 
|  | 85     def create_aux_files(self, ref): | 
|  | 86         """Create auxiliary files needed for this variant. | 
|  | 87 | 
|  | 88         Tools needed: samtools and picard tools. Picard tools is a Java | 
|  | 89         library that can be defined using environment variable: PICARD_JAR | 
|  | 90         specifying path to picard.jar or PICARD_TOOLS_PATH specifying path | 
|  | 91         to the directory where separate jars are (older version before jars | 
|  | 92         were merged into a single picard.jar). | 
|  | 93         Parameters: | 
|  | 94         ----------- | 
|  | 95         ref: str | 
|  | 96             Path to the reference file. | 
|  | 97 | 
|  | 98         Returns: | 
|  | 99         -------- | 
|  | 100         bool: | 
|  | 101             True if auxiliary files were created, False otherwise. | 
|  | 102         """ | 
|  | 103 | 
|  | 104         ref_name, _ = os.path.splitext(ref) | 
|  | 105 | 
|  | 106         success = os.system("samtools faidx %s" % ref) | 
|  | 107 | 
|  | 108         if success != 0: | 
|  | 109             logging.warn("Fasta index could not be created.") | 
|  | 110             return False | 
|  | 111 | 
|  | 112         d = {"ref": ref, "ref_name": ref_name} | 
|  | 113 | 
|  | 114         if os.environ.get("PICARD_TOOLS_PATH"): | 
|  | 115             d["picard_tools_path"] = os.path.join(os.environ["PICARD_TOOLS_PATH"], "CreateSequenceDictionary.jar") | 
|  | 116         elif os.environ.get("PICARD_JAR"): | 
|  | 117             # This is used in newer version of PICARD tool where multiple | 
|  | 118             #    jars were merged into a single jar file. | 
|  | 119             d["picard_tools_path"] = "%s %s" % (os.environ["PICARD_JAR"], "CreateSequenceDictionary") | 
|  | 120         else: | 
|  | 121             logging.error("Picard tools are not present in the path.") | 
|  | 122             return False | 
|  | 123 | 
|  | 124         if not os.path.exists("%s.dict" % ref_name): | 
|  | 125             success = os.system("java -jar %(picard_tools_path)s R=%(ref)s O=%(ref_name)s.dict" % d) | 
|  | 126 | 
|  | 127             if success != 0: | 
|  | 128                 logging.warn("Dictionary for the %s reference could not be created", ref) | 
|  | 129                 return False | 
|  | 130         else: | 
|  | 131             logging.debug("PICARD AUX EXISTS.") | 
|  | 132 | 
|  | 133         return True |