| 14 | 1 ''' | 
|  | 2 Created on 22 Sep 2015 | 
|  | 3 | 
|  | 4 @author: alex | 
|  | 5 ''' | 
|  | 6 from collections import OrderedDict | 
|  | 7 import logging | 
|  | 8 import os | 
|  | 9 import subprocess | 
|  | 10 | 
|  | 11 from phe.variant import VariantCaller | 
|  | 12 | 
|  | 13 | 
|  | 14 class GATKVariantCaller(VariantCaller): | 
|  | 15     """Implemetation of the Broad institute's variant caller.""" | 
|  | 16 | 
|  | 17     name = "gatk" | 
|  | 18     """Plain text name of the variant caller.""" | 
|  | 19 | 
|  | 20     _default_options = "--sample_ploidy 2 --genotype_likelihoods_model BOTH -rf BadCigar -out_mode EMIT_ALL_SITES -nt 1" | 
|  | 21     """Default options for the variant caller.""" | 
|  | 22 | 
|  | 23     def __init__(self, cmd_options=None): | 
|  | 24         """Constructor""" | 
|  | 25         if cmd_options is None: | 
|  | 26             cmd_options = self._default_options | 
|  | 27 | 
|  | 28         super(GATKVariantCaller, self).__init__(cmd_options=cmd_options) | 
|  | 29 | 
|  | 30         self.last_command = None | 
|  | 31 | 
|  | 32     def get_info(self, plain=False): | 
|  | 33         d = {"name": "gatk", "version": self.get_version(), "command": self.last_command} | 
|  | 34 | 
|  | 35         if plain: | 
|  | 36             result = "GATK(%(version)s): %(command)s" % d | 
|  | 37         else: | 
|  | 38             result = OrderedDict(d) | 
|  | 39 | 
|  | 40         return result | 
|  | 41 | 
|  | 42     def get_version(self): | 
|  | 43 | 
|  | 44         p = subprocess.Popen(["java", "-jar", os.environ["GATK_JAR"], "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | 
|  | 45         (output, _) = p.communicate() | 
|  | 46 | 
|  | 47         # last character is EOL. | 
|  | 48         version = output.split("\n")[-2] | 
|  | 49 | 
|  | 50         return version | 
|  | 51 | 
|  | 52     def make_vcf(self, *args, **kwargs): | 
|  | 53         ref = kwargs.get("ref") | 
|  | 54         bam = kwargs.get("bam") | 
|  | 55 | 
|  | 56         if kwargs.get("vcf_file") is None: | 
|  | 57             kwargs["vcf_file"] = "variants.vcf" | 
|  | 58 | 
|  | 59         opts = {"ref": os.path.abspath(ref), | 
|  | 60                 "bam": os.path.abspath(bam), | 
|  | 61                 "gatk_jar": os.environ["GATK_JAR"], | 
|  | 62                 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")), | 
|  | 63                 "extra_cmd_options": self.cmd_options} | 
|  | 64 | 
|  | 65 #         if not self.create_aux_files(ref): | 
|  | 66 #             logging.warn("Auxiliary files were not created.") | 
|  | 67 #             return False | 
|  | 68 | 
|  | 69         # Call variants | 
|  | 70         # FIXME: Sample ploidy = 2? | 
|  | 71         os.environ["GATK_JAR"] | 
|  | 72         cmd = "java -XX:+UseSerialGC -jar %(gatk_jar)s -T UnifiedGenotyper -R %(ref)s -I %(bam)s -o %(all_variants_file)s %(extra_cmd_options)s" % opts | 
|  | 73         success = os.system(cmd) | 
|  | 74 | 
|  | 75         if success != 0: | 
|  | 76             logging.warn("Calling variants returned non-zero exit status.") | 
|  | 77             return False | 
|  | 78 | 
|  | 79         self.last_command = cmd | 
|  | 80 | 
|  | 81         return True | 
|  | 82 | 
|  | 83     def create_aux_files(self, ref): | 
|  | 84         """Create auxiliary files needed for this variant. | 
|  | 85 | 
|  | 86         Tools needed: samtools and picard tools. Picard tools is a Java | 
|  | 87         library that can be defined using environment variable: PICARD_JAR | 
|  | 88         specifying path to picard.jar or PICARD_TOOLS_PATH specifying path | 
|  | 89         to the directory where separate jars are (older version before jars | 
|  | 90         were merged into a single picard.jar). | 
|  | 91         Parameters: | 
|  | 92         ----------- | 
|  | 93         ref: str | 
|  | 94             Path to the reference file. | 
|  | 95 | 
|  | 96         Returns: | 
|  | 97         -------- | 
|  | 98         bool: | 
|  | 99             True if auxiliary files were created, False otherwise. | 
|  | 100         """ | 
|  | 101 | 
|  | 102         ref_name, _ = os.path.splitext(ref) | 
|  | 103 | 
|  | 104         success = os.system("samtools faidx %s" % ref) | 
|  | 105 | 
|  | 106         if success != 0: | 
|  | 107             logging.warn("Fasta index could not be created.") | 
|  | 108             return False | 
|  | 109 | 
|  | 110         d = {"ref": ref, "ref_name": ref_name} | 
|  | 111 | 
|  | 112         if os.environ.get("PICARD_TOOLS_PATH"): | 
|  | 113             d["picard_tools_path"] = os.path.join(os.environ["PICARD_TOOLS_PATH"], "CreateSequenceDictionary.jar") | 
|  | 114         elif os.environ.get("PICARD_JAR"): | 
|  | 115             # This is used in newer version of PICARD tool where multiple | 
|  | 116             #    jars were merged into a single jar file. | 
|  | 117             d["picard_tools_path"] = "%s %s" % (os.environ["PICARD_JAR"], "CreateSequenceDictionary") | 
|  | 118         else: | 
|  | 119             logging.error("Picard tools are not present in the path.") | 
|  | 120             return False | 
|  | 121 | 
|  | 122         success = os.system("java -jar %(picard_tools_path)s R=%(ref)s O=%(ref_name)s.dict" % d) | 
|  | 123 | 
|  | 124         if success != 0: | 
|  | 125             logging.warn("Dictionary for the %s reference could not be created", ref) | 
|  | 126             return False |