Mercurial > repos > ulfschaefer > vcfs2fasta
comparison phe/variant/GATKVariantCaller.py @ 22:96f393ad7fc6 draft default tip
Uploaded
| author | ulfschaefer |
|---|---|
| date | Wed, 23 Dec 2015 04:50:58 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 21:b09ffe50c378 | 22:96f393ad7fc6 |
|---|---|
| 1 ''' | |
| 2 Created on 22 Sep 2015 | |
| 3 | |
| 4 @author: alex | |
| 5 ''' | |
| 6 from collections import OrderedDict | |
| 7 import logging | |
| 8 import os | |
| 9 import subprocess | |
| 10 | |
| 11 from phe.variant import VariantCaller | |
| 12 | |
| 13 | |
| 14 class GATKVariantCaller(VariantCaller): | |
| 15 """Implemetation of the Broad institute's variant caller.""" | |
| 16 | |
| 17 name = "gatk" | |
| 18 """Plain text name of the variant caller.""" | |
| 19 | |
| 20 _default_options = "--sample_ploidy 2 --genotype_likelihoods_model BOTH -rf BadCigar -out_mode EMIT_ALL_SITES -nt 1" | |
| 21 """Default options for the variant caller.""" | |
| 22 | |
| 23 def __init__(self, cmd_options=None): | |
| 24 """Constructor""" | |
| 25 if cmd_options is None: | |
| 26 cmd_options = self._default_options | |
| 27 | |
| 28 super(GATKVariantCaller, self).__init__(cmd_options=cmd_options) | |
| 29 | |
| 30 self.last_command = None | |
| 31 | |
| 32 def get_info(self, plain=False): | |
| 33 d = {"name": "gatk", "version": self.get_version(), "command": self.last_command} | |
| 34 | |
| 35 if plain: | |
| 36 result = "GATK(%(version)s): %(command)s" % d | |
| 37 else: | |
| 38 result = OrderedDict(d) | |
| 39 | |
| 40 return result | |
| 41 | |
| 42 def get_version(self): | |
| 43 | |
| 44 p = subprocess.Popen(["java", "-jar", os.environ["GATK_JAR"], "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | |
| 45 (output, _) = p.communicate() | |
| 46 | |
| 47 # last character is EOL. | |
| 48 version = output.split("\n")[-2] | |
| 49 | |
| 50 return version | |
| 51 | |
| 52 def make_vcf(self, *args, **kwargs): | |
| 53 ref = kwargs.get("ref") | |
| 54 bam = kwargs.get("bam") | |
| 55 | |
| 56 make_aux = kwargs.get("make_aux", False) | |
| 57 | |
| 58 if kwargs.get("vcf_file") is None: | |
| 59 kwargs["vcf_file"] = "variants.vcf" | |
| 60 | |
| 61 opts = {"ref": os.path.abspath(ref), | |
| 62 "bam": os.path.abspath(bam), | |
| 63 "gatk_jar": os.environ["GATK_JAR"], | |
| 64 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")), | |
| 65 "extra_cmd_options": self.cmd_options} | |
| 66 if make_aux: | |
| 67 if not self.create_aux_files(ref): | |
| 68 logging.warn("Auxiliary files were not created.") | |
| 69 return False | |
| 70 | |
| 71 # Call variants | |
| 72 # FIXME: Sample ploidy = 2? | |
| 73 os.environ["GATK_JAR"] | |
| 74 cmd = "java -XX:+UseSerialGC -jar %(gatk_jar)s -T UnifiedGenotyper -R %(ref)s -I %(bam)s -o %(all_variants_file)s %(extra_cmd_options)s" % opts | |
| 75 success = os.system(cmd) | |
| 76 | |
| 77 if success != 0: | |
| 78 logging.warn("Calling variants returned non-zero exit status.") | |
| 79 return False | |
| 80 | |
| 81 self.last_command = cmd | |
| 82 | |
| 83 return True | |
| 84 | |
| 85 def create_aux_files(self, ref): | |
| 86 """Create auxiliary files needed for this variant. | |
| 87 | |
| 88 Tools needed: samtools and picard tools. Picard tools is a Java | |
| 89 library that can be defined using environment variable: PICARD_JAR | |
| 90 specifying path to picard.jar or PICARD_TOOLS_PATH specifying path | |
| 91 to the directory where separate jars are (older version before jars | |
| 92 were merged into a single picard.jar). | |
| 93 Parameters: | |
| 94 ----------- | |
| 95 ref: str | |
| 96 Path to the reference file. | |
| 97 | |
| 98 Returns: | |
| 99 -------- | |
| 100 bool: | |
| 101 True if auxiliary files were created, False otherwise. | |
| 102 """ | |
| 103 | |
| 104 ref_name, _ = os.path.splitext(ref) | |
| 105 | |
| 106 success = os.system("samtools faidx %s" % ref) | |
| 107 | |
| 108 if success != 0: | |
| 109 logging.warn("Fasta index could not be created.") | |
| 110 return False | |
| 111 | |
| 112 d = {"ref": ref, "ref_name": ref_name} | |
| 113 | |
| 114 if os.environ.get("PICARD_TOOLS_PATH"): | |
| 115 d["picard_tools_path"] = os.path.join(os.environ["PICARD_TOOLS_PATH"], "CreateSequenceDictionary.jar") | |
| 116 elif os.environ.get("PICARD_JAR"): | |
| 117 # This is used in newer version of PICARD tool where multiple | |
| 118 # jars were merged into a single jar file. | |
| 119 d["picard_tools_path"] = "%s %s" % (os.environ["PICARD_JAR"], "CreateSequenceDictionary") | |
| 120 else: | |
| 121 logging.error("Picard tools are not present in the path.") | |
| 122 return False | |
| 123 | |
| 124 if not os.path.exists("%s.dict" % ref_name): | |
| 125 success = os.system("java -jar %(picard_tools_path)s R=%(ref)s O=%(ref_name)s.dict" % d) | |
| 126 | |
| 127 if success != 0: | |
| 128 logging.warn("Dictionary for the %s reference could not be created", ref) | |
| 129 return False | |
| 130 else: | |
| 131 logging.debug("PICARD AUX EXISTS.") | |
| 132 | |
| 133 return True |
