14
|
1 '''
|
|
2 Created on 22 Sep 2015
|
|
3
|
|
4 @author: alex
|
|
5 '''
|
|
6 from collections import OrderedDict
|
|
7 import logging
|
|
8 import os
|
|
9 import subprocess
|
|
10
|
|
11 from phe.variant import VariantCaller
|
|
12
|
|
13
|
|
14 class GATKVariantCaller(VariantCaller):
|
|
15 """Implemetation of the Broad institute's variant caller."""
|
|
16
|
|
17 name = "gatk"
|
|
18 """Plain text name of the variant caller."""
|
|
19
|
|
20 _default_options = "--sample_ploidy 2 --genotype_likelihoods_model BOTH -rf BadCigar -out_mode EMIT_ALL_SITES -nt 1"
|
|
21 """Default options for the variant caller."""
|
|
22
|
|
23 def __init__(self, cmd_options=None):
|
|
24 """Constructor"""
|
|
25 if cmd_options is None:
|
|
26 cmd_options = self._default_options
|
|
27
|
|
28 super(GATKVariantCaller, self).__init__(cmd_options=cmd_options)
|
|
29
|
|
30 self.last_command = None
|
|
31
|
|
32 def get_info(self, plain=False):
|
|
33 d = {"name": "gatk", "version": self.get_version(), "command": self.last_command}
|
|
34
|
|
35 if plain:
|
|
36 result = "GATK(%(version)s): %(command)s" % d
|
|
37 else:
|
|
38 result = OrderedDict(d)
|
|
39
|
|
40 return result
|
|
41
|
|
42 def get_version(self):
|
|
43
|
|
44 p = subprocess.Popen(["java", "-jar", os.environ["GATK_JAR"], "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
|
45 (output, _) = p.communicate()
|
|
46
|
|
47 # last character is EOL.
|
|
48 version = output.split("\n")[-2]
|
|
49
|
|
50 return version
|
|
51
|
|
52 def make_vcf(self, *args, **kwargs):
|
|
53 ref = kwargs.get("ref")
|
|
54 bam = kwargs.get("bam")
|
|
55
|
|
56 if kwargs.get("vcf_file") is None:
|
|
57 kwargs["vcf_file"] = "variants.vcf"
|
|
58
|
|
59 opts = {"ref": os.path.abspath(ref),
|
|
60 "bam": os.path.abspath(bam),
|
|
61 "gatk_jar": os.environ["GATK_JAR"],
|
|
62 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")),
|
|
63 "extra_cmd_options": self.cmd_options}
|
|
64
|
|
65 # if not self.create_aux_files(ref):
|
|
66 # logging.warn("Auxiliary files were not created.")
|
|
67 # return False
|
|
68
|
|
69 # Call variants
|
|
70 # FIXME: Sample ploidy = 2?
|
|
71 os.environ["GATK_JAR"]
|
|
72 cmd = "java -XX:+UseSerialGC -jar %(gatk_jar)s -T UnifiedGenotyper -R %(ref)s -I %(bam)s -o %(all_variants_file)s %(extra_cmd_options)s" % opts
|
|
73 success = os.system(cmd)
|
|
74
|
|
75 if success != 0:
|
|
76 logging.warn("Calling variants returned non-zero exit status.")
|
|
77 return False
|
|
78
|
|
79 self.last_command = cmd
|
|
80
|
|
81 return True
|
|
82
|
|
83 def create_aux_files(self, ref):
|
|
84 """Create auxiliary files needed for this variant.
|
|
85
|
|
86 Tools needed: samtools and picard tools. Picard tools is a Java
|
|
87 library that can be defined using environment variable: PICARD_JAR
|
|
88 specifying path to picard.jar or PICARD_TOOLS_PATH specifying path
|
|
89 to the directory where separate jars are (older version before jars
|
|
90 were merged into a single picard.jar).
|
|
91 Parameters:
|
|
92 -----------
|
|
93 ref: str
|
|
94 Path to the reference file.
|
|
95
|
|
96 Returns:
|
|
97 --------
|
|
98 bool:
|
|
99 True if auxiliary files were created, False otherwise.
|
|
100 """
|
|
101
|
|
102 ref_name, _ = os.path.splitext(ref)
|
|
103
|
|
104 success = os.system("samtools faidx %s" % ref)
|
|
105
|
|
106 if success != 0:
|
|
107 logging.warn("Fasta index could not be created.")
|
|
108 return False
|
|
109
|
|
110 d = {"ref": ref, "ref_name": ref_name}
|
|
111
|
|
112 if os.environ.get("PICARD_TOOLS_PATH"):
|
|
113 d["picard_tools_path"] = os.path.join(os.environ["PICARD_TOOLS_PATH"], "CreateSequenceDictionary.jar")
|
|
114 elif os.environ.get("PICARD_JAR"):
|
|
115 # This is used in newer version of PICARD tool where multiple
|
|
116 # jars were merged into a single jar file.
|
|
117 d["picard_tools_path"] = "%s %s" % (os.environ["PICARD_JAR"], "CreateSequenceDictionary")
|
|
118 else:
|
|
119 logging.error("Picard tools are not present in the path.")
|
|
120 return False
|
|
121
|
|
122 success = os.system("java -jar %(picard_tools_path)s R=%(ref)s O=%(ref_name)s.dict" % d)
|
|
123
|
|
124 if success != 0:
|
|
125 logging.warn("Dictionary for the %s reference could not be created", ref)
|
|
126 return False
|