Mercurial > repos > ulfschaefer > vcfs2fasta
comparison phe/variant/GATKVariantCaller.py @ 14:f72039c5faa4 draft
Uploaded
author | ulfschaefer |
---|---|
date | Wed, 16 Dec 2015 07:29:05 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
13:2e69ce9dca65 | 14:f72039c5faa4 |
---|---|
1 ''' | |
2 Created on 22 Sep 2015 | |
3 | |
4 @author: alex | |
5 ''' | |
6 from collections import OrderedDict | |
7 import logging | |
8 import os | |
9 import subprocess | |
10 | |
11 from phe.variant import VariantCaller | |
12 | |
13 | |
14 class GATKVariantCaller(VariantCaller): | |
15 """Implemetation of the Broad institute's variant caller.""" | |
16 | |
17 name = "gatk" | |
18 """Plain text name of the variant caller.""" | |
19 | |
20 _default_options = "--sample_ploidy 2 --genotype_likelihoods_model BOTH -rf BadCigar -out_mode EMIT_ALL_SITES -nt 1" | |
21 """Default options for the variant caller.""" | |
22 | |
23 def __init__(self, cmd_options=None): | |
24 """Constructor""" | |
25 if cmd_options is None: | |
26 cmd_options = self._default_options | |
27 | |
28 super(GATKVariantCaller, self).__init__(cmd_options=cmd_options) | |
29 | |
30 self.last_command = None | |
31 | |
32 def get_info(self, plain=False): | |
33 d = {"name": "gatk", "version": self.get_version(), "command": self.last_command} | |
34 | |
35 if plain: | |
36 result = "GATK(%(version)s): %(command)s" % d | |
37 else: | |
38 result = OrderedDict(d) | |
39 | |
40 return result | |
41 | |
42 def get_version(self): | |
43 | |
44 p = subprocess.Popen(["java", "-jar", os.environ["GATK_JAR"], "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | |
45 (output, _) = p.communicate() | |
46 | |
47 # last character is EOL. | |
48 version = output.split("\n")[-2] | |
49 | |
50 return version | |
51 | |
52 def make_vcf(self, *args, **kwargs): | |
53 ref = kwargs.get("ref") | |
54 bam = kwargs.get("bam") | |
55 | |
56 if kwargs.get("vcf_file") is None: | |
57 kwargs["vcf_file"] = "variants.vcf" | |
58 | |
59 opts = {"ref": os.path.abspath(ref), | |
60 "bam": os.path.abspath(bam), | |
61 "gatk_jar": os.environ["GATK_JAR"], | |
62 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")), | |
63 "extra_cmd_options": self.cmd_options} | |
64 | |
65 # if not self.create_aux_files(ref): | |
66 # logging.warn("Auxiliary files were not created.") | |
67 # return False | |
68 | |
69 # Call variants | |
70 # FIXME: Sample ploidy = 2? | |
71 os.environ["GATK_JAR"] | |
72 cmd = "java -XX:+UseSerialGC -jar %(gatk_jar)s -T UnifiedGenotyper -R %(ref)s -I %(bam)s -o %(all_variants_file)s %(extra_cmd_options)s" % opts | |
73 success = os.system(cmd) | |
74 | |
75 if success != 0: | |
76 logging.warn("Calling variants returned non-zero exit status.") | |
77 return False | |
78 | |
79 self.last_command = cmd | |
80 | |
81 return True | |
82 | |
83 def create_aux_files(self, ref): | |
84 """Create auxiliary files needed for this variant. | |
85 | |
86 Tools needed: samtools and picard tools. Picard tools is a Java | |
87 library that can be defined using environment variable: PICARD_JAR | |
88 specifying path to picard.jar or PICARD_TOOLS_PATH specifying path | |
89 to the directory where separate jars are (older version before jars | |
90 were merged into a single picard.jar). | |
91 Parameters: | |
92 ----------- | |
93 ref: str | |
94 Path to the reference file. | |
95 | |
96 Returns: | |
97 -------- | |
98 bool: | |
99 True if auxiliary files were created, False otherwise. | |
100 """ | |
101 | |
102 ref_name, _ = os.path.splitext(ref) | |
103 | |
104 success = os.system("samtools faidx %s" % ref) | |
105 | |
106 if success != 0: | |
107 logging.warn("Fasta index could not be created.") | |
108 return False | |
109 | |
110 d = {"ref": ref, "ref_name": ref_name} | |
111 | |
112 if os.environ.get("PICARD_TOOLS_PATH"): | |
113 d["picard_tools_path"] = os.path.join(os.environ["PICARD_TOOLS_PATH"], "CreateSequenceDictionary.jar") | |
114 elif os.environ.get("PICARD_JAR"): | |
115 # This is used in newer version of PICARD tool where multiple | |
116 # jars were merged into a single jar file. | |
117 d["picard_tools_path"] = "%s %s" % (os.environ["PICARD_JAR"], "CreateSequenceDictionary") | |
118 else: | |
119 logging.error("Picard tools are not present in the path.") | |
120 return False | |
121 | |
122 success = os.system("java -jar %(picard_tools_path)s R=%(ref)s O=%(ref_name)s.dict" % d) | |
123 | |
124 if success != 0: | |
125 logging.warn("Dictionary for the %s reference could not be created", ref) | |
126 return False |