annotate phe/variant/MPileupVariantCaller.py @ 22:96f393ad7fc6 draft default tip

Uploaded
author ulfschaefer
date Wed, 23 Dec 2015 04:50:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
22
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
1 '''
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
2 Created on 22 Sep 2015
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
3
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
4 @author: alex
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
5 '''
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
6 from collections import OrderedDict
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
7 import logging
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
8 import os
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
9 import subprocess
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
10 import tempfile
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
11
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
12 from phe.variant import VariantCaller
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
13
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
14
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
15 class MPileupVariantCaller(VariantCaller):
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
16 """Implemetation of the Broad institute's variant caller."""
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
17
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
18 name = "mpileup"
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
19 """Plain text name of the variant caller."""
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
20
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
21 _default_options = "-m -f GQ"
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
22 """Default options for the variant caller."""
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
23
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
24 def __init__(self, cmd_options=None):
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
25 """Constructor"""
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
26 if cmd_options is None:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
27 cmd_options = self._default_options
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
28
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
29 super(MPileupVariantCaller, self).__init__(cmd_options=cmd_options)
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
30
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
31 self.last_command = None
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
32
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
33 def get_info(self, plain=False):
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
34 d = {"name": self.name, "version": self.get_version(), "command": self.last_command}
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
35
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
36 if plain:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
37 result = "mpileup(%(version)s): %(command)s" % d
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
38 else:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
39 result = OrderedDict(d)
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
40
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
41 return result
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
42
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
43 def get_version(self):
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
44
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
45 p = subprocess.Popen(["samtools", "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
46 (output, _) = p.communicate()
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
47
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
48 # first line is the version of the samtools
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
49 version = output.split("\n")[0].split(" ")[1]
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
50
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
51 return version
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
52
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
53 def make_vcf(self, *args, **kwargs):
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
54 ref = kwargs.get("ref")
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
55 bam = kwargs.get("bam")
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
56
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
57 make_aux = kwargs.get("make_aux", False)
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
58
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
59 if kwargs.get("vcf_file") is None:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
60 kwargs["vcf_file"] = "variants.vcf"
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
61
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
62 opts = {"ref": os.path.abspath(ref),
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
63 "bam": os.path.abspath(bam),
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
64 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")),
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
65 "extra_cmd_options": self.cmd_options}
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
66
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
67 if make_aux:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
68 if not self.create_aux_files(ref):
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
69 logging.warn("Auxiliary files were not created.")
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
70 return False
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
71
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
72 with tempfile.NamedTemporaryFile(suffix=".pileup") as tmp:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
73 opts["pileup_file"] = tmp.name
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
74 cmd = "samtools mpileup -t DP,DV,DP4,DPR,SP -Auf %(ref)s %(bam)s | bcftools call %(extra_cmd_options)s > %(all_variants_file)s" % opts
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
75 print cmd
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
76 self.last_command = cmd
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
77 if os.system(cmd) != 0:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
78 logging.warn("Pileup creation was not successful.")
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
79 return False
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
80
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
81 return True
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
82
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
83 def create_aux_files(self, ref):
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
84 """Index reference with faidx from samtools.
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
85
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
86 Parameters:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
87 -----------
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
88 ref: str
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
89 Path to the reference file.
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
90
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
91 Returns:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
92 --------
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
93 bool:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
94 True if auxiliary files were created, False otherwise.
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
95 """
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
96
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
97 success = os.system("samtools faidx %s" % ref)
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
98
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
99 if success != 0:
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
100 logging.warn("Fasta index could not be created.")
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
101 return False
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
102
96f393ad7fc6 Uploaded
ulfschaefer
parents:
diff changeset
103 return True