annotate phe/variant/MPileupVariantCaller.py @ 0:834a312c0114 draft

Uploaded
author ulfschaefer
date Thu, 10 Dec 2015 09:22:39 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
1 '''
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
2 Created on 22 Sep 2015
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
3
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
4 @author: alex
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
5 '''
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
6 from collections import OrderedDict
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
7 import logging
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
8 import os
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
9 import subprocess
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
10 import tempfile
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
11
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
12 from phe.variant import VariantCaller
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
13
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
14
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
15 class MPileupVariantCaller(VariantCaller):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
16 """Implemetation of the Broad institute's variant caller."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
17
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
18 name = "mpileup"
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
19 """Plain text name of the variant caller."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
20
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
21 _default_options = "-m -f GQ"
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
22 """Default options for the variant caller."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
23
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
24 def __init__(self, cmd_options=None):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
25 """Constructor"""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
26 if cmd_options is None:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
27 cmd_options = self._default_options
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
28
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
29 super(MPileupVariantCaller, self).__init__(cmd_options=cmd_options)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
30
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
31 self.last_command = None
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
32
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
33 def get_info(self, plain=False):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
34 d = {"name": self.name, "version": self.get_version(), "command": self.last_command}
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
35
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
36 if plain:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
37 result = "mpileup(%(version)s): %(command)s" % d
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
38 else:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
39 result = OrderedDict(d)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
40
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
41 return result
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
42
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
43 def get_version(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
44
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
45 p = subprocess.Popen(["samtools", "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
46 (output, _) = p.communicate()
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
47
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
48 # first line is the version of the samtools
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
49 version = output.split("\n")[0].split(" ")[1]
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
50
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
51 return version
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
52
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
53 def make_vcf(self, *args, **kwargs):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
54 ref = kwargs.get("ref")
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
55 bam = kwargs.get("bam")
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
56
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
57 if kwargs.get("vcf_file") is None:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
58 kwargs["vcf_file"] = "variants.vcf"
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
59
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
60 opts = {"ref": os.path.abspath(ref),
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
61 "bam": os.path.abspath(bam),
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
62 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")),
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
63 "extra_cmd_options": self.cmd_options}
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
64
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
65 with tempfile.NamedTemporaryFile(suffix=".pileup") as tmp:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
66 opts["pileup_file"] = tmp.name
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
67 cmd = "samtools mpileup -t DP,DV,DP4,DPR,SP -Auf %(ref)s %(bam)s | bcftools call %(extra_cmd_options)s > %(all_variants_file)s" % opts
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
68 print cmd
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
69 self.last_command = cmd
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
70 if os.system(cmd) != 0:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
71 logging.warn("Pileup creation was not successful.")
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
72 return False
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
73
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
74 return True
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
75
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
76 def create_aux_files(self, ref):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
77 """Index reference with faidx from samtools.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
78
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
79 Parameters:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
80 -----------
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
81 ref: str
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
82 Path to the reference file.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
83
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
84 Returns:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
85 --------
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
86 bool:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
87 True if auxiliary files were created, False otherwise.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
88 """
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
89
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
90 success = os.system("samtools faidx %s" % ref)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
91
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
92 if success != 0:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
93 logging.warn("Fasta index could not be created.")
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
94 return False
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
95
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
96 return True