annotate phe/variant/MPileupVariantCaller.py @ 20:7ac17b6d031e draft

Uploaded
author ulfschaefer
date Fri, 18 Dec 2015 07:31:09 -0500
parents f72039c5faa4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
14
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
1 '''
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
2 Created on 22 Sep 2015
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
3
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
4 @author: alex
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
5 '''
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
6 from collections import OrderedDict
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
7 import logging
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
8 import os
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
9 import subprocess
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
10 import tempfile
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
11
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
12 from phe.variant import VariantCaller
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
13
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
14
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
15 class MPileupVariantCaller(VariantCaller):
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
16 """Implemetation of the Broad institute's variant caller."""
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
17
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
18 name = "mpileup"
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
19 """Plain text name of the variant caller."""
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
20
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
21 _default_options = "-m -f GQ"
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
22 """Default options for the variant caller."""
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
23
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
24 def __init__(self, cmd_options=None):
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
25 """Constructor"""
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
26 if cmd_options is None:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
27 cmd_options = self._default_options
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
28
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
29 super(MPileupVariantCaller, self).__init__(cmd_options=cmd_options)
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
30
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
31 self.last_command = None
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
32
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
33 def get_info(self, plain=False):
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
34 d = {"name": self.name, "version": self.get_version(), "command": self.last_command}
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
35
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
36 if plain:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
37 result = "mpileup(%(version)s): %(command)s" % d
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
38 else:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
39 result = OrderedDict(d)
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
40
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
41 return result
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
42
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
43 def get_version(self):
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
44
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
45 p = subprocess.Popen(["samtools", "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
46 (output, _) = p.communicate()
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
47
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
48 # first line is the version of the samtools
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
49 version = output.split("\n")[0].split(" ")[1]
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
50
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
51 return version
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
52
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
53 def make_vcf(self, *args, **kwargs):
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
54 ref = kwargs.get("ref")
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
55 bam = kwargs.get("bam")
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
56
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
57 if kwargs.get("vcf_file") is None:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
58 kwargs["vcf_file"] = "variants.vcf"
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
59
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
60 opts = {"ref": os.path.abspath(ref),
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
61 "bam": os.path.abspath(bam),
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
62 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")),
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
63 "extra_cmd_options": self.cmd_options}
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
64
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
65 with tempfile.NamedTemporaryFile(suffix=".pileup") as tmp:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
66 opts["pileup_file"] = tmp.name
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
67 cmd = "samtools mpileup -t DP,DV,DP4,DPR,SP -Auf %(ref)s %(bam)s | bcftools call %(extra_cmd_options)s > %(all_variants_file)s" % opts
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
68 print cmd
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
69 self.last_command = cmd
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
70 if os.system(cmd) != 0:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
71 logging.warn("Pileup creation was not successful.")
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
72 return False
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
73
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
74 return True
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
75
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
76 def create_aux_files(self, ref):
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
77 """Index reference with faidx from samtools.
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
78
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
79 Parameters:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
80 -----------
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
81 ref: str
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
82 Path to the reference file.
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
83
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
84 Returns:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
85 --------
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
86 bool:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
87 True if auxiliary files were created, False otherwise.
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
88 """
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
89
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
90 success = os.system("samtools faidx %s" % ref)
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
91
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
92 if success != 0:
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
93 logging.warn("Fasta index could not be created.")
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
94 return False
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
95
f72039c5faa4 Uploaded
ulfschaefer
parents:
diff changeset
96 return True