annotate phe/variant_filters/DepthFilter.py @ 8:5932a130395e draft

Uploaded
author ulfschaefer
date Fri, 18 Dec 2015 07:30:22 -0500
parents 834a312c0114
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
1 '''Filter VCF on depth of coverage.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
2
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
3 Created on 24 Sep 2015
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
4
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
5 @author: alex
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
6 '''
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
7 import argparse
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
8 import logging
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
9
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
10 from phe.variant_filters import PHEFilterBase
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
11
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
12
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
class DepthFilter(PHEFilterBase):
    """Filter sites by depth."""
    # NOTE: the class docstring above is read at runtime by short_desc(), so
    # it is kept short and unchanged; extra documentation lives in comments.
    #
    # A record is flagged when its depth of coverage (DP) is missing or lower
    # than ``threshold``. DP is read from the first sample and, if that is not
    # available, from the record's INFO field.

    # Identifier of this filter.
    name = "MinDepth"
    # Threshold used when the caller does not supply one.
    _default_threshold = 5
    # Key under which the threshold is looked up in dict arguments; also the
    # source of the command line option name (underscores become dashes).
    parameter = "min_depth"

    @classmethod
    def customize_parser(cls, parser):
        """Add the minimum depth option to an argparse-style *parser*.

        The option name is derived from ``parameter`` (``--min-depth`` for the
        default value), takes an int and defaults to ``_default_threshold``.
        """
        arg_name = cls.parameter.replace("_", "-")
        # The format string needs a placeholder: a bare "--" % arg_name raises
        # TypeError ("not all arguments converted during string formatting").
        parser.add_argument("--%s" % arg_name, type=int, default=cls._default_threshold,
                            help="Filter sites below minimum depth (default: %s)" % cls._default_threshold)

    def __init__(self, args):
        """Min Depth constructor.

        :param args: an ``argparse.Namespace`` with a ``min_depth`` attribute,
            or a dict holding the threshold under the ``parameter`` key. Any
            other type leaves the threshold at ``_default_threshold``.
        """
        # The base constructor runs first; per the original author it is where
        # threshold is initialised, so the value is overridden afterwards.
        super(DepthFilter, self).__init__(args)

        # Change the threshold to custom dp value.
        self.threshold = self._default_threshold
        if isinstance(args, argparse.Namespace):
            self.threshold = args.min_depth
        elif isinstance(args, dict):
            try:
                self.threshold = int(args.get(self.parameter))
            except (TypeError, ValueError):
                # TypeError: key missing (int(None)); ValueError: value is not
                # a number (e.g. int("abc")). Both are reported the same way.
                logging.error("Could not retrieve threshold from %s", args.get(self.parameter))
                self.threshold = None

    def __call__(self, record):
        """Filter a :py:class:`vcf.model._Record`.

        :returns: ``None`` when the record's depth is at least ``threshold``.
            Otherwise the depth itself, or ``False`` when the depth is
            missing or zero.
        """

        if len(record.samples) > 1:
            # logging.warn is a deprecated alias that newer Python versions
            # no longer provide; logging.warning is the supported name.
            logging.warning("Currently we only filter VCFs with 1 sample. Only first sample will be used.")

        try:
            record_dp = record.samples[0].data.DP
        except (AttributeError, IndexError):
            # No DP in the sample data, or no samples at all: fall back to
            # the INFO field below.
            record_dp = None

        if record_dp is None:
            record_dp = record.INFO.get("DP")

        if record_dp is None or record_dp < self.threshold:
            return record_dp or False
        else:
            return None

    def short_desc(self):
        """Return the class docstring followed by the depth threshold.

        An empty string is returned when the class has no docstring.
        """
        short_desc = self.__doc__ or ''

        if short_desc:
            # NOTE(review): __call__ keeps records with DP >= threshold, so
            # ">" is slightly off; the text is left unchanged because it is
            # emitted at runtime and other code may depend on it.
            short_desc = "%s (DP > %i)" % (short_desc, self.threshold)

        return short_desc