Mercurial > repos > ulfschaefer > filter_vcf
comparison phe/variant_filters/ADFilter.py @ 0:834a312c0114 draft
Uploaded
author | ulfschaefer |
---|---|
date | Thu, 10 Dec 2015 09:22:39 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:834a312c0114 |
---|---|
1 '''Filter VCFs on AD ratio. | |
2 | |
3 Created on 24 Sep 2015 | |
4 | |
5 @author: alex | |
6 ''' | |
7 | |
8 import argparse | |
9 import logging | |
10 | |
11 from phe.variant_filters import PHEFilterBase | |
12 | |
13 | |
class ADFilter(PHEFilterBase):
    '''Filter sites by AD ratio.'''
    # NOTE: the class docstring above is read at runtime by short_desc()
    # (via self.__doc__) and becomes part of the emitted description, so it
    # must not be reworded casually.

    # Identifier of this filter.
    name = "ADRatio"
    # Threshold used when the caller does not supply one.
    _default_threshold = 0.9
    # Key / attribute name under which the threshold is looked up in ``args``.
    parameter = "ad_ratio"

    @classmethod
    def customize_parser(cls, parser):
        """Add this filter's command-line option to *parser*.

        The option name is ``parameter`` with underscores replaced by dashes
        (``--ad-ratio``); it is parsed as a float and defaults to
        ``_default_threshold``.
        """
        arg_name = cls.parameter.replace("_", "-")
        parser.add_argument("--%s" % arg_name, type=float, default=cls._default_threshold,
                            help="Filter sites below minimum ad ratio (default: %s)" % cls._default_threshold)

    def __init__(self, args):
        """AD Ratio constructor.

        *args* is either an ``argparse.Namespace`` (threshold read from its
        ``ad_ratio`` attribute) or a ``dict`` (threshold read from the
        ``"ad_ratio"`` key and converted to float).  For any other type the
        default threshold is kept.  If the dict value is missing or cannot be
        converted, the error is logged and the threshold is set to ``None``.
        """
        # This needs to happen first, because threshold is initialised here.
        super(ADFilter, self).__init__(args)

        # Change the threshold to the custom AD ratio value.
        self.threshold = self._default_threshold
        if isinstance(args, argparse.Namespace):
            self.threshold = args.ad_ratio
        elif isinstance(args, dict):
            raw_threshold = args.get(self.parameter)
            try:
                self.threshold = float(raw_threshold)
            except (TypeError, ValueError):
                # TypeError: value missing (None); ValueError: non-numeric text.
                logging.error("Could not retrieve threshold from %s", raw_threshold)
                self.threshold = None

    def __call__(self, record):
        """Filter a :py:class:`vcf.model._Record`.

        Only SNP records are examined and only the first sample is used.  The
        ratio is ``AD[1] / sum(AD)`` and requires exactly two AD values.

        Returns ``None`` when the record is not a SNP or its ratio is not
        below the threshold; ``False`` when the ratio could not be computed
        (or is zero); otherwise the ratio that fell below the threshold.
        NOTE(review): presumably the caller treats any non-``None`` return as
        "record filtered" -- confirm against PHEFilterBase / the filter driver.
        """

        if not record.is_snp:
            return None

        if len(record.samples) > 1:
            logging.warning("More than 1 sample detected. Only first is considered.")

        # Bound before the try so the error handler can always log it, even
        # when fetching AD itself is what failed.
        record_ad = None
        try:
            record_ad = record.samples[0].data.AD

            # FIXME: when record length is > 2, what do you do?
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if len(record_ad) != 2:
                raise ValueError("AD data is incomplete POS: %i" % record.POS)

            depth = sum(record_ad)

            # A zero depth raises ZeroDivisionError and is handled below.
            ratio = float(record_ad[1]) / depth
        except Exception:
            logging.error("Could not calculate AD ratio from %s POS: %s", record_ad, record.POS)
            ratio = None

        if ratio is None:
            # FIXME: When ratio is None, i.e. error, what do you do?
            return False

        # A ``None`` threshold (unparseable configuration) filters nothing;
        # guarding it avoids a float-vs-None comparison error on Python 3.
        if self.threshold is not None and ratio < self.threshold:
            return ratio or False

        return None

    def short_desc(self):
        """Return the class docstring followed by the active threshold.

        Returns an empty string when the class has no docstring.
        """
        short_desc = self.__doc__ or ''

        if short_desc:
            short_desc = "%s (AD ratio > %s )" % (short_desc, self.threshold)

        return short_desc