Mercurial > repos > ulfschaefer > filter_vcf
comparison phe/variant_filters/ADFilter.py @ 10:c2f8e7580133 draft
Uploaded
author | ulfschaefer |
---|---|
date | Mon, 21 Dec 2015 10:50:17 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:2e3115b4df74 | 10:c2f8e7580133 |
---|---|
1 '''Filter VCFs on AD ratio. | |
2 | |
3 Created on 24 Sep 2015 | |
4 | |
5 @author: alex | |
6 ''' | |
7 | |
8 import argparse | |
9 import logging | |
10 | |
11 from phe.variant_filters import PHEFilterBase | |
12 | |
13 | |
class ADFilter(PHEFilterBase):
    '''Filter sites by AD ratio.

    The ratio is computed in ``__call__`` from the ``AD`` values of the first
    sample of a record as ``AD[1] / sum(AD)`` and compared against
    ``threshold``.
    '''

    # Identifier of this filter.
    name = "ADRatio"
    # Threshold used when no explicit value is supplied.
    _default_threshold = 0.9
    # Key under which the threshold is looked up in the constructor arguments;
    # also used (with "_" replaced by "-") as the command line option name.
    parameter = "ad_ratio"

    @classmethod
    def customize_parser(cls, parser):
        """Add the ``--ad-ratio`` option to *parser*.

        :param parser: object exposing an argparse-style ``add_argument``.
        """
        arg_name = cls.parameter.replace("_", "-")
        parser.add_argument("--%s" % arg_name, type=float, default=cls._default_threshold,
                            help="Filter sites below minimum ad ratio (default: %s)" % cls._default_threshold)

    def __init__(self, args):
        """AD Ratio constructor.

        :param args: either an :py:class:`argparse.Namespace` carrying the
            parsed ``ad_ratio`` option, or a ``dict`` holding the threshold
            under the ``ad_ratio`` key. Any other type leaves the default
            threshold in place.
        :raises Exception: if *args* is a dict and its ``ad_ratio`` value is
            missing or cannot be converted to a float.
        """
        # This needs to happen first, because threshold is initialised here.
        super(ADFilter, self).__init__(args)

        # Change the threshold to the custom AD ratio value.
        self.threshold = self._default_threshold
        if isinstance(args, argparse.Namespace):
            # Look the value up via ``parameter`` so that it always matches
            # the option registered by customize_parser().
            self.threshold = getattr(args, self.parameter)
        elif isinstance(args, dict):
            try:
                self.threshold = float(args.get(self.parameter))
            except (TypeError, ValueError):
                logging.error("Could not retrieve threshold from %s", args.get(self.parameter))
                logging.error("This parameter requires to be a float!")
                raise Exception("Could not create AD filter from parameters: %s" % args)

    def __call__(self, record):
        """Filter a :py:class:`vcf.model._Record`.

        Only the first sample of *record* is inspected.

        :returns: the result of ``_check_record`` when that is not ``True``;
            otherwise ``None`` when the AD ratio is at or above the threshold,
            the ratio itself when it is non-zero and below the threshold, and
            ``False`` when the ratio is zero or could not be computed.
            NOTE(review): presumably ``None`` means "record passes", as in the
            PyVCF filter convention -- confirm against PHEFilterBase.
        """
        good_record = self._check_record(record)

        if good_record is not True:
            return good_record

        if len(record.samples) > 1:
            logging.warning("More than 1 sample detected. Only first is considered.")

        try:
            record_ad = record.samples[0].data.AD

            # FIXME: when record length is > 2, what do you do?
            # Explicit check instead of ``assert`` so that the validation is
            # still performed when Python runs with -O.
            if len(record_ad) != 2:
                raise ValueError("AD data is incomplete POS: %i" % record.POS)

            depth = sum(record_ad)

            ratio = float(record_ad[1]) / depth
        except Exception:
            # Deliberately broad: any problem while reading AD (missing field,
            # wrong length, zero depth, ...) is reported and treated as a
            # ratio that could not be computed.
            logging.warning("Could not calculate AD ratio from %s POS: %s", record, record.POS)
            ratio = None

        if ratio is None or ratio < self.threshold:
            # FIXME: When ratio is None, i.e. error, what do you do?
            # A ratio of None or 0.0 is reported as False.
            return ratio or False

        return None

    def short_desc(self):
        """Return the class docstring followed by the active threshold.

        An empty string is returned when the class has no docstring.
        """
        short_desc = self.__doc__ or ''

        if short_desc:
            short_desc = "%s (AD ratio > %s )" % (short_desc, self.threshold)

        return short_desc