annotate phe/variant_filters/__init__.py @ 0:834a312c0114 draft

Uploaded
author ulfschaefer
date Thu, 10 Dec 2015 09:22:39 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
1 """Classes and functions for working with variant filters."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
2
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
3 from __builtin__ import __import__
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
4 from abc import abstractproperty
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
5 import abc
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
6 import argparse
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
7 import glob
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
8 import inspect
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
9 import logging
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
10 import os
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
11 import re
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
12 import sys
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
13
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
14 import vcf
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
15 import vcf.filters
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
16 from vcf.parser import _Filter
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
17
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
18 IUPAC_CODES = {frozenset(["A", "G"]): "R",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
19 frozenset(["C", "T"]): "Y",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
20 frozenset(["G", "C"]): "S",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
21 frozenset(["A", "T"]): "W",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
22 frozenset(["G", "T"]): "K",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
23 frozenset(["A", "C"]): "M",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
24 frozenset(["C", "G", "T"]): "B",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
25 frozenset(["A", "G", "T"]): "D",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
26 frozenset(["A", "C", "T"]): "H",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
27 frozenset(["A", "C", "G"]): "V"
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
28 }
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
29
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
30 class PHEFilterBase(vcf.filters.Base):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
31 """Base class for VCF filters."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
32 __meta__ = abc.ABCMeta
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
33
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
34 magic_sep = ":"
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
35 decoder_pattern = re.compile(magic_sep)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
36
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
37 @abc.abstractproperty
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
38 def parameter(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
39 """Short name of parameter being filtered."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
40 return self.parameter
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
41
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
42 @abc.abstractproperty
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
43 def _default_threshold(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
44 """Default threshold for filtering."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
45 return self._default_threshold
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
46
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
47 def __init__(self, args):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
48 super(PHEFilterBase, self).__init__(args)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
49
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
50 # Change the threshold to custom gq value.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
51 self.threshold = self._default_threshold
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
52
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
53 if isinstance(args, dict):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
54 self.threshold = args.get(self.parameter)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
55
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
56 def __str__(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
57 return self.filter_name()
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
58
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
59 @abc.abstractmethod
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
60 def short_desc(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
61 """Short description of the filter (included in VCF)."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
62 raise NotImplementedError("Get short description is not implemented.")
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
63
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
64 def get_config(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
65 """This is used for reconstructing filter."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
66 return {self.parameter: self.threshold}
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
67
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
68 def filter_name(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
69 """Create filter names by their parameter separated by magic.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
70 E.g. if filter parameter is ad_ratio and threshold is 0.9 then
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
71 ad_ratio:0.9 if the filter name.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
72 """
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
73 return "%s%s%s" % (self.parameter, self.magic_sep, self.threshold)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
74
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
75 @staticmethod
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
76 def decode(filter_id):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
77 """Decode name of filter."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
78 conf = {}
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
79
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
80 if PHEFilterBase.magic_sep in filter_id:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
81 info = PHEFilterBase.decoder_pattern.split(filter_id)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
82 assert len(info) == 2
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
83 conf[info[0]] = info[1]
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
84 return conf
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
85
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
86 def is_gap(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
87 return False
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
88
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
89 def is_n(self):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
90 return True
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
91
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
92 @staticmethod
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
93 def call_concensus(record):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
94 extended_code = "N"
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
95 try:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
96 sample_ad = set([str(c) for c in record.ALT] + [record.REF])
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
97
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
98
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
99 for code, cov in IUPAC_CODES.items():
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
100 if sample_ad == cov:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
101 extended_code = code
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
102 break
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
103 except AttributeError:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
104 extended_code = "N"
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
105
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
106 return extended_code
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
107
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
108 def dynamic_filter_loader():
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
109 """Fancy way of dynamically importing existing filters.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
110
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
111 Returns
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
112 -------
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
113 dict:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
114 Available filters dictionary. Keys are parameters that
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
115 can be supplied to the filters.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
116 """
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
117
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
118 # We assume the filters are in the same directory as THIS file.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
119 filter_dir = os.path.dirname(__file__)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
120 filter_dir = os.path.abspath(filter_dir)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
121
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
122 # This is populated when the module is first imported.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
123 avail_filters = {}
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
124
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
125 # Add this directory to the syspath.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
126 sys.path.insert(0, filter_dir)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
127
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
128 # Find all "py" files.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
129 for filter_mod in glob.glob(os.path.join(filter_dir, "*.py")):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
130
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
131 # Derive name of the module where filter is.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
132 filter_mod_file = os.path.basename(filter_mod)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
133
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
134 # Ignore this file, obviously.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
135 if filter_mod_file.startswith("__init__"):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
136 continue
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
137
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
138 # Import the module with a filter.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
139 mod = __import__(filter_mod_file.replace(".pyc", "").replace(".py", ""))
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
140
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
141 # Find all the classes contained in this module.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
142 classes = inspect.getmembers(mod, inspect.isclass)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
143 for cls_name, cls in classes:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
144 # For each class, if it is a sublass of PHEFilterBase, add it.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
145 if cls_name != "PHEFilterBase" and issubclass(cls, PHEFilterBase):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
146 # The parameters are inherited and defined within each filter.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
147 avail_filters[cls.parameter] = cls
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
148
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
149 sys.path.remove(filter_dir)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
150
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
151 return avail_filters
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
152
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
153 _avail_filters = dynamic_filter_loader()
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
154
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
155 def available_filters():
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
156 """Return list of available filters."""
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
157 return _avail_filters.keys()
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
158
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
159 def str_to_filters(filters):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
160 """Convert from filter string to array of filters.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
161 E.g. ad_ration:0.9,min_depth:5
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
162
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
163 Parameters:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
164 -----------
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
165 filters: str
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
166 String version of filters, separated by comma.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
167
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
168 Returns:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
169 --------
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
170 list:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
171 List of :py:class:`phe.variant_filters.PHEFilterBase` instances.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
172 """
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
173
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
174 config = {}
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
175 for kv_pair in filters.split(","):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
176 pair = kv_pair.split(":")
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
177 assert len(pair) == 2, "Filters should be separated by ':' %s" % kv_pair
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
178
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
179 # We don't care about casting them to correct type because Filters
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
180 # will do it for us.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
181 config[pair[0]] = pair[1]
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
182
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
183 return make_filters(config)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
184
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
185 def make_filters(config):
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
186 """Create a list of filters from *config*.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
187
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
188 Parameters:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
189 -----------
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
190 config: dict, optional
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
191 Dictionary with parameter: value pairs. For each parameter, an
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
192 appropriate Filter will be found and instanciated.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
193
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
194 Returns:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
195 --------
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
196 list:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
197 List of :py:class:`PHEFilterBase` filters.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
198 """
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
199 filters = []
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
200
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
201 if config:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
202 for custom_filter in config:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
203 if custom_filter in _avail_filters:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
204 filters.append(_avail_filters[custom_filter](config))
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
205 else:
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
206 logging.warn("Could not find appropriate filter for %s",
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
207 custom_filter)
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
208
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
209 return filters