annotate filter_vcf.py @ 11:cd59be4a7fe3 draft default tip

Uploaded
author ulfschaefer
date Mon, 21 Dec 2015 11:12:19 -0500
parents 834a312c0114
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
1 #!/usr/bin/env python
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
2 '''Simple VCF parser using custom filters.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
3
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
4 Created on 6 Oct 2015
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
5
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
6 @author: alex
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
7 '''
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
8 import argparse
11
cd59be4a7fe3 Uploaded
ulfschaefer
parents: 0
diff changeset
9 import logging
cd59be4a7fe3 Uploaded
ulfschaefer
parents: 0
diff changeset
10 import yaml
0
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
11
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
12 from phe.variant import VariantSet
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
13
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
14
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
def get_args():
    """Build the command-line parser for the VCF filter script and parse argv.

    ``--vcf`` and ``--output`` are mandatory. ``--filters`` and ``--config``
    live in a mutually exclusive group, so at most one of them may be given.

    Returns:
        The ``argparse.Namespace`` produced by parsing the command line, with
        the attributes ``vcf``, ``filters``, ``config``, ``output``,
        ``only_good`` and ``debug``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--vcf", "-v",
        required=True,
        help="VCF file to (re)filter.",
    )

    # The filters come either from the command line or from a config file,
    # never from both at once.
    filter_source = parser.add_mutually_exclusive_group()
    filter_source.add_argument(
        "--filters", "-f",
        help="Filter(s) to apply as key:threshold pairs, separated by comma.",
    )
    filter_source.add_argument(
        "--config", "-c",
        help="Config with filters in YAML format. E.g.filters:-key:value",
    )

    parser.add_argument(
        "--output", "-o",
        required=True,
        help="Location for filtered VCF to be written.",
    )

    parser.add_argument(
        "--only-good",
        action="store_true",
        default=False,
        help="Write only variants that PASS all filters (default all variants are written).",
    )

    parser.add_argument(
        "--debug",
        action="store_true",
        default=False,
        help="Make output more verbose.",
    )

    parsed = parser.parse_args()
    return parsed
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
33
11
cd59be4a7fe3 Uploaded
ulfschaefer
parents: 0
diff changeset
def load_config(config_path):
    """Read the filter definitions from a YAML config file.

    Args:
        config_path: Path of the YAML file to open.

    Returns:
        The value stored under the top-level ``filters`` key, or an empty
        dict when that key is absent or the file holds no document.
    """
    with open(config_path) as fp:
        # NOTE(security): this used to be ``yaml.load(fp)``. Without an
        # explicit Loader that call can construct arbitrary Python objects
        # on old PyYAML releases and is rejected outright by PyYAML >= 6.
        # A filter config is plain data, so the safe loader is sufficient.
        # An empty file parses to None; fall back to an empty mapping so the
        # lookup below does not fail.
        config = yaml.safe_load(fp) or {}

    return config.get("filters", {})
0
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
39
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
def main():
    """Run the filtering script: parse arguments, filter the VCF, write it out.

    Filters are taken from the YAML file given via ``--config`` when present,
    otherwise from ``--filters``.

    Returns:
        1 when neither ``--config`` nor ``--filters`` was supplied; otherwise
        the function falls off the end and returns None.
    """
    options = get_args()

    if options.debug:
        verbosity = logging.DEBUG
    else:
        verbosity = logging.INFO
    logging.basicConfig(level=verbosity,
                        format="[%(asctime)s] %(levelname)s: %(message)s")

    # A config file, when given, takes the place of the --filters value.
    filters = options.filters
    if options.config is not None:
        filters = load_config(options.config)
    elif filters is None:
        logging.error("Either --config or --filters needs to be specified.")
        return 1

    variants = VariantSet(options.vcf, filters=filters)

    # Filtering is skipped when the resolved filters are empty.
    if filters:
        variants.filter_variants()

    variants.write_variants(options.output, only_good=options.only_good)
cd59be4a7fe3 Uploaded
ulfschaefer
parents: 0
diff changeset
59
cd59be4a7fe3 Uploaded
ulfschaefer
parents: 0
diff changeset
60
0
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
61
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
if __name__ == '__main__':
    # Use SystemExit directly rather than the ``exit`` helper: that helper is
    # injected by the ``site`` module for interactive sessions and is not
    # guaranteed to exist (e.g. ``python -S``). main()'s return value becomes
    # the process exit status (None means success).
    raise SystemExit(main())