Mercurial > repos > thanhlv > humann_strain_profiler
comparison test-data/test-db/metaphlan-db/customizemapping.py @ 0:db4f6b239f5e draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author | thanhlv |
---|---|
date | Mon, 13 Feb 2023 16:18:40 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:db4f6b239f5e |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 # Script to extract a custom (reduced) mapping file from an input mapping file. | |
4 # Mostly used to generate reduced-size demo data. | |
5 | |
6 | |
7 import argparse | |
8 from pathlib import Path | |
9 | |
10 | |
def _read_first_column(path):
    """Return the set of non-empty first-column values of a tab-separated file.

    The line terminator is stripped before splitting, so a file with a single
    column yields clean keys rather than keys ending in a newline.
    """
    values = set()
    with open(path, 'r') as handle:
        # Iterate lazily instead of readlines(): inputs can be large.
        for line in handle:
            key = line.rstrip("\r\n").split("\t")[0]
            if key:
                values.add(key)
    return values


def _reduce_mapping(in_path, out_path, features, elements):
    """Copy rows of ``in_path`` whose first column is in ``features``.

    Each kept row is written to ``out_path`` as the feature followed by only
    those remaining columns that are present in ``elements`` (tab-separated,
    original column order preserved).
    """
    with open(in_path, 'r') as in_f, open(out_path, 'w') as out_f:
        for line in in_f:
            # Strip the terminator first; otherwise the last column would
            # carry a trailing "\n" and never match an entry of ``elements``.
            feat, *others = line.rstrip("\r\n").split("\t")
            if feat not in features:
                continue
            kept = [feat] + [e for e in others if e in elements]
            out_f.write("%s\n" % '\t'.join(kept))


def main(argv=None):
    """Reduce a HUMAnN utility mapping file to selected features and elements.

    Args:
        argv: Optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when ``None`` (argparse behaviour).
    """
    # Read command line
    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
    # All four paths are needed; let argparse report a missing one instead of
    # failing later on Path(None).
    parser.add_argument('--in_mapping', required=True, help="Path to mapping file to reduce")
    parser.add_argument('--features', required=True, help="Path to tabular file with features to keep in first column")
    parser.add_argument('--elements', required=True, help="Path to tabular file with elements to keep in other columns")
    parser.add_argument('--out_mapping', required=True, help="Path to reduced mapping file")
    args = parser.parse_args(argv)

    in_mapping_fp = Path(args.in_mapping)
    feature_fp = Path(args.features)
    element_fp = Path(args.elements)
    out_mapping_fp = Path(args.out_mapping)

    # extract features to keep
    features = _read_first_column(feature_fp)
    print(features)

    # extract elements to keep
    elements = _read_first_column(element_fp)
    print(elements)

    # write mapping for features to keep while keeping only elements
    _reduce_mapping(in_mapping_fp, out_mapping_fp, features, elements)


if __name__ == '__main__':
    main()