annotate test-data/test-db/metaphlan-db/customizemapping.py @ 4:12fb63b5f63f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0a3abeca0535457ddf39c394001d61cd2e45afdd
author iuc
date Mon, 24 Apr 2023 18:58:44 +0000
parents d7e9a3c41657
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
1 #!/usr/bin/env python
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
2 # -*- coding: utf-8 -*-
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
3 # Script to extract a custom mapping file from an input mapping file.
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
4 # Mostly used for a reduced-size demo data generation.
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
5
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
6
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
7 import argparse
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
8 from pathlib import Path
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
9
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
10
d7e9a3c41657 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
iuc
parents:
diff changeset
def _read_first_column(path):
    """Return the set of first-column values of a tab-separated file.

    The trailing newline is stripped before splitting so that a file with a
    single column yields clean keys (``"K1"`` rather than ``"K1\\n"``).
    Lines whose first column is empty (e.g. blank lines) are skipped.
    """
    values = set()
    with open(path, 'r') as handle:
        # Iterate lazily instead of loading the whole file with readlines().
        for line in handle:
            key = line.rstrip("\n").split("\t")[0]
            if key:
                values.add(key)
    return values


def _filter_mapping(in_path, out_path, features, elements):
    """Write the reduced mapping file.

    Only lines of ``in_path`` whose first column is in ``features`` are
    written to ``out_path``; on those lines, only the remaining columns that
    are in ``elements`` are kept (original column order is preserved).
    """
    with open(in_path, 'r') as in_f, open(out_path, 'w') as out_f:
        for line in in_f:
            # Strip the newline first: otherwise the last column would carry
            # a trailing "\n" and never match an entry of ``elements``.
            feat, *members = line.rstrip("\n").split("\t")
            if feat not in features:
                continue
            kept = [e for e in members if e in elements]
            out_f.write("%s\n" % '\t'.join([feat] + kept))


def main(argv=None):
    """Parse the command line and generate the reduced mapping file.

    ``argv`` is the list of command-line arguments; when ``None``, argparse
    falls back to ``sys.argv[1:]``.
    """
    # Read command line
    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
    # All four paths are needed; marking them required gives a usage error
    # instead of a TypeError from Path(None) when one is missing.
    parser.add_argument('--in_mapping', required=True, help="Path to mapping file to reduce")
    parser.add_argument('--features', required=True, help="Path to tabular file with features to keep in first column")
    parser.add_argument('--elements', required=True, help="Path to tabular file with elements to keep in other columns")
    parser.add_argument('--out_mapping', required=True, help="Path to reduced mapping file")
    args = parser.parse_args(argv)

    in_mapping_fp = Path(args.in_mapping)
    feature_fp = Path(args.features)
    element_fp = Path(args.elements)
    out_mapping_fp = Path(args.out_mapping)

    # extract features to keep (and echo the parsed set to stdout)
    features = _read_first_column(feature_fp)
    print(features)

    # extract elements to keep (and echo the parsed set to stdout)
    elements = _read_first_column(element_fp)
    print(elements)

    # write mapping for features to keep while keeping only elements
    _filter_mapping(in_mapping_fp, out_mapping_fp, features, elements)


if __name__ == '__main__':
    main()