Mercurial > repos > thanhlv > humann_reduce_table
annotate test-data/test-db/metaphlan-db/customizemapping.py @ 0:e152169e5c44 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author | thanhlv |
---|---|
date | Mon, 13 Feb 2023 16:23:08 +0000 |
parents | |
children |
rev | line source |
---|---|
0
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
1 #!/usr/bin/env python |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
3 # Script to extract a custom (reduced) mapping file from an input mapping file. |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
4 # Mostly used for a reduced-size demo data generation. |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
5 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
6 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
7 import argparse |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
8 from pathlib import Path |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
9 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
10 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def first_column_values(lines):
    """Return the set of first-column values found in tab-separated *lines*.

    Args:
        lines: Iterable of text lines (for example an open file), each
            optionally ending with a line terminator.

    Returns:
        A set holding the first tab-separated field of every line whose
        first field is not empty.
    """
    values = set()
    for line in lines:
        # Strip the line terminator before splitting: otherwise a line with a
        # single column would be stored as "value\n" and never match later.
        first = line.rstrip("\r\n").split("\t")[0]
        # Blank lines (or lines starting with a tab) carry no usable key.
        if first:
            values.add(first)
    return values


def reduce_mapping_line(line, features, elements):
    """Reduce one mapping line to the elements that should be kept.

    Args:
        line: One line of the mapping file: a feature in the first column
            followed by tab-separated elements.
        features: Collection of features (first-column values) to keep.
        elements: Collection of elements (other-column values) to keep.

    Returns:
        The tab-joined reduced line (without a trailing newline), made of the
        feature followed by the kept elements in their original order, or
        ``None`` when the feature of the line is not in *features*. A kept
        feature with no kept element yields the feature alone.
    """
    # Remove the line terminator first so that the last element of the line
    # can be compared with the entries of ``elements``.
    fields = line.rstrip("\r\n").split("\t")
    feat = fields[0]
    if feat not in features:
        return None
    kept = [feat]
    kept.extend(e for e in fields[1:] if e in elements)
    return '\t'.join(kept)


def main(argv=None):
    """Run the command line tool.

    Args:
        argv: Optional list of command line arguments; when ``None`` the
            arguments are taken from ``sys.argv`` by argparse.
    """
    # Read command line
    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
    # All four paths are needed; marking them required turns a missing option
    # into a usage error instead of a TypeError raised by Path(None).
    parser.add_argument('--in_mapping', required=True, help="Path to mapping file to reduce")
    parser.add_argument('--features', required=True, help="Path to tabular file with features to keep in first column")
    parser.add_argument('--elements', required=True, help="Path to tabular file with elements to keep in other columns")
    parser.add_argument('--out_mapping', required=True, help="Path to reduced mapping file")
    args = parser.parse_args(argv)

    in_mapping_fp = Path(args.in_mapping)
    feature_fp = Path(args.features)
    element_fp = Path(args.elements)
    out_mapping_fp = Path(args.out_mapping)

    # extract features to keep
    with open(feature_fp, 'r') as feature_f:
        features = first_column_values(feature_f)
    # The collected sets are echoed on stdout, as the script always did.
    print(features)

    # extract elements to keep
    with open(element_fp, 'r') as element_f:
        elements = first_column_values(element_f)
    print(elements)

    # write mapping for features to keep while keeping only elements
    with open(in_mapping_fp, 'r') as in_mapping_f, open(out_mapping_fp, 'w') as out_mapping_f:
        # Iterate over the file object to stream the (possibly large) mapping
        # instead of loading every line in memory.
        for line in in_mapping_f:
            reduced = reduce_mapping_line(line, features, elements)
            if reduced is not None:
                out_mapping_f.write("%s\n" % reduced)


if __name__ == '__main__':
    main()