Mercurial > repos > iuc > humann_join_tables
annotate test-data/test-db/metaphlan-db/customizemapping.py @ 3:6c3271894a95 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 32d4c566afe55179531fdd106dc5a996cc09bbb1
| author | iuc | 
|---|---|
| date | Wed, 29 Mar 2023 20:02:22 +0000 | 
| parents | fd7ebf31b030 | 
| children | 
| rev | line source | 
|---|---|
| 2 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 2 # -*- coding: utf-8 -*- | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 3 # Script to extract a custom mapping file from an input mapping file. | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 4 # Mostly used to generate reduced-size demo data. | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 5 | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 6 | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 7 import argparse | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 8 from pathlib import Path | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 9 | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
changeset | 10 | 
| 
fd7ebf31b030
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
 iuc parents: diff
def read_first_column(path):
    """Return the set of non-empty first-column values of a tabular file.

    Each line is stripped of its line terminator before being split on
    tabs, so single-column files yield clean identifiers instead of
    values with a trailing newline. Blank lines are ignored.

    :param path: path to a tab-separated text file
    :return: set of strings found in the first column
    """
    values = set()
    with open(path, 'r') as handle:
        for line in handle:
            value = line.rstrip('\r\n').split('\t')[0]
            if value:
                values.add(value)
    return values


def reduce_mapping(in_mapping_fp, out_mapping_fp, features, elements):
    """Write a reduced copy of a mapping file.

    A row of the input mapping is kept only when its first column is in
    ``features``; within a kept row, only the remaining columns that are
    in ``elements`` are written (in their original order). A kept row
    with no matching element is still written with the feature alone.

    :param in_mapping_fp: path to the tab-separated mapping file to reduce
    :param out_mapping_fp: path of the reduced mapping file to create
    :param features: collection of first-column identifiers to keep
    :param elements: collection of other-column identifiers to keep
    """
    with open(in_mapping_fp, 'r') as in_mapping_f, \
            open(out_mapping_fp, 'w') as out_mapping_f:
        for line in in_mapping_f:
            # Drop the line terminator first: otherwise the last column
            # carries a trailing newline and can never match `elements`.
            feat, *others = line.rstrip('\r\n').split('\t')
            if feat not in features:
                continue
            kept = [e for e in others if e in elements]
            out_mapping_f.write("%s\n" % '\t'.join([feat] + kept))


def main(argv=None):
    """Parse the command line and generate the reduced mapping file.

    :param argv: argument list to parse; defaults to ``sys.argv[1:]``
    """
    # Read command line; every option is needed, so let argparse report
    # a missing one instead of failing later on Path(None).
    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
    parser.add_argument('--in_mapping', required=True, help="Path to mapping file to reduce")
    parser.add_argument('--features', required=True, help="Path to tabular file with features to keep in first column")
    parser.add_argument('--elements', required=True, help="Path to tabular file with elements to keep in other columns")
    parser.add_argument('--out_mapping', required=True, help="Path to reduced mapping file")
    args = parser.parse_args(argv)

    in_mapping_fp = Path(args.in_mapping)
    feature_fp = Path(args.features)
    element_fp = Path(args.elements)
    out_mapping_fp = Path(args.out_mapping)

    # extract features to keep
    features = read_first_column(feature_fp)
    print(features)

    # extract elements to keep
    elements = read_first_column(element_fp)
    print(elements)

    # write mapping for features to keep while keeping only elements
    reduce_mapping(in_mapping_fp, out_mapping_fp, features, elements)


if __name__ == '__main__':
    main()
