comparison format_metaphlan2_output.py @ 2:370b56f8a02d draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_metaphlan2_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
author bgruening
date Wed, 19 Oct 2022 14:39:16 +0000
parents 1e74cb2c8e67
children
comparison
equal deleted inserted replaced
1:1e74cb2c8e67 2:370b56f8a02d
3 3
4 import argparse 4 import argparse
5 5
6 6
# Maps the one-letter rank prefix used in MetaPhlAn2 lineage entries
# (the part before "__") to the full name of that taxonomic level.
# The i-th letter of the string pairs with the i-th name of the tuple.
taxo_level_corresp = dict(
    zip(
        "kpcofgst",
        (
            "kingdom",
            "phylum",
            "class",
            "order",
            "family",
            "genus",
            "species",
            "strains",
        ),
    )
)
17
17 18
def write_taxo_abundance(output_files, level, taxo, abundance):
    """Append one ``taxo<TAB>abundance`` row to the file of a taxonomic level.

    Args:
        output_files: mapping from full level name (a value of
            ``taxo_level_corresp``, e.g. ``"genus"``) to a writable text
            file object.
        level: one-letter level code; must be a key of
            ``taxo_level_corresp``.
        taxo: taxon name to write in the first column.
        abundance: abundance value, as a string, for the second column.

    Raises:
        ValueError: if ``level`` is not a key of ``taxo_level_corresp``.
    """
    if level not in taxo_level_corresp:
        raise ValueError(level + " is not a known taxonomic level")
    f_n = taxo_level_corresp[level]
    # Emit the row in a single write so a failure cannot leave half a row.
    output_files[f_n].write(taxo + "\t" + abundance + "\n")
24 25
25 26
def format_metaphlan2_output(args):
    """Split a MetaPhlAn2 abundance table into tab-separated tables.

    Reads ``args.metaphlan2_output`` and writes:

    * ``args.all_taxo_level_abundance_file``: one row per input line with
      one column per taxonomic level (empty when the lineage stops earlier)
      followed by the abundance;
    * one file per level (``args.kingdom_abundance_file`` ...
      ``args.strains_abundance_file``): a ``<level>\\tabundance`` header,
      then one row for every input line whose deepest level is that level.

    Lines starting with ``#`` and blank lines are skipped. On every other
    line the first tab-separated column is a ``|``-separated lineage of
    ``<code>__<name>`` entries and the second column is the abundance.
    Underscores in taxon names are replaced with spaces.

    Args:
        args: object exposing the path attributes listed above (for
            example the namespace returned by ``argparse``).

    Raises:
        IndexError: if a data line has no second column or a lineage entry
            has no ``__`` separator.
        ValueError: raised by ``write_taxo_abundance`` for an unknown
            level code.
    """
    # Local import: the rest of the file is not required to import it.
    from contextlib import ExitStack

    levels_number = 8

    # The ExitStack closes every file opened so far, even when a later
    # open(), a malformed line or a write raises.
    with ExitStack() as stack:

        def open_table(path):
            return stack.enter_context(open(path, "w"))

        taxo_levels_abund_f = {
            "kingdom": open_table(args.kingdom_abundance_file),
            "phylum": open_table(args.phylum_abundance_file),
            "class": open_table(args.class_abundance_file),
            "order": open_table(args.order_abundance_file),
            "family": open_table(args.family_abundance_file),
            "genus": open_table(args.genus_abundance_file),
            "species": open_table(args.species_abundance_file),
            "strains": open_table(args.strains_abundance_file),
        }

        for taxo_level_f, level_file in taxo_levels_abund_f.items():
            level_file.write(taxo_level_f + "\t" + "abundance\n")

        input_f = stack.enter_context(open(args.metaphlan2_output, "r"))
        output_f = stack.enter_context(
            open(args.all_taxo_level_abundance_file, "w")
        )

        s = "kingdom\tphylum\tclass\torder\tfamily\t"
        s += "genus\tspecies\tstrains\tabundance\n"
        output_f.write(s)

        # Stream the input instead of loading every line with readlines().
        for line in input_f:
            if line.startswith("#") or not line.strip():
                continue

            # Strip only the newline: slicing with [:-1] would eat the last
            # digit of the abundance when the file lacks a final newline.
            split_line = line.rstrip("\n").split("\t")
            all_taxo = split_line[0]
            abundance = split_line[1]

            # Each lineage entry is "<level code>__<taxon name>".
            split_taxo = [level.split("__") for level in all_taxo.split("|")]
            names = [parts[1].replace("_", " ") for parts in split_taxo]

            # Pad missing deeper levels with empty cells so the abundance
            # always lands in the last column.
            padding = [""] * (levels_number - len(names))
            output_f.write("\t".join(names + padding + [abundance]) + "\n")

            # The line reports the abundance of its deepest level only.
            write_taxo_abundance(
                taxo_levels_abund_f, split_taxo[-1][0], names[-1], abundance
            )
79 76
80 77
if __name__ == "__main__":
    # Command-line entry point: every option is mandatory and is handed to
    # format_metaphlan2_output() as an attribute of the parsed namespace.
    cli = argparse.ArgumentParser()
    for option in (
        "--metaphlan2_output",
        "--all_taxo_level_abundance_file",
        "--kingdom_abundance_file",
        "--phylum_abundance_file",
        "--class_abundance_file",
        "--order_abundance_file",
        "--family_abundance_file",
        "--genus_abundance_file",
        "--species_abundance_file",
        "--strains_abundance_file",
    ):
        cli.add_argument(option, required=True)

    format_metaphlan2_output(cli.parse_args())