Mercurial > repos > bebatut > format_metaphlan2_output
comparison format_metaphlan2_output.py @ 2:370b56f8a02d draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_metaphlan2_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
| author | bgruening |
|---|---|
| date | Wed, 19 Oct 2022 14:39:16 +0000 |
| parents | 1e74cb2c8e67 |
| children | |
comparison
legend: equal, deleted, inserted, replaced
| 1:1e74cb2c8e67 | 2:370b56f8a02d |
|---|---|
| 3 | 3 |
| 4 import argparse | 4 import argparse |
| 5 | 5 |
| 6 | 6 |
| 7 taxo_level_corresp = { | 7 taxo_level_corresp = { |
| 8 'k': 'kingdom', | 8 "k": "kingdom", |
| 9 'p': 'phylum', | 9 "p": "phylum", |
| 10 'c': 'class', | 10 "c": "class", |
| 11 'o': 'order', | 11 "o": "order", |
| 12 'f': 'family', | 12 "f": "family", |
| 13 'g': 'genus', | 13 "g": "genus", |
| 14 's': 'species', | 14 "s": "species", |
| 15 't': 'strains'} | 15 "t": "strains", |
| 16 | 16 } |
| 17 | |
| 17 | 18 |
| 18 def write_taxo_abundance(output_files, level, taxo, abundance): | 19 def write_taxo_abundance(output_files, level, taxo, abundance): |
| 19 if level not in taxo_level_corresp: | 20 if level not in taxo_level_corresp: |
| 20 raise ValueError(level + ' is not a know taxonomic level') | 21 raise ValueError(level + " is not a know taxonomic level") |
| 21 f_n = taxo_level_corresp[level] | 22 f_n = taxo_level_corresp[level] |
| 22 output_files[f_n].write(taxo + '\t') | 23 output_files[f_n].write(taxo + "\t") |
| 23 output_files[f_n].write(abundance + '\n') | 24 output_files[f_n].write(abundance + "\n") |
| 24 | 25 |
| 25 | 26 |
| 26 def format_metaphlan2_output(args): | 27 def format_metaphlan2_output(args): |
| 27 taxo_levels_abund_f = { | 28 taxo_levels_abund_f = { |
| 28 'kingdom': open(args.kingdom_abundance_file, 'w'), | 29 "kingdom": open(args.kingdom_abundance_file, "w"), |
| 29 'phylum': open(args.phylum_abundance_file, 'w'), | 30 "phylum": open(args.phylum_abundance_file, "w"), |
| 30 'class': open(args.class_abundance_file, 'w'), | 31 "class": open(args.class_abundance_file, "w"), |
| 31 'order': open(args.order_abundance_file, 'w'), | 32 "order": open(args.order_abundance_file, "w"), |
| 32 'family': open(args.family_abundance_file, 'w'), | 33 "family": open(args.family_abundance_file, "w"), |
| 33 'genus': open(args.genus_abundance_file, 'w'), | 34 "genus": open(args.genus_abundance_file, "w"), |
| 34 'species': open(args.species_abundance_file, 'w'), | 35 "species": open(args.species_abundance_file, "w"), |
| 35 'strains': open(args.strains_abundance_file, 'w') | 36 "strains": open(args.strains_abundance_file, "w"), |
| 36 } | 37 } |
| 37 | 38 |
| 38 for taxo_level_f in taxo_levels_abund_f: | 39 for taxo_level_f in taxo_levels_abund_f: |
| 39 s = taxo_level_f + '\t' + 'abundance\n' | 40 s = taxo_level_f + "\t" + "abundance\n" |
| 40 taxo_levels_abund_f[taxo_level_f].write(s) | 41 taxo_levels_abund_f[taxo_level_f].write(s) |
| 41 | 42 |
| 42 with open(args.metaphlan2_output, 'r') as input_f: | 43 with open(args.metaphlan2_output, "r") as input_f: |
| 43 with open(args.all_taxo_level_abundance_file, 'w') as output_f: | 44 with open(args.all_taxo_level_abundance_file, "w") as output_f: |
| 44 s = "kingdom\tphylum\tclass\torder\tfamily\t" | 45 s = "kingdom\tphylum\tclass\torder\tfamily\t" |
| 45 s += "genus\tspecies\tstrains\tabundance\n" | 46 s += "genus\tspecies\tstrains\tabundance\n" |
| 46 output_f.write(s) | 47 output_f.write(s) |
| 47 | 48 |
| 48 levels_number = 8 | 49 levels_number = 8 |
| 49 for line in input_f.readlines(): | 50 for line in input_f.readlines(): |
| 50 if line.startswith("#"): | 51 if line.startswith("#"): |
| 51 continue | 52 continue |
| 52 | 53 |
| 53 split_line = line[:-1].split('\t') | 54 split_line = line[:-1].split("\t") |
| 54 all_taxo = split_line[0] | 55 all_taxo = split_line[0] |
| 55 abundance = split_line[1] | 56 abundance = split_line[1] |
| 56 | 57 |
| 57 split_taxo = all_taxo.split('|') | 58 split_taxo = all_taxo.split("|") |
| 58 for level in split_taxo: | 59 for level in split_taxo: |
| 59 taxo = level.split('__')[1] | 60 taxo = level.split("__")[1] |
| 60 taxo = taxo.replace("_", " ") | 61 taxo = taxo.replace("_", " ") |
| 61 output_f.write(taxo + '\t') | 62 output_f.write(taxo + "\t") |
| 62 | 63 |
| 63 for i in range(len(split_taxo), levels_number): | 64 for i in range(len(split_taxo), levels_number): |
| 64 output_f.write('\t') | 65 output_f.write("\t") |
| 65 | 66 |
| 66 output_f.write(abundance + "\n") | 67 output_f.write(abundance + "\n") |
| 67 | 68 |
| 68 last_taxo_level = split_taxo[-1].split('__') | 69 last_taxo_level = split_taxo[-1].split("__") |
| 69 taxo = last_taxo_level[1].replace("_", " ") | 70 taxo = last_taxo_level[1].replace("_", " ") |
| 70 level = last_taxo_level[0] | 71 level = last_taxo_level[0] |
| 71 write_taxo_abundance( | 72 write_taxo_abundance(taxo_levels_abund_f, level, taxo, abundance) |
| 72 taxo_levels_abund_f, | |
| 73 level, | |
| 74 taxo, | |
| 75 abundance) | |
| 76 | 73 |
| 77 for taxo_level_f in taxo_levels_abund_f: | 74 for taxo_level_f in taxo_levels_abund_f: |
| 78 taxo_levels_abund_f[taxo_level_f].close() | 75 taxo_levels_abund_f[taxo_level_f].close() |
| 79 | 76 |
| 80 | 77 |
| 81 if __name__ == '__main__': | 78 if __name__ == "__main__": |
| 82 parser = argparse.ArgumentParser() | 79 parser = argparse.ArgumentParser() |
| 83 parser.add_argument('--metaphlan2_output', required=True) | 80 parser.add_argument("--metaphlan2_output", required=True) |
| 84 parser.add_argument('--all_taxo_level_abundance_file', required=True) | 81 parser.add_argument("--all_taxo_level_abundance_file", required=True) |
| 85 parser.add_argument('--kingdom_abundance_file', required=True) | 82 parser.add_argument("--kingdom_abundance_file", required=True) |
| 86 parser.add_argument('--phylum_abundance_file', required=True) | 83 parser.add_argument("--phylum_abundance_file", required=True) |
| 87 parser.add_argument('--class_abundance_file', required=True) | 84 parser.add_argument("--class_abundance_file", required=True) |
| 88 parser.add_argument('--order_abundance_file', required=True) | 85 parser.add_argument("--order_abundance_file", required=True) |
| 89 parser.add_argument('--family_abundance_file', required=True) | 86 parser.add_argument("--family_abundance_file", required=True) |
| 90 parser.add_argument('--genus_abundance_file', required=True) | 87 parser.add_argument("--genus_abundance_file", required=True) |
| 91 parser.add_argument('--species_abundance_file', required=True) | 88 parser.add_argument("--species_abundance_file", required=True) |
| 92 parser.add_argument('--strains_abundance_file', required=True) | 89 parser.add_argument("--strains_abundance_file", required=True) |
| 93 args = parser.parse_args() | 90 args = parser.parse_args() |
| 94 | 91 |
| 95 format_metaphlan2_output(args) | 92 format_metaphlan2_output(args) |
