Mercurial > repos > bebatut > format_metaphlan2_output
view format_metaphlan2_output.py @ 1:1e74cb2c8e67 draft
"planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_metaphlan2_output/ commit 2cc71b230101205641d7fafa822d4ab3d398066a"
author | bebatut |
---|---|
date | Mon, 14 Sep 2020 09:52:15 +0000 |
parents | 2bfa9b200600 |
children | 370b56f8a02d |
line wrap: on
line source
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Reformat a MetaPhlAn2 abundance table into tab-separated files.

The input is read as a tab-separated table whose first column is a
'|'-separated clade path made of ``<prefix>__<name>`` tokens (for example
``k__Bacteria|p__Firmicutes``) and whose second column is an abundance.

Two kinds of output are produced:

* one "all levels" table with a column per taxonomic rank plus the
  abundance, and
* one table per rank, holding only the rows whose deepest clade is of
  that rank.
"""

import argparse

# Clade prefix (the text before '__') -> rank name.  The rank name is also
# the key of the per-rank output file and the stem of the matching
# ``--<rank>_abundance_file`` command line option.
taxo_level_corresp = {
    'k': 'kingdom',
    'p': 'phylum',
    'c': 'class',
    'o': 'order',
    'f': 'family',
    'g': 'genus',
    's': 'species',
    't': 'strains',
}


def write_taxo_abundance(output_files, level, taxo, abundance):
    """Append one ``taxo<TAB>abundance`` row to the file of a given rank.

    Args:
        output_files: mapping from rank name ('kingdom', 'phylum', ...) to
            a writable file object.
        level: clade prefix, i.e. a key of ``taxo_level_corresp``.
        taxo: taxon name to write in the first column.
        abundance: abundance to write in the second column, as a string.

    Raises:
        ValueError: if ``level`` is not a key of ``taxo_level_corresp``.
    """
    if level not in taxo_level_corresp:
        raise ValueError(level + ' is not a know taxonomic level')
    f_n = taxo_level_corresp[level]
    output_files[f_n].write(taxo + '\t' + abundance + '\n')


def format_metaphlan2_output(args):
    """Write the all-levels table and the per-rank tables.

    Lines starting with '#' and empty lines of the input are ignored.
    In taxon names every '_' is replaced by a space.

    Args:
        args: object exposing the file paths as attributes:
            ``metaphlan2_output`` (input),
            ``all_taxo_level_abundance_file`` (output) and one
            ``<rank>_abundance_file`` (output) for each rank name in
            ``taxo_level_corresp``.

    Raises:
        ValueError: if the deepest clade of a row has a prefix that is not
            a key of ``taxo_level_corresp``.
        IndexError: if a data row has no second tab-separated column, or
            if a clade token contains no '__' separator.
    """
    levels_number = len(taxo_level_corresp)
    taxo_levels_abund_f = {}
    try:
        # Open the per-rank files one at a time so that the ``finally``
        # clause can close whatever was opened, even if a later open()
        # or any row of the input fails.
        for rank in taxo_level_corresp.values():
            path = getattr(args, rank + '_abundance_file')
            taxo_levels_abund_f[rank] = open(path, 'w')
            taxo_levels_abund_f[rank].write(rank + '\t' + 'abundance\n')

        with open(args.metaphlan2_output, 'r') as input_f:
            with open(args.all_taxo_level_abundance_file, 'w') as output_f:
                s = "kingdom\tphylum\tclass\torder\tfamily\t"
                s += "genus\tspecies\tstrains\tabundance\n"
                output_f.write(s)

                for line in input_f:
                    if line.startswith("#"):
                        continue
                    # Strip only the line terminator: slicing off the last
                    # character would truncate the abundance of a final
                    # line that has no trailing newline.
                    line = line.rstrip('\r\n')
                    if not line:
                        continue

                    split_line = line.split('\t')
                    all_taxo = split_line[0]
                    abundance = split_line[1]

                    split_taxo = all_taxo.split('|')
                    names = [
                        clade.split('__')[1].replace("_", " ")
                        for clade in split_taxo
                    ]

                    # One column per rank: ranks deeper than this row's
                    # clade are left empty so the abundance always lands
                    # in the last column.
                    padding = [''] * (levels_number - len(names))
                    output_f.write(
                        '\t'.join(names + padding + [abundance]) + '\n')

                    # The row belongs to the per-rank file of its deepest
                    # clade.
                    level = split_taxo[-1].split('__')[0]
                    write_taxo_abundance(
                        taxo_levels_abund_f, level, names[-1], abundance)
    finally:
        for taxo_level_f in taxo_levels_abund_f.values():
            taxo_level_f.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--metaphlan2_output', required=True)
    parser.add_argument('--all_taxo_level_abundance_file', required=True)
    parser.add_argument('--kingdom_abundance_file', required=True)
    parser.add_argument('--phylum_abundance_file', required=True)
    parser.add_argument('--class_abundance_file', required=True)
    parser.add_argument('--order_abundance_file', required=True)
    parser.add_argument('--family_abundance_file', required=True)
    parser.add_argument('--genus_abundance_file', required=True)
    parser.add_argument('--species_abundance_file', required=True)
    parser.add_argument('--strains_abundance_file', required=True)
    args = parser.parse_args()

    format_metaphlan2_output(args)