Mercurial > repos > bebatut > format_metaphlan2_output
comparison format_metaphlan2_output.py @ 1:1e74cb2c8e67 draft
"planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_metaphlan2_output/ commit 2cc71b230101205641d7fafa822d4ab3d398066a"
| author | bebatut |
|---|---|
| date | Mon, 14 Sep 2020 09:52:15 +0000 |
| parents | 2bfa9b200600 |
| children | 370b56f8a02d |
comparison
equal
deleted
inserted
replaced
| 0:2bfa9b200600 | 1:1e74cb2c8e67 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
| 3 | 3 |
| 4 import sys | |
| 5 import os | |
| 6 import argparse | 4 import argparse |
| 7 import re | |
| 8 | 5 |
| 9 taxo_level_correspondance = {} | 6 |
| 10 taxo_level_correspondance['k'] = 'kingdom' | 7 taxo_level_corresp = { |
| 11 taxo_level_correspondance['p'] = 'phylum' | 8 'k': 'kingdom', |
| 12 taxo_level_correspondance['c'] = 'class' | 9 'p': 'phylum', |
| 13 taxo_level_correspondance['o'] = 'order' | 10 'c': 'class', |
| 14 taxo_level_correspondance['f'] = 'family' | 11 'o': 'order', |
| 15 taxo_level_correspondance['g'] = 'genus' | 12 'f': 'family', |
| 16 taxo_level_correspondance['s'] = 'species' | 13 'g': 'genus', |
| 17 taxo_level_correspondance['t'] = 'strains' | 14 's': 'species', |
| | 15 't': 'strains'} |
| | 16 |
| 18 | 17 |
| 19 def write_taxo_abundance(output_files, level, taxo, abundance): | 18 def write_taxo_abundance(output_files, level, taxo, abundance): |
| 20 if not taxo_level_correspondance.has_key(level): | 19 if level not in taxo_level_corresp: |
| 21 raise ValueError(level + ' is not a know taxonomic level') | 20 raise ValueError(level + ' is not a know taxonomic level') |
| 22 output_files[taxo_level_correspondance[level]].write(taxo + '\t') | 21 f_n = taxo_level_corresp[level] |
| 23 output_files[taxo_level_correspondance[level]].write(abundance + '\n') | 22 output_files[f_n].write(taxo + '\t') |
| | 23 output_files[f_n].write(abundance + '\n') |
| | 24 |
| 24 | 25 |
| 25 def format_metaphlan2_output(args): | 26 def format_metaphlan2_output(args): |
| 26 taxo_levels_abundance_files = {} | 27 taxo_levels_abund_f = { |
| 27 taxo_levels_abundance_files['kingdom'] = open(args.kingdom_abundance_file, 'w') | 28 'kingdom': open(args.kingdom_abundance_file, 'w'), |
| 28 taxo_levels_abundance_files['phylum'] = open(args.phylum_abundance_file, 'w') | 29 'phylum': open(args.phylum_abundance_file, 'w'), |
| 29 taxo_levels_abundance_files['class'] = open(args.class_abundance_file, 'w') | 30 'class': open(args.class_abundance_file, 'w'), |
| 30 taxo_levels_abundance_files['order'] = open(args.order_abundance_file, 'w') | 31 'order': open(args.order_abundance_file, 'w'), |
| 31 taxo_levels_abundance_files['family'] = open(args.family_abundance_file, 'w') | 32 'family': open(args.family_abundance_file, 'w'), |
| 32 taxo_levels_abundance_files['genus'] = open(args.genus_abundance_file, 'w') | 33 'genus': open(args.genus_abundance_file, 'w'), |
| 33 taxo_levels_abundance_files['species'] = open(args.species_abundance_file, 'w') | 34 'species': open(args.species_abundance_file, 'w'), |
| 34 taxo_levels_abundance_files['strains'] = open(args.strains_abundance_file, 'w') | 35 'strains': open(args.strains_abundance_file, 'w') |
| | 36 } |
| 35 | 37 |
| 36 for taxo_level_file in taxo_levels_abundance_files: | 38 for taxo_level_f in taxo_levels_abund_f: |
| 37 taxo_levels_abundance_files[taxo_level_file].write(taxo_level_file + '\t') | 39 s = taxo_level_f + '\t' + 'abundance\n' |
| 38 taxo_levels_abundance_files[taxo_level_file].write('abundance\n') | 40 taxo_levels_abund_f[taxo_level_f].write(s) |
| 39 | 41 |
| 40 with open(args.metaphlan2_output, 'r') as input_file: | 42 with open(args.metaphlan2_output, 'r') as input_f: |
| 41 with open(args.all_taxo_level_abundance_file, 'w') as output_file: | 43 with open(args.all_taxo_level_abundance_file, 'w') as output_f: |
| 42 output_file.write("kingdom\t") | 44 s = "kingdom\tphylum\tclass\torder\tfamily\t" |
| 43 output_file.write("phylum\t") | 45 s += "genus\tspecies\tstrains\tabundance\n" |
| 44 output_file.write("class\t") | 46 output_f.write(s) |
| 45 output_file.write("order\t") | 47 |
| 46 output_file.write("family\t") | |
| 47 output_file.write("genus\t") | |
| 48 output_file.write("species\t") | |
| 49 output_file.write("strains\t") | |
| 50 output_file.write("abundance\n") | |
| 51 levels_number = 8 | 48 levels_number = 8 |
| 52 | 49 for line in input_f.readlines(): |
| 53 for line in input_file.readlines(): | |
| 54 if line.startswith("#"): | 50 if line.startswith("#"): |
| 55 continue | 51 continue |
| 56 | 52 |
| 57 split_line = line[:-1].split('\t') | 53 split_line = line[:-1].split('\t') |
| 58 all_taxo = split_line[0] | 54 all_taxo = split_line[0] |
| 59 abundance = split_line[1] | 55 abundance = split_line[1] |
| 60 | 56 |
| 61 split_taxo = all_taxo.split('|') | 57 split_taxo = all_taxo.split('|') |
| 62 for level in split_taxo: | 58 for level in split_taxo: |
| 63 taxo = level.split('__')[1] | 59 taxo = level.split('__')[1] |
| 64 taxo = taxo.replace("_"," ") | 60 taxo = taxo.replace("_", " ") |
| 65 output_file.write(taxo + '\t') | 61 output_f.write(taxo + '\t') |
| 66 | 62 |
| 67 for i in range(len(split_taxo), levels_number): | 63 for i in range(len(split_taxo), levels_number): |
| 68 output_file.write('\t') | 64 output_f.write('\t') |
| 69 | 65 |
| 70 output_file.write(abundance + "\n") | 66 output_f.write(abundance + "\n") |
| 71 | |
| 72 | 67 |
| 73 last_taxo_level = split_taxo[-1].split('__') | 68 last_taxo_level = split_taxo[-1].split('__') |
| 74 taxo = last_taxo_level[1].replace("_"," ") | 69 taxo = last_taxo_level[1].replace("_", " ") |
| 75 level = last_taxo_level[0] | 70 level = last_taxo_level[0] |
| 76 write_taxo_abundance(taxo_levels_abundance_files, level, taxo, | 71 write_taxo_abundance( |
| | 72 taxo_levels_abund_f, |
| | 73 level, |
| | 74 taxo, |
| 77 abundance) | 75 abundance) |
| 78 | 76 |
| 79 for taxo_level_file in taxo_levels_abundance_files: | 77 for taxo_level_f in taxo_levels_abund_f: |
| 80 taxo_levels_abundance_files[taxo_level_file].close() | 78 taxo_levels_abund_f[taxo_level_f].close() |
| | 79 |
| 81 | 80 |
| 82 if __name__ == '__main__': | 81 if __name__ == '__main__': |
| 83 parser = argparse.ArgumentParser() | 82 parser = argparse.ArgumentParser() |
| 84 parser.add_argument('--metaphlan2_output', required=True) | 83 parser.add_argument('--metaphlan2_output', required=True) |
| 85 parser.add_argument('--all_taxo_level_abundance_file', required=True) | 84 parser.add_argument('--all_taxo_level_abundance_file', required=True) |
| 89 parser.add_argument('--order_abundance_file', required=True) | 88 parser.add_argument('--order_abundance_file', required=True) |
| 90 parser.add_argument('--family_abundance_file', required=True) | 89 parser.add_argument('--family_abundance_file', required=True) |
| 91 parser.add_argument('--genus_abundance_file', required=True) | 90 parser.add_argument('--genus_abundance_file', required=True) |
| 92 parser.add_argument('--species_abundance_file', required=True) | 91 parser.add_argument('--species_abundance_file', required=True) |
| 93 parser.add_argument('--strains_abundance_file', required=True) | 92 parser.add_argument('--strains_abundance_file', required=True) |
| 94 | |
| 95 args = parser.parse_args() | 93 args = parser.parse_args() |
| 96 | 94 |
| 97 format_metaphlan2_output(args) | 95 format_metaphlan2_output(args) |
