comparison format_metaphlan2_output.py @ 1:1e74cb2c8e67 draft

"planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_metaphlan2_output/ commit 2cc71b230101205641d7fafa822d4ab3d398066a"
author bebatut
date Mon, 14 Sep 2020 09:52:15 +0000
parents 2bfa9b200600
children 370b56f8a02d
comparison
equal deleted inserted replaced
0:2bfa9b200600 1:1e74cb2c8e67
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 import sys
5 import os
6 import argparse 4 import argparse
7 import re
8 5
9 taxo_level_correspondance = {} 6
10 taxo_level_correspondance['k'] = 'kingdom' 7 taxo_level_corresp = {
11 taxo_level_correspondance['p'] = 'phylum' 8 'k': 'kingdom',
12 taxo_level_correspondance['c'] = 'class' 9 'p': 'phylum',
13 taxo_level_correspondance['o'] = 'order' 10 'c': 'class',
14 taxo_level_correspondance['f'] = 'family' 11 'o': 'order',
15 taxo_level_correspondance['g'] = 'genus' 12 'f': 'family',
16 taxo_level_correspondance['s'] = 'species' 13 'g': 'genus',
17 taxo_level_correspondance['t'] = 'strains' 14 's': 'species',
15 't': 'strains'}
16
18 17
19 def write_taxo_abundance(output_files, level, taxo, abundance): 18 def write_taxo_abundance(output_files, level, taxo, abundance):
20 if not taxo_level_correspondance.has_key(level): 19 if level not in taxo_level_corresp:
21 raise ValueError(level + ' is not a know taxonomic level') 20 raise ValueError(level + ' is not a know taxonomic level')
22 output_files[taxo_level_correspondance[level]].write(taxo + '\t') 21 f_n = taxo_level_corresp[level]
23 output_files[taxo_level_correspondance[level]].write(abundance + '\n') 22 output_files[f_n].write(taxo + '\t')
23 output_files[f_n].write(abundance + '\n')
24
24 25
25 def format_metaphlan2_output(args): 26 def format_metaphlan2_output(args):
26 taxo_levels_abundance_files = {} 27 taxo_levels_abund_f = {
27 taxo_levels_abundance_files['kingdom'] = open(args.kingdom_abundance_file, 'w') 28 'kingdom': open(args.kingdom_abundance_file, 'w'),
28 taxo_levels_abundance_files['phylum'] = open(args.phylum_abundance_file, 'w') 29 'phylum': open(args.phylum_abundance_file, 'w'),
29 taxo_levels_abundance_files['class'] = open(args.class_abundance_file, 'w') 30 'class': open(args.class_abundance_file, 'w'),
30 taxo_levels_abundance_files['order'] = open(args.order_abundance_file, 'w') 31 'order': open(args.order_abundance_file, 'w'),
31 taxo_levels_abundance_files['family'] = open(args.family_abundance_file, 'w') 32 'family': open(args.family_abundance_file, 'w'),
32 taxo_levels_abundance_files['genus'] = open(args.genus_abundance_file, 'w') 33 'genus': open(args.genus_abundance_file, 'w'),
33 taxo_levels_abundance_files['species'] = open(args.species_abundance_file, 'w') 34 'species': open(args.species_abundance_file, 'w'),
34 taxo_levels_abundance_files['strains'] = open(args.strains_abundance_file, 'w') 35 'strains': open(args.strains_abundance_file, 'w')
36 }
35 37
36 for taxo_level_file in taxo_levels_abundance_files: 38 for taxo_level_f in taxo_levels_abund_f:
37 taxo_levels_abundance_files[taxo_level_file].write(taxo_level_file + '\t') 39 s = taxo_level_f + '\t' + 'abundance\n'
38 taxo_levels_abundance_files[taxo_level_file].write('abundance\n') 40 taxo_levels_abund_f[taxo_level_f].write(s)
39 41
40 with open(args.metaphlan2_output, 'r') as input_file: 42 with open(args.metaphlan2_output, 'r') as input_f:
41 with open(args.all_taxo_level_abundance_file, 'w') as output_file: 43 with open(args.all_taxo_level_abundance_file, 'w') as output_f:
42 output_file.write("kingdom\t") 44 s = "kingdom\tphylum\tclass\torder\tfamily\t"
43 output_file.write("phylum\t") 45 s += "genus\tspecies\tstrains\tabundance\n"
44 output_file.write("class\t") 46 output_f.write(s)
45 output_file.write("order\t") 47
46 output_file.write("family\t")
47 output_file.write("genus\t")
48 output_file.write("species\t")
49 output_file.write("strains\t")
50 output_file.write("abundance\n")
51 levels_number = 8 48 levels_number = 8
52 49 for line in input_f.readlines():
53 for line in input_file.readlines():
54 if line.startswith("#"): 50 if line.startswith("#"):
55 continue 51 continue
56 52
57 split_line = line[:-1].split('\t') 53 split_line = line[:-1].split('\t')
58 all_taxo = split_line[0] 54 all_taxo = split_line[0]
59 abundance = split_line[1] 55 abundance = split_line[1]
60 56
61 split_taxo = all_taxo.split('|') 57 split_taxo = all_taxo.split('|')
62 for level in split_taxo: 58 for level in split_taxo:
63 taxo = level.split('__')[1] 59 taxo = level.split('__')[1]
64 taxo = taxo.replace("_"," ") 60 taxo = taxo.replace("_", " ")
65 output_file.write(taxo + '\t') 61 output_f.write(taxo + '\t')
66 62
67 for i in range(len(split_taxo), levels_number): 63 for i in range(len(split_taxo), levels_number):
68 output_file.write('\t') 64 output_f.write('\t')
69 65
70 output_file.write(abundance + "\n") 66 output_f.write(abundance + "\n")
71
72 67
73 last_taxo_level = split_taxo[-1].split('__') 68 last_taxo_level = split_taxo[-1].split('__')
74 taxo = last_taxo_level[1].replace("_"," ") 69 taxo = last_taxo_level[1].replace("_", " ")
75 level = last_taxo_level[0] 70 level = last_taxo_level[0]
76 write_taxo_abundance(taxo_levels_abundance_files, level, taxo, 71 write_taxo_abundance(
72 taxo_levels_abund_f,
73 level,
74 taxo,
77 abundance) 75 abundance)
78 76
79 for taxo_level_file in taxo_levels_abundance_files: 77 for taxo_level_f in taxo_levels_abund_f:
80 taxo_levels_abundance_files[taxo_level_file].close() 78 taxo_levels_abund_f[taxo_level_f].close()
79
81 80
82 if __name__ == '__main__': 81 if __name__ == '__main__':
83 parser = argparse.ArgumentParser() 82 parser = argparse.ArgumentParser()
84 parser.add_argument('--metaphlan2_output', required=True) 83 parser.add_argument('--metaphlan2_output', required=True)
85 parser.add_argument('--all_taxo_level_abundance_file', required=True) 84 parser.add_argument('--all_taxo_level_abundance_file', required=True)
89 parser.add_argument('--order_abundance_file', required=True) 88 parser.add_argument('--order_abundance_file', required=True)
90 parser.add_argument('--family_abundance_file', required=True) 89 parser.add_argument('--family_abundance_file', required=True)
91 parser.add_argument('--genus_abundance_file', required=True) 90 parser.add_argument('--genus_abundance_file', required=True)
92 parser.add_argument('--species_abundance_file', required=True) 91 parser.add_argument('--species_abundance_file', required=True)
93 parser.add_argument('--strains_abundance_file', required=True) 92 parser.add_argument('--strains_abundance_file', required=True)
94
95 args = parser.parse_args() 93 args = parser.parse_args()
96 94
97 format_metaphlan2_output(args) 95 format_metaphlan2_output(args)