Mercurial > repos > bebatut > format_metaphlan2_output
comparison format_metaphlan2_output.py @ 0:2bfa9b200600 draft
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_metaphlan2_output/ commit 36388cac89e4ffa55fe317d10cbf97346ab00a71-dirty
author | bebatut |
---|---|
date | Wed, 20 Apr 2016 07:52:41 -0400 |
parents | |
children | 1e74cb2c8e67 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bfa9b200600 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 import sys | |
5 import os | |
6 import argparse | |
7 import re | |
8 | |
# Maps the one-letter level prefix found before ``__`` in a clade name
# (e.g. the ``k`` of ``k__Bacteria``) to the full taxonomic level name used
# to key the per-level output files.
taxo_level_correspondance = {
    'k': 'kingdom',
    'p': 'phylum',
    'c': 'class',
    'o': 'order',
    'f': 'family',
    'g': 'genus',
    's': 'species',
    't': 'strains',
}
18 | |
def write_taxo_abundance(output_files, level, taxo, abundance):
    """Write one ``taxo<TAB>abundance`` row to the file of a taxonomic level.

    Args:
        output_files: mapping from full level name (the values of
            ``taxo_level_correspondance``, e.g. ``'genus'``) to a writable
            file-like object.
        level: level prefix as found before ``__`` in a clade name; must be
            a key of ``taxo_level_correspondance``.
        taxo: taxon name to write in the first column.
        abundance: abundance value, as a string, for the second column.

    Raises:
        ValueError: if ``level`` is not a key of
            ``taxo_level_correspondance``.
    """
    # ``dict.has_key`` no longer exists in Python 3; the ``in`` operator is
    # equivalent and works on both Python 2 and Python 3.
    if level not in taxo_level_correspondance:
        raise ValueError(level + ' is not a known taxonomic level')

    level_file = output_files[taxo_level_correspondance[level]]
    level_file.write(taxo + '\t' + abundance + '\n')
24 | |
def format_metaphlan2_output(args):
    """Reformat a MetaPhlAn2 abundance table into tab-separated files.

    Reads ``args.metaphlan2_output`` and writes:

    * ``args.all_taxo_level_abundance_file``: one row per input row, with one
      column per taxonomic level (kingdom to strains; levels absent from the
      lineage are left empty) followed by the abundance;
    * one ``args.<level>_abundance_file`` per level, receiving the rows whose
      lineage ends at that level as ``name<TAB>abundance``.

    Lines starting with ``#`` and blank lines are skipped. Underscores in
    taxon names are replaced by spaces.

    Args:
        args: object exposing the path attributes named above (the parsed
            command-line namespace when the file is run as a script).

    Raises:
        ValueError: from ``write_taxo_abundance`` when the last clade of a
            lineage carries an unknown level prefix.
        IndexError: if a data line has no second tab-separated column or a
            clade has no ``__`` separator.
    """
    level_names = ('kingdom', 'phylum', 'class', 'order', 'family', 'genus',
                   'species', 'strains')
    levels_number = len(level_names)

    taxo_levels_abundance_files = {}
    try:
        for level_name in level_names:
            level_file = open(getattr(args, level_name + '_abundance_file'),
                              'w')
            taxo_levels_abundance_files[level_name] = level_file
            level_file.write(level_name + '\tabundance\n')

        with open(args.metaphlan2_output, 'r') as input_file:
            with open(args.all_taxo_level_abundance_file, 'w') as output_file:
                output_file.write('\t'.join(level_names) + '\tabundance\n')

                # Iterate lazily instead of loading the whole table at once.
                for line in input_file:
                    if line.startswith('#'):
                        continue

                    # Strip only the line terminator: slicing off the last
                    # character would truncate the abundance of a final line
                    # that has no trailing newline.
                    line = line.rstrip('\r\n')
                    if not line.strip():
                        continue

                    split_line = line.split('\t')
                    abundance = split_line[1]

                    # Each clade looks like ``<prefix>__<name>``.
                    clades = [clade.split('__')
                              for clade in split_line[0].split('|')]
                    names = [clade[1].replace('_', ' ') for clade in clades]

                    # Pad with empty columns so the abundance always lands
                    # after the last level column.
                    padding = [''] * (levels_number - len(names))
                    output_file.write('\t'.join(names + padding) + '\t'
                                      + abundance + '\n')

                    # The row belongs to the level of its deepest clade.
                    write_taxo_abundance(taxo_levels_abundance_files,
                                         clades[-1][0], names[-1], abundance)
    finally:
        # Close (and flush) every per-level file that was opened, even when
        # parsing fails part-way through.
        for level_file in taxo_levels_abundance_files.values():
            level_file.close()
81 | |
if __name__ == '__main__':
    # All options are mandatory and are registered in this order: the input
    # table, the combined output table, then one output per taxonomic level.
    parser = argparse.ArgumentParser()
    for option_name in (
        'metaphlan2_output',
        'all_taxo_level_abundance_file',
        'kingdom_abundance_file',
        'phylum_abundance_file',
        'class_abundance_file',
        'order_abundance_file',
        'family_abundance_file',
        'genus_abundance_file',
        'species_abundance_file',
        'strains_abundance_file',
    ):
        parser.add_argument('--' + option_name, required=True)

    args = parser.parse_args()

    format_metaphlan2_output(args)