comparison formatoutput.py @ 4:2e5ae05160e0 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
author iuc
date Mon, 17 May 2021 20:09:54 +0000
parents
children 27258eb73f0c
comparison
equal deleted inserted replaced
3:3f05bf162005 4:2e5ae05160e0
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
import argparse
import re
from contextlib import ExitStack
from pathlib import Path
7
# One-letter rank prefix found in clade names (e.g. the ``g`` of ``g__``)
# mapped to the full rank name; the rank name is used for column headers.
# Insertion order goes from the broadest rank to the most specific one.
taxo_level = dict(
    k='kingdom',
    p='phylum',
    c='class',
    o='order',
    f='family',
    g='genus',
    s='species',
    t='strains',
)
17
18
def split_levels(metaphlan_output_fp, out_dp, legacy_output):
    '''
    Split default MetaPhlAn output into a report for each taxonomic level

    One tab-separated file named after each value of ``taxo_level`` is
    written into ``out_dp``; a clade goes to the file of its last (most
    specific) rank. A file named ``all`` is written as well, with one row per
    clade and one group of columns per taxonomic level.

    :param metaphlan_output_fp: Path to default MetaPhlAn output
    :param out_dp: Path to output directory
    :param legacy_output: Boolean for legacy output. If True, lines are read
        as ``clade<TAB>abundance``; otherwise as ``clade<TAB>ids<TAB>abundance``
        and an extra ``<level>_id`` column is written for every level
    '''
    levels_number = len(taxo_level)
    # number of columns each taxonomic level takes in the output tables
    cols_per_level = 1 if legacy_output else 2

    def header_cells(level):
        name = taxo_level[level]
        if legacy_output:
            return [name]
        return [name, "%s_id" % name]

    # ExitStack closes every file opened so far, also when a later open()
    # or the parsing below raises
    with ExitStack() as stack:
        # prepare one output file per level and write its header
        abund_f = {}
        for level in taxo_level:
            level_f = stack.enter_context(
                open(out_dp / Path(taxo_level[level]), 'w'))
            level_f.write('\t'.join(header_cells(level) + ['abundance']) + '\n')
            abund_f[level] = level_f

        metaphlan_output_f = stack.enter_context(open(metaphlan_output_fp, 'r'))
        all_level_f = stack.enter_context(open(out_dp / Path('all'), 'w'))

        # write header in all levels file
        all_header = []
        for level in taxo_level:
            all_header.extend(header_cells(level))
        all_header.append('abundance')
        all_level_f.write('\t'.join(all_header) + '\n')

        # parse metaphlan file
        for line in metaphlan_output_f:
            # skip headers
            if line.startswith("#"):
                continue

            # drop only the line terminator: the last line of a file may
            # not have one, and slicing would then cut the abundance
            line = line.rstrip('\n')
            # skip blank lines
            if not line.strip():
                continue

            # split line
            split_line = line.split('\t')
            taxo_n = split_line[0].split('|')
            if legacy_output:
                taxo_id = None
                abundance = split_line[1]
            else:
                taxo_id = split_line[1].split('|')
                abundance = split_line[2]

            # get taxon names; split on the first "__" only so that a name
            # that itself contains "__" is kept whole
            names = [
                clade.split('__', 1)[1].replace("_", " ") for clade in taxo_n]

            # row for the all levels file
            cells = []
            for i, name in enumerate(names):
                cells.append(name)
                if not legacy_output:
                    cells.append(taxo_id[i])
            # if not all taxon levels: pad every column of the missing
            # levels so that the abundance stays under its header
            missing_levels = levels_number - len(names)
            cells.extend([''] * (cols_per_level * missing_levels))
            cells.append(abundance)
            all_level_f.write('\t'.join(cells) + '\n')

            # row for the file of the clade's last level
            level = taxo_n[-1].split('__', 1)[0]
            row = [names[-1]]
            if not legacy_output:
                row.append(taxo_id[-1])
            row.append(abundance)
            abund_f[level].write('\t'.join(row) + '\n')
96
97
def format_for_krona(metaphlan_output_fp, krona_out_fp):
    '''
    Format default MetaPhlAn output as a tab-separated text file for Krona

    Every non-header line containing ``s__`` gives one output row: the last
    column of the line, then the lineage taken from the first column. In the
    lineage the ``|`` separators are removed, every rank prefix (a word
    character followed by ``__``) is replaced by a tab and the remaining
    underscores by spaces.

    :param metaphlan_output_fp: Path to default MetaPhlAn output
    :param krona_out_fp: Path to output file for Krona
    '''
    re_rank_prefix = re.compile(r"\w__")

    with open(metaphlan_output_fp, 'r') as metaphlan_output_f:
        with open(krona_out_fp, 'w') as krona_out_f:
            for line in metaphlan_output_f:
                # header lines are not clades, even when they happen to
                # contain "s__" (e.g. inside a path)
                if line.startswith("#"):
                    continue
                if "s__" not in line:
                    continue
                x = line.rstrip().split('\t')
                lineage = x[0].replace('|', '')
                lineage = re_rank_prefix.sub('\t', lineage)
                lineage = lineage.replace('_', ' ')
                krona_out_f.write("%s\t%s\n" % (x[-1], lineage))
118
119
def main(argv=None):
    '''
    Parse the command line and run the requested sub-command

    The help is printed when no sub-command is given.

    :param argv: List of command line arguments; ``sys.argv[1:]`` is used
        when None
    '''
    parser = argparse.ArgumentParser(description='Format MetaPhlAn output')
    subparsers = parser.add_subparsers(dest='function')
    # split_levels
    split_levels_parser = subparsers.add_parser(
        'split_levels',
        help='Split default MetaPhlAn into a report for each taxonomic level')
    split_levels_parser.add_argument(
        '--metaphlan_output',
        required=True,
        help="Path to default MetaPhlAn output")
    split_levels_parser.add_argument(
        '--outdir',
        required=True,
        help="Path to output directory")
    split_levels_parser.add_argument(
        '--legacy-output',
        dest='legacy_output',
        action='store_true',
        help="Old MetaPhlAn2 two columns output")
    # format_for_krona
    format_for_krona_parser = subparsers.add_parser(
        'format_for_krona',
        help='Format default MetaPhlAn output for Krona')
    format_for_krona_parser.add_argument(
        '--metaphlan_output',
        required=True,
        help="Path to default MetaPhlAn output")
    format_for_krona_parser.add_argument(
        '--krona_output',
        required=True,
        help="Path to output file for Krona")

    args = parser.parse_args(argv)

    if args.function == 'split_levels':
        split_levels(
            Path(args.metaphlan_output),
            Path(args.outdir),
            args.legacy_output)
    elif args.function == 'format_for_krona':
        format_for_krona(
            Path(args.metaphlan_output),
            Path(args.krona_output))
    else:
        # no sub-command: show the usage instead of exiting silently
        parser.print_help()


if __name__ == '__main__':
    main()