Previous changeset 0:f5df500fcc3c (2021-04-19) Next changeset 2:a92a632c4d9b (2021-06-14) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f" |
modified:
macros.xml metaphlan.xml |
added:
formatoutput.py |
b |
diff -r f5df500fcc3c -r b89b0765695d formatoutput.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/formatoutput.py Mon May 17 20:10:24 2021 +0000 |
[ |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Reformat a MetaPhlAn abundance profile.

Two sub-commands are provided:

* ``split_levels``: write one report per taxonomic level plus an ``all``
  report with one column (or column pair) per level
* ``format_for_krona``: write the species-resolved lines in a Krona-style
  tabular layout
'''

import argparse
import re
from contextlib import ExitStack
from pathlib import Path

# Rank prefix used in MetaPhlAn clade names (e.g. "k__Bacteria") mapped to the
# name of the corresponding report / column.
taxo_level = {
    'k': 'kingdom',
    'p': 'phylum',
    'c': 'class',
    'o': 'order',
    'f': 'family',
    'g': 'genus',
    's': 'species',
    't': 'strains'}

# Column order of the "all" report, from the highest to the lowest rank.
_LEVEL_ORDER = ('k', 'p', 'c', 'o', 'f', 'g', 's', 't')


def _header_line(levels, legacy_output):
    '''
    Build the tab-separated header line for a report covering ``levels``

    :param levels: Iterable of rank prefixes (keys of ``taxo_level``)
    :param legacy_output: Boolean for legacy output (no ``<level>_id`` column)
    '''
    columns = []
    for level in levels:
        columns.append(taxo_level[level])
        if not legacy_output:
            columns.append("%s_id" % taxo_level[level])
    columns.append("abundance")
    return "%s\n" % "\t".join(columns)


def split_levels(metaphlan_output_fp, out_dp, legacy_output):
    '''
    Split default MetaPhlAn into a report for each taxonomic level

    One file per level (``kingdom``, ``phylum``, ..., ``strains``) and a file
    named ``all`` are written into ``out_dp``. Each input row is added to the
    ``all`` report and to the report of its lowest rank.

    Lines starting with ``#``, blank lines and rows whose clade name carries
    no known ``x__`` rank prefix (e.g. an ``UNKNOWN`` row) are skipped.

    :param metaphlan_output_fp: Path default MetaPhlAn output
    :param out_dp: Path to output directory (must already exist)
    :param legacy_output: Boolean for legacy output (two columns: clade name
        and abundance; otherwise the clade ids are expected in the second
        column and the abundance in the third one)
    '''
    levels_number = len(taxo_level)
    # the "all" report has one column per level in legacy mode and a
    # (name, id) pair per level otherwise
    columns_per_level = 1 if legacy_output else 2

    # ExitStack closes every opened file, also when parsing raises
    with ExitStack() as stack:
        metaphlan_output_f = stack.enter_context(
            open(metaphlan_output_fp, 'r'))
        all_level_f = stack.enter_context(open(out_dp / Path('all'), 'w'))
        abund_f = {
            level: stack.enter_context(
                open(out_dp / Path(taxo_level[level]), 'w'))
            for level in _LEVEL_ORDER}

        # write headers
        all_level_f.write(_header_line(_LEVEL_ORDER, legacy_output))
        for level in _LEVEL_ORDER:
            abund_f[level].write(_header_line([level], legacy_output))

        # parse metaphlan file
        for line in metaphlan_output_f:
            # skip headers
            if line.startswith("#"):
                continue

            # strip only the line terminator: the last line may lack one and
            # trailing empty columns must stay in place
            line = line.rstrip('\n')
            if not line.strip():
                continue

            # split line
            split_line = line.split('\t')
            taxo_n = split_line[0].split('|')

            # rows without rank prefixes belong to no taxonomic level
            if any('__' not in name for name in taxo_n):
                continue
            last_level, last_taxo = taxo_n[-1].split('__')[:2]
            if last_level not in abund_f:
                continue

            if legacy_output:
                taxo_id = None
                abundance = split_line[1]
            else:
                taxo_id = split_line[1].split('|')
                abundance = split_line[2]

            # get taxon names and ids
            cells = []
            for i, name in enumerate(taxo_n):
                cells.append(name.split('__')[1].replace("_", " "))
                if not legacy_output:
                    cells.append(taxo_id[i])

            # if not all taxon levels: pad so the abundance stays under its
            # header column
            missing_levels = max(levels_number - len(taxo_n), 0)
            cells.extend([''] * (columns_per_level * missing_levels))
            cells.append(abundance)
            all_level_f.write("%s\n" % "\t".join(cells))

            # write the row in the report of its lowest rank
            level_cells = [last_taxo.replace("_", " ")]
            if not legacy_output:
                level_cells.append(taxo_id[-1])
            level_cells.append(abundance)
            abund_f[last_level].write("%s\n" % "\t".join(level_cells))


def format_for_krona(metaphlan_output_fp, krona_out_fp):
    '''
    Format default MetaPhlAn output into a tabular file for Krona

    Only the lines containing ``s__`` are kept. For each of them the output
    line holds the last tab-separated field of the input line, followed by
    the lineage in which every ``x__`` rank prefix is replaced by a tab, the
    ``|`` separators are removed and underscores are replaced by spaces.

    :param metaphlan_output_fp: Path default MetaPhlAn output
    :param krona_out_fp: Path to output file for Krona
    '''
    re_replace = re.compile(r"\w__")
    re_bar = re.compile(r"\|")
    re_underscore = re.compile(r"_")

    with open(metaphlan_output_fp, 'r') as metaphlan_output_f:
        with open(krona_out_fp, 'w') as krona_out_f:
            for line in metaphlan_output_f:
                if "s__" not in line:
                    continue
                # NOTE(review): rstrip() drops trailing empty columns, so
                # x[-1] is the last non-empty field. Presumably this is meant
                # to be the abundance; confirm for profiles that carry extra
                # non-empty columns after it.
                x = line.rstrip().split('\t')
                lineage = re_bar.sub('', x[0])
                lineage = re_replace.sub('\t', lineage)
                lineage = re_underscore.sub(' ', lineage)
                krona_out_f.write("%s\t%s\n" % (x[-1], lineage))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Format MetaPhlAn output')
    subparsers = parser.add_subparsers(dest='function')
    # split_levels
    split_levels_parser = subparsers.add_parser('split_levels', help='Split default MetaPhlAn into a report for each taxonomic level')
    split_levels_parser.add_argument('--metaphlan_output', required=True, help="Path to default MetaPhlAn output")
    split_levels_parser.add_argument('--outdir', required=True, help="Path to output directory")
    split_levels_parser.add_argument('--legacy-output', dest='legacy_output', action='store_true', help="Old MetaPhlAn2 two columns output")
    split_levels_parser.set_defaults(legacy_output=False)
    # format_for_krona
    format_for_krona_parser = subparsers.add_parser('format_for_krona', help='Format default MetaPhlAn output for Krona')
    format_for_krona_parser.add_argument('--metaphlan_output', required=True, help="Path to default MetaPhlAn output")
    format_for_krona_parser.add_argument('--krona_output', required=True, help="Path to Krona output file")

    args = parser.parse_args()

    if args.function == 'split_levels':
        split_levels(
            Path(args.metaphlan_output),
            Path(args.outdir),
            args.legacy_output)
    elif args.function == 'format_for_krona':
        format_for_krona(
            Path(args.metaphlan_output),
            Path(args.krona_output))
    else:
        # no sub-command given: report it instead of silently doing nothing
        parser.error("a sub-command is required: split_levels or format_for_krona")
b |
diff -r f5df500fcc3c -r b89b0765695d macros.xml --- a/macros.xml Mon Apr 19 20:56:20 2021 +0000 +++ b/macros.xml Mon May 17 20:10:24 2021 +0000 |
b |
@@ -1,6 +1,6 @@ <?xml version="1.0"?> <macros> - <token name="@TOOL_VERSION@">3.0.7</token> + <token name="@TOOL_VERSION@">3.0.8</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">20.01</token> <xml name="edam_ontology"> @@ -24,17 +24,4 @@ <citation type="doi">1101/2020.11.19.388223</citation> </citations> </xml> - <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2</token> - <xml name="tax_lev"> - <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output"> - <option value="a" selected="true">All taxonomic levels</option> - <option value="k">Kingdoms only</option> - <option value="p">Phyla only</option> - <option value="c">Classes only</option> - <option value="o">Orders only</option> - <option value="f">Families only</option> - <option value="g">Genera only</option> - <option value="s">Species only</option> - </param> - </xml> </macros> |
b |
diff -r f5df500fcc3c -r b89b0765695d metaphlan.xml --- a/metaphlan.xml Mon Apr 19 20:56:20 2021 +0000 +++ b/metaphlan.xml Mon May 17 20:10:24 2021 +0000 |
[ |
b'@@ -2,6 +2,32 @@\n <description>to profile the composition of microbial communities</description>\n <macros>\n <import>macros.xml</import>\n+ <xml name="tax_lev">\n+ <conditional name="tax_lev">\n+ <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output">\n+ <option value="a" selected="true">All taxonomic levels</option>\n+ <option value="k">Kingdoms only</option>\n+ <option value="p">Phyla only</option>\n+ <option value="c">Classes only</option>\n+ <option value="o">Orders only</option>\n+ <option value="f">Families only</option>\n+ <option value="g">Genera only</option>\n+ <option value="s">Species only</option>\n+ </param>\n+ <when value="a">\n+ <param name="split_levels" type=\'boolean\' checked="false" truevalue=\'true\' falsevalue=\'false\' \n+ label="Generate a report for each taxonomic level?" help="It will be in addition to the default output"/>\n+ </when>\n+ <when value="k"/>\n+ <when value="p"/>\n+ <when value="c"/>\n+ <when value="o"/>\n+ <when value="f"/>\n+ <when value="g"/>\n+ <when value="s"/>\n+ </conditional>\n+ </xml>\n+ <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2</token>\n </macros>\n <expand macro="edam_ontology"/>\n <expand macro="requirements"/>\n@@ -110,7 +136,7 @@\n #end if\n -t \'$analysis.analysis_type.t\'\n #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"\n- --tax_lev \'$analysis.analysis_type.tax_lev\'\n+ --tax_lev \'$analysis.analysis_type.tax_lev.tax_lev\'\n #else if $analysis.analysis_type.t == "clade_specific_strain_tracker"\n --clade \'$analysis.analysis_type.clade\'\n #if str($analysis.analysis_type.min_ab) != \'\'\n@@ -159,6 +185,27 @@\n &&\n mv \'bowtie2out\' \'$bowtie2out\'\n #end if\n+\n+#if $analysis.analysis_type.tax_lev.tax_lev == \'a\' and $analysis.analysis_type.tax_lev.split_levels\n+&&\n+mkdir \'split_levels\'\n+&&\n+python \'$__tool_directory__/formatoutput.py\'\n+ split_levels\n+ 
--metaphlan_output \'$output_file\'\n+ --outdir \'split_levels\'\n+ $out.legacy_output\n+&&\n+ls split_levels\n+#end if\n+\n+#if $out.krona_output\n+&&\n+python \'$__tool_directory__/formatoutput.py\'\n+ format_for_krona\n+ --metaphlan_output \'$output_file\'\n+ --krona_output \'$krona_output_file\'\n+#end if\n ]]></command>\n <inputs>\n <section name="inputs" title="Inputs" expanded="true">\n@@ -294,6 +341,7 @@\n label="Report the profiling using the CAMI output format?"/>\n <param argument="--unknown_estimation" type=\'boolean\' checked="false" truevalue=\'--unknown_estimation\' falsevalue=\'\' \n label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>\n+ <param name="krona_output" type=\'boolean\' checked="false" truevalue=\'true\' falsevalue=\'false\' label="Output for Krona?"/>\n </section>\n </inputs>\n <outputs>\n@@ -305,9 +353,16 @@\n <filter>inputs[\'in\'][\'selector\'] == "raw"</filter>\n </data>\n <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file" />\n+ <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels" >\n+ <discover_datasets pattern="(?P<designation>.+)" directory="split_levels/" format="tabular"/>\n+ <filter>analysis[\'analysis_type\'][\'tax_lev\'][\'tax_lev\'] == "a" and analysis[\'analysis_type\'][\'tax_lev\'][\'split_levels\']</filter>\n+ </collection>\n+ <data name="krona_output_file" format="tabular" lab'..b'me="legacy_output" value="true"/>\n <param name="CAMI_format_output" value="false"/>\n <param name="unknown_estimation" value="false"/>\n+ <param name="krona_output" value="true"/>\n </section>\n <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">\n <assert_contents>\n@@ -647,6 +828,77 @@\n <has_text 
text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>\n </assert_contents>\n </output>\n+ <output_collection name="levels" type="list" >\n+ <element name="all" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="Gammaproteobacteria"/>\n+ <has_text text="Corynebacterium accolens"/>\n+ <has_n_columns n="9"/>\n+ </assert_contents>\n+ </element>\n+ <element name="kingdom" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="kingdom"/>\n+ <has_text text="Bacteria"/>\n+ <has_n_columns n="2"/>\n+ </assert_contents>\n+ </element>\n+ <element name="phylum" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="phylum"/>\n+ <has_text text="Firmicutes"/>\n+ <has_n_columns n="2"/>\n+ </assert_contents>\n+ </element>\n+ <element name="class" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="class"/>\n+ <has_text text="Actinobacteria"/>\n+ <has_n_columns n="2"/>\n+ </assert_contents>\n+ </element>\n+ <element name="order" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="order"/>\n+ <has_text text="Propionibacteriales"/>\n+ <has_n_columns n="2"/>\n+ </assert_contents>\n+ </element>\n+ <element name="family" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="family"/>\n+ <has_text text="Propionibacteriaceae"/>\n+ <has_n_columns n="2"/>\n+ </assert_contents>\n+ </element>\n+ <element name="genus" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="genus"/>\n+ <has_text text="Cutibacterium"/>\n+ <has_n_columns n="2"/>\n+ </assert_contents>\n+ </element>\n+ <element name="species" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="species"/>\n+ <has_text text="Corynebacterium accolens"/>\n+ <has_n_columns n="2"/>\n+ </assert_contents>\n+ </element>\n+ <element name="strains" ftype="tabular">\n+ <assert_contents>\n+ <has_text text="strains"/>\n+ <has_n_columns n="2"/>\n+ </assert_contents>\n+ </element>\n+ </output_collection>\n+ <output name="krona_output_file" 
ftype="tabular">\n+ <assert_contents>\n+ <not_has_text text="k__Bacteria"/>\n+ <has_text text="Corynebacterium accolens"/>\n+ <has_n_columns n="9"/>\n+ </assert_contents>\n+ </output>\n </test>\n </tests>\n <help><![CDATA[\n' |