Repository 'metaphlan'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/metaphlan

Changeset 1:b89b0765695d (2021-05-17)
Previous changeset 0:f5df500fcc3c (2021-04-19) Next changeset 2:a92a632c4d9b (2021-06-14)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
modified:
macros.xml
metaphlan.xml
added:
formatoutput.py
b
diff -r f5df500fcc3c -r b89b0765695d formatoutput.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/formatoutput.py Mon May 17 20:10:24 2021 +0000
[
@@ -0,0 +1,144 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import argparse
+import re
+from pathlib import Path
+
+# Maps the one-letter rank prefix of a MetaPhlAn clade name (the part before
+# '__', e.g. the 'k' of 'k__Bacteria') to the level name written in the
+# report headers.
+taxo_level = {
+    'k': 'kingdom',
+    'p': 'phylum',
+    'c': 'class',
+    'o': 'order',
+    'f': 'family',
+    'g': 'genus',
+    's': 'species',
+    't': 'strains'}
+
+
+def split_levels(metaphlan_output_fp, out_dp, legacy_output):
+    '''
+    Split default MetaPhlAn output into a report for each taxonomic level
+
+    One file per level ('kingdom' ... 'strains') is written into out_dp with
+    the clades whose deepest rank is that level, plus an 'all' file with
+    every clade and, per level, one name column (followed by an id column
+    unless legacy_output is set). Every file starts with a header line.
+
+    Comment lines (starting with '#'), blank lines and rows whose clade
+    name carries no known '<rank>__' prefix are skipped, as they cannot be
+    attached to a taxonomic level.
+
+    :param metaphlan_output_fp: Path default MetaPhlAn output
+    :param out_dp: Path to output directory (it is not created here)
+    :param legacy_output: Boolean for legacy output (no taxonomy id column)
+    '''
+    # local import: the function brings its own dependency into scope
+    from contextlib import ExitStack
+
+    # column order of the 'all' report, from broadest to narrowest rank
+    levels = ['k', 'p', 'c', 'o', 'f', 'g', 's', 't']
+    # a level takes a name column and, unless legacy, an id column as well
+    columns_per_level = 1 if legacy_output else 2
+
+    def header(level_names):
+        # build the header line for a report covering the given levels
+        fields = []
+        for level_name in level_names:
+            fields.append(level_name)
+            if not legacy_output:
+                fields.append("%s_id" % level_name)
+        fields.append("abundance")
+        return "\t".join(fields) + "\n"
+
+    # the stack closes every opened file, even when parsing fails midway
+    with ExitStack() as stack:
+        # prepare output files
+        abund_f = {}
+        for level in levels:
+            abund_f[level] = stack.enter_context(
+                open(out_dp / Path(taxo_level[level]), 'w'))
+            abund_f[level].write(header([taxo_level[level]]))
+
+        metaphlan_output_f = stack.enter_context(
+            open(metaphlan_output_fp, 'r'))
+        all_level_f = stack.enter_context(open(out_dp / Path('all'), 'w'))
+        # write header in all level file
+        all_level_f.write(header([taxo_level[level] for level in levels]))
+
+        # parse metaphlan file
+        for line in metaphlan_output_f:
+            # skip headers and blank lines
+            if line.startswith("#") or not line.strip():
+                continue
+
+            # strip the line terminator only: an empty trailing column must
+            # survive and a last line without newline must stay intact
+            split_line = line.rstrip('\r\n').split('\t')
+
+            # split the lineage into (rank, name) pairs
+            lineage = []
+            for clade in split_line[0].split('|'):
+                rank, sep, name = clade.partition('__')
+                if not sep or rank not in abund_f:
+                    # no usable rank prefix: the row cannot be placed
+                    lineage = []
+                    break
+                lineage.append((rank, name.replace("_", " ")))
+            if not lineage:
+                continue
+
+            if legacy_output:
+                taxo_id = []
+                abundance = split_line[1]
+            else:
+                taxo_id = split_line[1].split('|')
+                # tolerate a row listing fewer ids than clades
+                taxo_id += [''] * (len(lineage) - len(taxo_id))
+                abundance = split_line[2]
+
+            # row of the all level file: taxon names and ids ...
+            fields = []
+            for i, (_rank, name) in enumerate(lineage):
+                fields.append(name)
+                if not legacy_output:
+                    fields.append(taxo_id[i])
+            # ... then empty cells for the levels the clade does not reach,
+            # so that the abundance stays under its header column
+            missing = len(levels) - len(lineage)
+            fields.extend([''] * (columns_per_level * missing))
+            fields.append(abundance)
+            all_level_f.write("\t".join(fields) + "\n")
+
+            # row of the report for the deepest level of the clade
+            level, taxo = lineage[-1]
+            fields = [taxo]
+            if not legacy_output:
+                fields.append(taxo_id[-1])
+            fields.append(abundance)
+            abund_f[level].write("\t".join(fields) + "\n")
+
+
+def format_for_krona(metaphlan_output_fp, krona_out_fp):
+    '''
+    Format default MetaPhlAn output into a tabular file for Krona
+
+    For every row whose clade name contains the species prefix ('s__'), one
+    line is written holding the last column of the row followed by the
+    lineage, one taxon per column, with the rank prefixes removed and the
+    underscores replaced by spaces. Comment lines (starting with '#') are
+    skipped.
+
+    :param metaphlan_output_fp: Path default MetaPhlAn output
+    :param krona_out_fp: Path to output file for Krona
+    '''
+    # a rank prefix is one word character followed by '__' (k__, p__, ...)
+    re_rank_prefix = re.compile(r"\w__")
+
+    with open(metaphlan_output_fp, 'r') as metaphlan_output_f:
+        with open(krona_out_fp, 'w') as krona_out_f:
+            for line in metaphlan_output_f:
+                # skip headers
+                if line.startswith("#"):
+                    continue
+
+                x = line.rstrip().split('\t')
+                # test the clade column only, so that the content of the
+                # other columns cannot select a row
+                if "s__" not in x[0]:
+                    continue
+
+                lineage = x[0].replace('|', '')
+                # every prefix becomes a tab, the leading one included: an
+                # empty column thus separates the value from the lineage
+                lineage = re_rank_prefix.sub('\t', lineage)
+                lineage = lineage.replace('_', ' ')
+                # NOTE(review): the last column is taken as the abundance;
+                # presumably fine for two-column (legacy) output -- confirm
+                # for outputs carrying extra trailing columns
+                krona_out_f.write("%s\t%s\n" % (x[-1], lineage))
+
+
+if __name__ == '__main__':
+    # Command-line entry point: one sub-command per formatting function
+    parser = argparse.ArgumentParser(description='Format MetaPhlAn output')
+    subparsers = parser.add_subparsers(dest='function')
+    # split_levels
+    split_levels_parser = subparsers.add_parser(
+        'split_levels',
+        help='Split default MetaPhlAn into a report for each taxonomic level')
+    split_levels_parser.add_argument(
+        '--metaphlan_output', required=True,
+        help="Path to default MetaPhlAn output")
+    split_levels_parser.add_argument(
+        '--outdir', required=True,
+        help="Path to output directory")
+    # store_true already defaults to False when the flag is absent
+    split_levels_parser.add_argument(
+        '--legacy-output', dest='legacy_output', action='store_true',
+        help="Old MetaPhlAn2 two columns output")
+    # format_for_krona
+    format_for_krona_parser = subparsers.add_parser(
+        'format_for_krona',
+        help='Format default MetaPhlAn output for Krona')
+    format_for_krona_parser.add_argument(
+        '--metaphlan_output', required=True,
+        help="Path to default MetaPhlAn output")
+    format_for_krona_parser.add_argument(
+        '--krona_output', required=True,
+        help="Path to Krona output file")
+
+    args = parser.parse_args()
+
+    if args.function == 'split_levels':
+        split_levels(
+            Path(args.metaphlan_output),
+            Path(args.outdir),
+            args.legacy_output)
+    elif args.function == 'format_for_krona':
+        format_for_krona(
+            Path(args.metaphlan_output),
+            Path(args.krona_output))
+    else:
+        # no sub-command given: report it instead of exiting silently
+        parser.error(
+            "a sub-command is required: split_levels or format_for_krona")
b
diff -r f5df500fcc3c -r b89b0765695d macros.xml
--- a/macros.xml Mon Apr 19 20:56:20 2021 +0000
+++ b/macros.xml Mon May 17 20:10:24 2021 +0000
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">3.0.7</token>
+    <token name="@TOOL_VERSION@">3.0.8</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">20.01</token>
     <xml name="edam_ontology">
@@ -24,17 +24,4 @@
             <citation type="doi">1101/2020.11.19.388223</citation>
         </citations>
     </xml>
-    <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2</token>
-    <xml name="tax_lev">
-        <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output">
-            <option value="a" selected="true">All taxonomic levels</option>
-            <option value="k">Kingdoms only</option>
-            <option value="p">Phyla only</option>
-            <option value="c">Classes only</option>
-            <option value="o">Orders only</option>
-            <option value="f">Families only</option>
-            <option value="g">Genera only</option>
-            <option value="s">Species only</option>
-        </param>
-    </xml>
 </macros>
b
diff -r f5df500fcc3c -r b89b0765695d metaphlan.xml
--- a/metaphlan.xml Mon Apr 19 20:56:20 2021 +0000
+++ b/metaphlan.xml Mon May 17 20:10:24 2021 +0000
[
b'@@ -2,6 +2,32 @@\n     <description>to profile the composition of microbial communities</description>\n     <macros>\n         <import>macros.xml</import>\n+        <xml name="tax_lev">\n+            <conditional name="tax_lev">\n+                <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output">\n+                    <option value="a" selected="true">All taxonomic levels</option>\n+                    <option value="k">Kingdoms only</option>\n+                    <option value="p">Phyla only</option>\n+                    <option value="c">Classes only</option>\n+                    <option value="o">Orders only</option>\n+                    <option value="f">Families only</option>\n+                    <option value="g">Genera only</option>\n+                    <option value="s">Species only</option>\n+                </param>\n+                <when value="a">\n+                    <param name="split_levels" type=\'boolean\' checked="false" truevalue=\'true\' falsevalue=\'false\' \n+                        label="Generate a report for each taxonomic level?" 
help="It will be in addition to the default output"/>\n+                </when>\n+                <when value="k"/>\n+                <when value="p"/>\n+                <when value="c"/>\n+                <when value="o"/>\n+                <when value="f"/>\n+                <when value="g"/>\n+                <when value="s"/>\n+            </conditional>\n+        </xml>\n+        <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2</token>\n     </macros>\n     <expand macro="edam_ontology"/>\n     <expand macro="requirements"/>\n@@ -110,7 +136,7 @@\n #end if\n     -t \'$analysis.analysis_type.t\'\n #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"\n-    --tax_lev \'$analysis.analysis_type.tax_lev\'\n+    --tax_lev \'$analysis.analysis_type.tax_lev.tax_lev\'\n #else if $analysis.analysis_type.t == "clade_specific_strain_tracker"\n     --clade \'$analysis.analysis_type.clade\'\n     #if str($analysis.analysis_type.min_ab) != \'\'\n@@ -159,6 +185,27 @@\n &&\n mv \'bowtie2out\' \'$bowtie2out\'\n #end if\n+\n+#if $analysis.analysis_type.tax_lev.tax_lev == \'a\' and $analysis.analysis_type.tax_lev.split_levels\n+&&\n+mkdir \'split_levels\'\n+&&\n+python \'$__tool_directory__/formatoutput.py\'\n+    split_levels\n+    --metaphlan_output \'$output_file\'\n+    --outdir \'split_levels\'\n+    $out.legacy_output\n+&&\n+ls split_levels\n+#end if\n+\n+#if $out.krona_output\n+&&\n+python \'$__tool_directory__/formatoutput.py\'\n+    format_for_krona\n+    --metaphlan_output \'$output_file\'\n+    --krona_output \'$krona_output_file\'\n+#end if\n     ]]></command>\n     <inputs>\n         <section name="inputs" title="Inputs" expanded="true">\n@@ -294,6 +341,7 @@\n                 label="Report the profiling using the CAMI output format?"/>\n             <param argument="--unknown_estimation" type=\'boolean\' checked="false" truevalue=\'--unknown_estimation\' falsevalue=\'\' \n                 
label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>\n+            <param name="krona_output" type=\'boolean\' checked="false" truevalue=\'true\' falsevalue=\'false\' label="Output for Krona?"/>\n         </section>\n     </inputs>\n     <outputs>\n@@ -305,9 +353,16 @@\n             <filter>inputs[\'in\'][\'selector\'] == "raw"</filter>\n         </data>\n         <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file" />\n+        <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels" >\n+            <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="split_levels/" format="tabular"/>\n+            <filter>analysis[\'analysis_type\'][\'tax_lev\'][\'tax_lev\'] == "a" and analysis[\'analysis_type\'][\'tax_lev\'][\'split_levels\']</filter>\n+        </collection>\n+        <data name="krona_output_file" format="tabular" lab'..b'me="legacy_output" value="true"/>\n                 <param name="CAMI_format_output" value="false"/>\n                 <param name="unknown_estimation" value="false"/>\n+                <param name="krona_output" value="true"/>\n             </section>\n             <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">\n                 <assert_contents>\n@@ -647,6 +828,77 @@\n                     <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>\n                 </assert_contents>\n             </output>\n+            <output_collection name="levels" type="list" >\n+                <element name="all" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="Gammaproteobacteria"/>\n+                        <has_text text="Corynebacterium accolens"/>\n+ 
                       <has_n_columns n="9"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="kingdom" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="kingdom"/>\n+                        <has_text text="Bacteria"/>\n+                        <has_n_columns n="2"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="phylum" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="phylum"/>\n+                        <has_text text="Firmicutes"/>\n+                        <has_n_columns n="2"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="class" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="class"/>\n+                        <has_text text="Actinobacteria"/>\n+                        <has_n_columns n="2"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="order" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="order"/>\n+                        <has_text text="Propionibacteriales"/>\n+                        <has_n_columns n="2"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="family" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="family"/>\n+                        <has_text text="Propionibacteriaceae"/>\n+                        <has_n_columns n="2"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="genus" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="genus"/>\n+                        <has_text text="Cutibacterium"/>\n+                        
<has_n_columns n="2"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="species" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="species"/>\n+                        <has_text text="Corynebacterium accolens"/>\n+                        <has_n_columns n="2"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="strains" ftype="tabular">\n+                    <assert_contents>\n+                        <has_text text="strains"/>\n+                        <has_n_columns n="2"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n+            <output name="krona_output_file" ftype="tabular">\n+                <assert_contents>\n+                    <not_has_text text="k__Bacteria"/>\n+                    <has_text text="Corynebacterium accolens"/>\n+                    <has_n_columns n="9"/>\n+                </assert_contents>\n+            </output>\n         </test>\n     </tests>\n     <help><![CDATA[\n'