view metagenome_pipeline.xml @ 1:709adbfcf308 draft default tip

planemo upload for repository https://github.com/picrust/picrust2 commit f8c32a316582ff102c9b7edf3817000691ef9eea
author iuc
date Tue, 13 Aug 2024 12:10:55 +0000
parents 415cb5d91168
children
line wrap: on
line source

<tool id="picrust2_metagenome_pipeline" name="PICRUSt2 Metagenome prediction" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>to generate per-sample metagenome functional profiles based on the predicted functions for each study sequence</description>
    <!-- macros.xml is the only import, so the @...@ tokens used in this file and the
         macros expanded in it (bio_tool, requirements, metagenome_pipeline_params)
         are expected to be defined there; its contents are not visible here. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tool"/>
    <expand macro="requirements"/>
    <!-- Command whose output is recorded as the version of the wrapped script. -->
    <version_command>metagenome_pipeline.py -v</version_command>
    <!-- Runs metagenome_pipeline.py into ./metagenome_output, then decompresses every
         *.gz file found there so that the uncompressed *.tsv paths referenced by the
         from_work_dir attributes of the outputs exist.
         gunzip is invoked through the "+" form of find's exec action: with the ";"
         form find exits 0 even when gunzip fails, which would hide the failure from
         detect_errors="exit_code". With "+", a failing gunzip makes find (and thus
         the job) exit non-zero. -->
    <command detect_errors="exit_code"><![CDATA[
@VAR_ACCESS_FOO@
@PREPARE_METAGENOME_PIPELINE_PARAMS@
metagenome_pipeline.py
    --function '$function'
    --marker '$marker'
    @METAGENOME_PIPELINE_PARAMS@
    --out_dir 'metagenome_output'
&& find metagenome_output -name "*.gz" -exec gunzip {} +
    ]]></command>
    <!-- Inputs: the metagenome_pipeline_params macro, with the two tables specific
         to this tool handed to it as child content. Where they end up relative to
         the macro's own parameters depends on the macro definition, which is not
         part of this file. -->
    <inputs>
        <expand macro="metagenome_pipeline_params" stratified_arg="--strat_out">
            <param argument="--function"
                   type="data"
                   format="tabular"
                   label="Table with predicted gene family copy numbers"
                   help="This table is generated by the tool for Hidden state prediction (HSP)"/>
            <param argument="--marker"
                   type="data"
                   format="tabular"
                   label="Table of predicted marker gene (16S or other) copy numbers"
                   help="This table is generated by the tool for Hidden state prediction (HSP)"/>
        </expand>
    </inputs>
    <!-- Outputs are collected from the metagenome_output working directory.
         The first three tables carry no filter. The two stratified tables are
         filtered so that at most one of them is created, and only when the
         stratified_output selector is non-empty: the long-format table when
         wide_table is false, the wide-format table when it is true. -->
    <outputs>
        <data name="pred_metagenome_unstrat"
              format="tabular"
              label="${tool.name} on ${on_string}: Predicted per-sample metagenome functional profiles"
              from_work_dir="metagenome_output/pred_metagenome_unstrat.tsv"/>
        <data name="seqtab_norm"
              format="tabular"
              label="${tool.name} on ${on_string}: Normalized sequence abundance table"
              from_work_dir="metagenome_output/seqtab_norm.tsv"/>
        <data name="weighted_nsti"
              format="tabular"
              label="${tool.name} on ${on_string}: Weighted nearest-sequenced taxon index (NSTI) values per-sample"
              from_work_dir="metagenome_output/weighted_nsti.tsv"/>
        <data name="strat_output"
              format="tabular"
              label="${tool.name} on ${on_string}: Predicted per-sample metagenome functional profiles, stratified by sequence ids (i.e. taxonomic contributors)"
              from_work_dir="metagenome_output/pred_metagenome_contrib.tsv">
            <filter>stratified_output['selector'] != '' and not stratified_output['wide_table']</filter>
        </data>
        <data name="wide_table_output"
              format="tabular"
              label="${tool.name} on ${on_string}: Predicted per-sample metagenome functional profiles, wide-format stratified by sequence ids (i.e. taxonomic contributors)"
              from_work_dir="metagenome_output/pred_metagenome_strat.tsv">
            <filter>stratified_output['selector'] != '' and stratified_output['wide_table']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: mothur shared abundance table, input_options selector OTU,
             stratified output disabled. Only the three unfiltered outputs are
             expected. -->
        <test expect_num_outputs="3">
            <param name="input" value="table.mothur.shared" ftype="mothur.shared"/>
            <param name="function" value="EC_predicted.tsv.gz" ftype="tabular"/>
            <param name="marker" value="16S_predicted_and_nsti.tsv.gz" ftype="tabular"/>
            <param name="max_nsti" value="2.0"/>
            <param name="skip_norm" value="false"/>
            <conditional name="stratified_output">
                <param name="selector" value=""/>
            </conditional>
            <conditional name="input_options">
                <param name="selector" value="OTU"/>
            </conditional>
            <output name="pred_metagenome_unstrat" ftype="tabular">
                <assert_contents>
                    <has_text text="function"/>
                    <has_n_lines n="1000"/>
                </assert_contents>
            </output>
            <output name="seqtab_norm" ftype="tabular">
                <assert_contents>
                    <has_text text="normalized"/>
                    <has_n_lines n="38"/>
                </assert_contents>
            </output>
            <output name="weighted_nsti" ftype="tabular">
                <assert_contents>
                    <has_text text="weighted_NSTI"/>
                    <has_n_lines n="25"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: BIOM abundance table, input_options selector ASV with
             min_reads and min_samples set to 1, stratified output disabled.
             Only the three unfiltered outputs are expected. -->
        <test expect_num_outputs="3">
            <param name="input" value="table.biom" ftype="biom1"/>
            <param name="function" value="EC_predicted.tsv.gz" ftype="tabular"/>
            <param name="marker" value="16S_predicted_and_nsti.tsv.gz" ftype="tabular"/>
            <param name="max_nsti" value="2.0"/>
            <param name="skip_norm" value="false"/>
            <conditional name="stratified_output">
                <param name="selector" value=""/>
            </conditional>
            <conditional name="input_options">
                <param name="selector" value="ASV"/>
                <param name="min_reads" value="1"/>
                <param name="min_samples" value="1"/>
            </conditional>
            <output name="pred_metagenome_unstrat" ftype="tabular">
                <assert_contents>
                    <has_text text="function"/>
                    <has_n_lines n="1000"/>
                </assert_contents>
            </output>
            <output name="seqtab_norm" ftype="tabular">
                <assert_contents>
                    <has_text text="normalized"/>
                    <has_n_lines n="38"/>
                </assert_contents>
            </output>
            <output name="weighted_nsti" ftype="tabular">
                <assert_contents>
                    <has_text text="weighted_NSTI"/>
                    <has_n_lines n="25"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: BIOM abundance table, ASV input options, stratified output
             enabled with wide_table set to true. The three unfiltered outputs
             plus wide_table_output are expected. -->
        <test expect_num_outputs="4">
            <param name="input" value="table.biom" ftype="biom1"/>
            <param name="function" value="EC_predicted.tsv.gz" ftype="tabular"/>
            <param name="marker" value="16S_predicted_and_nsti.tsv.gz" ftype="tabular"/>
            <param name="max_nsti" value="2.0"/>
            <param name="skip_norm" value="false"/>
            <conditional name="stratified_output">
                <param name="selector" value="--strat_out"/>
                <param name="wide_table" value="true"/>
            </conditional>
            <conditional name="input_options">
                <param name="selector" value="ASV"/>
                <param name="min_reads" value="1"/>
                <param name="min_samples" value="1"/>
            </conditional>
            <output name="pred_metagenome_unstrat" ftype="tabular">
                <assert_contents>
                    <has_text text="function"/>
                    <has_n_lines n="1000"/>
                </assert_contents>
            </output>
            <output name="seqtab_norm" ftype="tabular">
                <assert_contents>
                    <has_text text="normalized"/>
                    <has_n_lines n="38"/>
                </assert_contents>
            </output>
            <output name="weighted_nsti" ftype="tabular">
                <assert_contents>
                    <has_text text="weighted_NSTI"/>
                    <has_n_lines n="25"/>
                </assert_contents>
            </output>
            <output name="wide_table_output" ftype="tabular">
                <assert_contents>
                    <has_text text="EC:1.1.1.1"/>
                    <has_n_lines n="18101"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: BIOM abundance table, ASV input options, stratified output
             enabled with wide_table set to false. The three unfiltered outputs
             plus strat_output are expected. -->
        <test expect_num_outputs="4">
            <param name="input" value="table.biom" ftype="biom1"/>
            <param name="function" value="EC_predicted.tsv.gz" ftype="tabular"/>
            <param name="marker" value="16S_predicted_and_nsti.tsv.gz" ftype="tabular"/>
            <param name="max_nsti" value="2.0"/>
            <param name="skip_norm" value="false"/>
            <conditional name="stratified_output">
                <param name="selector" value="--strat_out"/>
                <param name="wide_table" value="false"/>
            </conditional>
            <conditional name="input_options">
                <param name="selector" value="ASV"/>
                <param name="min_reads" value="1"/>
                <param name="min_samples" value="1"/>
            </conditional>
            <output name="pred_metagenome_unstrat" ftype="tabular">
                <assert_contents>
                    <has_text text="function"/>
                    <has_n_lines n="1000"/>
                </assert_contents>
            </output>
            <output name="seqtab_norm" ftype="tabular">
                <assert_contents>
                    <has_text text="normalized"/>
                    <has_n_lines n="38"/>
                </assert_contents>
            </output>
            <output name="weighted_nsti" ftype="tabular">
                <assert_contents>
                    <has_text text="weighted_NSTI"/>
                    <has_n_lines n="25"/>
                </assert_contents>
            </output>
            <output name="strat_output" ftype="tabular">
                <assert_contents>
                    <has_text text="100CHE6KO"/>
                    <has_n_lines n="298016"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

Metagenome Pipeline
===================
Reads in a sequence abundance table (the abundances of OTUs or ASVs in BIOM, TSV, or mothur shared file format), the predicted marker gene abundances, and the predicted gene family abundances (these last two files are output by hsp.py).

Note
====
Per-sample metagenome functional profiles are generated based on the predicted functions for each study sequence. Note that typically these sequences correspond to OTUs or ASVs. The specified sequence abundance table will be normalized by the predicted number of marker gene copies before outputting the final files by default. The sample metagenome table stratified by contributing ASVs can optionally also be output.

The sequence abundances should be in read counts and not relative abundances. The tool normalizes the input sequence abundance table by the predicted number of marker gene copies and then determines the predicted functional profiles per sample. A table stratified by sequence ids (i.e. taxonomic contributors) is also produced if the --strat_out option is used. In addition, rare ASVs can be collapsed into the same category in the stratified output table based on the --min_reads and --min_samples options. Note that the output files are tab-delimited even if the input file was in BIOM format. The normalized sequence abundance table and the weighted nearest-sequenced taxon index values per-sample are also written to the output directory as separate files.

Input
=====
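1. Table of sequence abundances (BIOM, TSV, or mothur shared file format).
2. Table with predicted gene family copy numbers (generated by the tool for Hidden state prediction (HSP)).
3. Table of predicted marker gene (16S or other) copy numbers (generated by the tool for Hidden state prediction (HSP)).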

Output
======
Metagenome predictions:
  1. Predicted per-sample metagenome functional profiles
  2. Normalized sequence abundance table
  3. Weighted nearest-sequenced taxon index (NSTI) values per-sample
  4. When chosen within the tool's parameters: Predicted per-sample metagenome functional profiles, stratified by sequence ids (i.e. taxonomic contributors)
  5. When chosen within the tool's parameters: Predicted per-sample metagenome functional profiles, wide-format stratified by sequence ids (i.e. taxonomic contributors)

    ]]></help>
    <!-- DOI of the publication to cite when this tool is used. -->
    <citations>
        <citation type="doi">10.1038/s41587-020-0548-6</citation>
    </citations>
</tool>