view hsp.xml @ 1:3b133b8d578a draft default tip

planemo upload for repository https://github.com/picrust/picrust2 commit f8c32a316582ff102c9b7edf3817000691ef9eea
author iuc
date Tue, 13 Aug 2024 12:11:12 +0000
parents 6eafc1e01a8c
children
line wrap: on
line source

<tool id="picrust2_hsp" name="PICRUSt2 Hidden state prediction (HSP)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Galaxy wrapper around the hsp.py script. The version and profile placeholders in the tool tag are not defined in this file; presumably they come from macros.xml (confirm there). -->
    <description>to predict gene family abundances</description>
    <!-- Pulls in the shared definitions from macros.xml that the rest of this wrapper refers to. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- bio_tool and requirements are macros defined outside this file. -->
    <expand macro="bio_tool"/>
    <expand macro="requirements"/>
    <!-- Runs the wrapped script with its -v flag to report the tool version. -->
    <version_command>hsp.py -v</version_command>
    <command detect_errors="exit_code"><![CDATA[
## NOTE(review): VAR_ACCESS_FOO is a macro token that is not defined in this
## file; presumably it is provided by macros.xml - confirm before changing.
@VAR_ACCESS_FOO@
hsp.py
    ## tree dataset selected through the "tree" input
    --tree '$tree'
    ## further options are injected by the HSP_PARAMS token (defined outside this file)
    @HSP_PARAMS@
    ## the prediction table is written to the tool's only output dataset
    --output '$prediction_output'
    ## expands to the check flag or to an empty string, depending on the boolean input
    $check
    ## not implemented --chunk_size hoping that the default is carefully chosen
    ## otherwise one might need to compute a "good" value from the number of
    ## entries in the trait table and the number of available processors
    ## pass the GALAXY_SLOTS environment variable as the -p value, using 1 when it is unset
    -p "\${GALAXY_SLOTS:-1}"
    ]]></command>
    <inputs>
        <!-- Tree on which the hidden state prediction is run. -->
        <param argument="--tree"
               type="data"
               format="newick"
               label="Newick tree with study sequences placed amongst reference sequences"
               help="The full reference tree containing both study sequences (i.e. ASVs or OTUs) and reference sequences."/>
        <!-- Shared HSP parameters come from the hsp_params macro (defined outside this file);
             the attributes, the two tokens and the extra param below are handed to it. -->
        <expand macro="hsp_params"
                in_trait_arg="--in_trait"
                in_trait_multiple="false"
                in_trait_label_suff=""
                nsti_truevalue="--calculate_NSTI"
                nsti_checked="false">
            <!-- Additional trait choices offered by this tool. -->
            <token name="add_default_traits">
                <option value="16S">16S</option>
                <option value="PHENO">PHENO</option>
            </token>
            <!-- Input used when the user supplies a trait table of their own. -->
            <token name="custom_traits">
                <param argument="--observed_trait_table"
                       type="data"
                       format="tabular"
                       label="Customized trait table"
                       help="Describes directly observed traits (e.g. sequenced genomes) in tab-delimited format. "/>
            </token>
            <!-- The seed parameter is added here because the emp_prob option needs it;
                 picrust2_pipeline has no such parameter. -->
            <param argument="--seed"
                   type="integer"
                   value="100"
                   label="Seed to make output reproducible"
                   help="is necessary for the emp_prob method"/>
        </expand>
        <!-- Optional flag; when enabled its truevalue is placed on the command line. -->
        <param argument="--check"
               type="boolean"
               checked="false"
               truevalue="--check"
               falsevalue=""
               label="Check input trait table before HSP"/>
    </inputs>
    <outputs>
        <!-- Single result dataset; the command section passes it to hsp.py as the output path. -->
        <data format="tabular" name="prediction_output"/>
    </outputs>
    <tests>
        <!-- Test 1: default 16S trait table with the mp method. -->
        <test expect_num_outputs="1">
            <param name="tree" ftype="newick" value="out_tree.zip"/>
            <conditional name="trait_input">
                <param name="selector" value="default"/>
                <param name="in_trait" value="16S"/>
            </conditional>
            <conditional name="hsp_method__options">
                <param name="hsp_method" value="mp"/>
                <param name="edge_exponent" value="0.0"/>
            </conditional>
            <param name="calculate_NSTI" value="false"/>
            <param name="check" value="false"/>
            <output name="prediction_output" ftype="tabular">
                <assert_contents>
                    <has_text text="sequence"/>
                    <has_n_lines n="38"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: user-supplied trait table with the mp method. -->
        <test expect_num_outputs="1">
            <param name="tree" ftype="newick" value="out_tree.zip"/>
            <conditional name="trait_input">
                <param name="selector" value="custom"/>
                <param name="observed_trait_table" value="known_traits.tsv.gz"/>
            </conditional>
            <conditional name="hsp_method__options">
                <param name="hsp_method" value="mp"/>
                <param name="edge_exponent" value="0.0"/>
            </conditional>
            <param name="calculate_NSTI" value="false"/>
            <param name="check" value="false"/>
            <!-- ftype is asserted here as well so that all three tests check the declared output format. -->
            <output name="prediction_output" ftype="tabular">
                <assert_contents>
                    <has_text text="2040502012"/>
                    <has_n_lines n="20005"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: default 16S trait table with the emp_prob method and an explicit seed. -->
        <test expect_num_outputs="1">
            <param name="tree" ftype="newick" value="out_tree.zip"/>
            <conditional name="trait_input">
                <param name="selector" value="default"/>
                <param name="in_trait" value="16S"/>
            </conditional>
            <conditional name="hsp_method__options">
                <param name="hsp_method" value="emp_prob"/>
                <param name="seed" value="100"/>
            </conditional>
            <param name="calculate_NSTI" value="false"/>
            <param name="check" value="false"/>
            <!-- ftype is asserted here as well so that all three tests check the declared output format. -->
            <output name="prediction_output" ftype="tabular">
                <assert_contents>
                    <has_text text="sequence"/>
                    <has_n_lines n="38"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

Hidden State Prediction (HSP)
=============================
Performs hidden state prediction on tips in the input tree with unknown trait values. Typically this script is used to predict the copy number of gene families present in the predicted genome for each amplicon sequence variant, given a tree and a set of known trait values. This script outputs a table of trait predictions.

Note
====
Run hidden-state prediction (hsp) to predict gene family abundances.

Input
=====
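Newick tree with study sequences placed amongst reference sequences, plus a trait table: either one of the default tables (e.g. 16S) or a custom tab-delimited table of directly observed traits.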

Output
======
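Tabular file containing the trait predictions (e.g. predicted gene family copy numbers) for the tips in the input tree with unknown trait values.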

    ]]></help>
    <citations>
        <!-- Publication to cite when using this tool, referenced by its DOI. -->
        <citation type="doi">10.1038/s41587-020-0548-6</citation>
    </citations>
</tool>