view humann_renorm_table.xml @ 0:f76163304dab draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author thanhlv
date Mon, 13 Feb 2023 16:21:18 +0000
parents
children
line wrap: on
line source

<tool id="humann_renorm_table" name="Renormalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" >
    <!-- Shown next to the tool name, so the title reads
         "Renormalize a HUMAnN generated table". -->
    <description>a HUMAnN generated table</description>
    <!-- Shared definitions live in macros.xml (not visible here). The
         @TOOL_VERSION@, @VERSION_SUFFIX@ and @HELP_HEADER@ tokens used in this
         file are presumably defined there as well; confirm before editing. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Each expand pulls in the macro of that name from the import above. -->
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <!-- Cheetah template for the humann_renorm_table call. $special renders
         as 'y' or 'n' and $update_snames renders as either the flag itself or
         an empty string, as set by the truevalue/falsevalue attributes of the
         matching boolean params in the inputs section. A non-zero exit code
         marks the job as failed (detect_errors="exit_code"). -->
    <command detect_errors="exit_code"><![CDATA[
humann_renorm_table
    --input '$input'
    -o '$output'
    --units '$units'
    --mode '$mode'
    --special '$special'
    $update_snames
    ]]></command>
    <inputs>
        <!-- HUMAnN table whose values are rescaled. -->
        <param argument="--input" type="data" format="tsv,tabular" label="Gene/pathway table"/>

        <!-- Unit of the rescaled values. -->
        <param argument="--units" type="select" label="Normalization scheme">
            <option value="cpm" selected="true">Copies per million</option>
            <option value="relab">Relative abundance</option>
        </param>

        <!-- Total that each stratification level is divided by. -->
        <param argument="--mode" type="select" label="Normalization level">
            <option value="community" selected="true">Normalization of all levels by community total</option>
            <option value="levelwise">Normalization of all levels by levelwise total</option>
        </param>

        <!-- Rendered on the command line as 'y' (checked) or 'n' (unchecked). -->
        <param argument="--special"
               type="boolean"
               truevalue="y"
               falsevalue="n"
               checked="true"
               label="Include the special features UNMAPPED, UNINTEGRATED, and UNGROUPED?"/>

        <!-- Rendered as the bare flag when checked, as nothing when unchecked. -->
        <param argument="--update-snames"
               type="boolean"
               truevalue="--update-snames"
               falsevalue=""
               checked="true"
               label="Update '-RPK' in sample names to appropriate suffix?"/>
    </inputs>
    <outputs>
        <!-- Renormalized table; the command writes it through its -o option. -->
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <!-- Boolean params are set with canonical true/false rather than with
             their rendered truevalue/falsevalue strings, so the intent is
             readable without cross-checking the inputs section. -->

        <!-- CPM units, community-level totals, special features excluded,
             sample names left unchanged. -->
        <test expect_num_outputs="1">
            <param name="input" value="demo_pathabundance.tsv"/>
            <param name="units" value="cpm"/>
            <param name="mode" value="community"/>
            <param name="special" value="false"/>
            <param name="update_snames" value="false"/>
            <output name="output" ftype="tabular" file="cpm_community_renormalized_pathway_abundance.tsv">
                <assert_contents>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis" />
                    <has_text text="578694" />
                </assert_contents>
            </output>
        </test>

        <!-- Relative abundance, level-wise totals, special features included,
             sample names left unchanged. -->
        <test expect_num_outputs="1">
            <param name="input" value="demo_pathabundance.tsv"/>
            <param name="units" value="relab"/>
            <param name="mode" value="levelwise"/>
            <param name="special" value="true"/>
            <param name="update_snames" value="false"/>
            <output name="output" ftype="tabular" file="relab_levelwise_renormalized_pathway_abundance.tsv">
                <assert_contents>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified" />
                    <has_text text="0.630281" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

HUMAnN quantifies genes and pathways in units of RPKs (reads per kilobase). These account for gene length but not sample sequencing depth. 
While there are some applications, e.g. strain profiling, where RPK units are superior to depth-normalized units, most of the 
time a user will renormalize their samples prior to downstream analysis.

This tool provides the choice to normalize to relative abundance or copies per million (CPM) units. Both of these represent 
"total sum scaling (TSS)"-style normalization: in the former case, each sample is constrained to sum to 1, whereas in the 
latter case (CPMs) samples are constrained to sum to 1 million. Units out of 1 million are often more convenient for tables
with many, many features (such as genefamilies.tsv tables).

Note: CPM as used here does not refer to unnormalized COUNTS per million, but rather copies per million.
CPMs as used here are a generic analog of the TPM (transcripts per million) unit in RNA-seq. You may wish to use the
abbreviation CoPM for added clarity.

By default, this tool normalizes all stratification levels to the sum of all community feature totals, but other options 
(such as level-wise normalization) are supported. "Special" features (such as UNMAPPED) can be included or excluded in the 
normalization process.
    ]]></help>
    <expand macro="citations"/>
</tool>