Mercurial > repos > iuc > ancombc

<tool id="ancombc" name="ANCOM-BC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
  <description>differential abundance analysis</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <!--
        Creates a local 'output_dir' directory and then runs the bundled ancombc.R
        script, forwarding every tool parameter as a command-line option.
        Text and boolean values are single-quoted; numeric values are passed bare.
        The same 'output_dir' is named in the discover_datasets element of the
        outputs section, so files left there (presumably written by ancombc.R -
        confirm against the script) become the elements of the output collection.
        detect_errors="exit_code" makes a non-zero exit status fail the job.
    -->
    <command detect_errors="exit_code"><![CDATA[
mkdir 'output_dir' &&

Rscript '${__tool_directory__}/ancombc.R'
--phyloseq '$phyloseq'
--formula '$formula'
--p_adj_method '$p_adj_method'
--zero_cut $zero_cut
--lib_cut $lib_cut
--group '$group'
--struc_zero '$struc_zero'
--neg_lb '$neg_lb'
--tol $tol
--max_iter $max_iter
--conserve '$conserve'
--alpha $alpha
--global '$global'
--output_dir 'output_dir'
    ]]></command>
    <!--
        Tool parameters. Each param's "argument" corresponds one-to-one to an
        option passed to ancombc.R in the command section.
        Boolean params are rendered as the literal strings "true" / "false".
    -->
    <inputs>
        <param argument="--phyloseq" type="data" format="phyloseq" label="File containing a phyloseq object"/>
        <!-- The formula is mandatory: an empty value is rejected before the job is submitted. -->
        <param argument="--formula" type="text" value="" label="Formula" help="Expresses how the microbial absolute abundances for each taxon depend on the variables in the metadata">
            <expand macro="sanitize_query"/>
            <validator type="empty_field" message="A formula is required"/>
        </param>
        <param argument="--p_adj_method" type="select" label="Select method to adjust p-values">
            <option value="holm" selected="true">holm</option>
            <option value="hochberg">hochberg</option>
            <option value="hommel">hommel</option>
            <option value="bonferroni">bonferroni</option>
            <option value="BH">BH</option>
            <option value="BY">BY</option>
            <option value="fdr">fdr</option>
            <option value="none">none</option>
        </param>
        <param argument="--zero_cut" type="float" value="0.1" min="0" max="1" label="Minimum taxa prevalence" help="Taxa with prevalences less than this will be excluded"/>
        <param argument="--lib_cut" type="integer" value="0" min="0" label="Threshold for filtering samples based on library sizes" help="Samples with library sizes less than this will be excluded"/>
        <!-- May be left empty; it is only needed when structural-zero detection or the global test is enabled. -->
        <param argument="--group" type="text" value="" label="Name of the group variable in the metadata" help="Group should be discrete, specifying group is required for detecting structural zeros and performing global test"/>
        <param argument="--struc_zero" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Detect structural zeros based on group?"/>
        <param argument="--neg_lb" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Classify a taxon as a structural zero using its asymptotic lower bound?"/>
        <param argument="--tol" type="float" value="0.00001" min="0" label="Iteration convergence tolerance for the E-M algorithm"/>
        <param argument="--max_iter" type="integer" value="100" min="1" label="Maximum number of iterations for the E-M algorithm"/>
        <param argument="--conserve" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use a conservative variance estimator for the test statistic?" help="Recommended if the sample size is small and/or the number of differentially abundant taxa is believed to be large"/>
        <!-- A significance level is a probability, so it is bounded to the interval [0, 1]. -->
        <param argument="--alpha" type="float" value="0.05" min="0" max="1" label="Level of significance"/>
        <param argument="--global" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Perform global test?"/>
    </inputs>
    <!--
        One list collection, populated after the job finishes from the files
        found in 'output_dir'. The __name_and_ext__ pattern derives each
        element's identifier and datatype from the file's base name and extension.
    -->
    <outputs>
        <collection type="list" name="output_collection">
            <discover_datasets directory="output_dir" pattern="__name_and_ext__"/>
        </collection>
    </outputs>
    <tests>
        <!--
            Single regression test: runs the tool on input1.phyloseq with
            patient_status as both formula and group, fdr adjustment, a 0.9
            prevalence cut-off, and structural-zero detection, neg_lb, the
            conservative variance estimator and the global test all enabled.
            Expects exactly 13 tabular elements in the output collection; most
            are checked for a marker string and an approximate size in bytes.
        -->
        <test expect_num_outputs="1">
            <param name="phyloseq" value="input1.phyloseq" ftype="phyloseq"/>
            <param name="formula" value="patient_status"/>
            <param name="p_adj_method" value="fdr"/>
            <param name="zero_cut" value="0.9"/>
            <param name="group" value="patient_status"/>
            <param name="struc_zero" value="true"/>
            <param name="neg_lb" value="true"/>
            <param name="conserve" value="true"/>
            <param name="global" value="true"/>
            <output_collection name="output_collection" type="list" count="13">
                <element name="delta_em" ftype="tabular">
                    <assert_contents>
                        <has_text text="0.2855054"/>
                    </assert_contents>
                </element>
                <element name="delta_wls" ftype="tabular">
                    <assert_contents>
                        <has_text text="0.2992838"/>
                    </assert_contents>
                </element>
                <element name="feature_table" ftype="tabular">
                    <assert_contents>
                        <has_text text="A110"/>
                        <has_size value="4270" delta="100"/>
                    </assert_contents>
                </element>
                <element name="res_W" ftype="tabular">
                    <assert_contents>
                        <has_text text="patient_statusControl"/>
                        <has_size value="1364" delta="100"/>
                    </assert_contents>
                </element>
                <element name="res_beta" ftype="tabular">
                    <assert_contents>
                        <has_text text="patient_statusControl"/>
                        <has_size value="1369" delta="100"/>
                    </assert_contents>
                </element>
                <element name="res_diff_abn" ftype="tabular">
                    <assert_contents>
                        <has_text text="FALSE"/>
                        <has_size value="760" delta="100"/>
                    </assert_contents>
                </element>
                <!-- NOTE(review): exact size of 1 byte with no delta; presumably the global result is effectively empty for this dataset - confirm. -->
                <element name="res_global" ftype="tabular">
                    <assert_contents>
                        <has_size value="1"/>
                    </assert_contents>
                </element>
                <element name="res_p_val" ftype="tabular">
                    <assert_contents>
                        <has_text text="patient_statusControl"/>
                        <has_size value="1180" delta="100"/>
                    </assert_contents>
                </element>
                <element name="res_q_val" ftype="tabular">
                    <assert_contents>
                        <has_text text="patient_statusControl"/>
                        <has_size value="1155" delta="100"/>
                    </assert_contents>
                </element>
                <element name="res_se" ftype="tabular">
                    <assert_contents>
                        <has_text text="patient_statusControl"/>
                        <has_size value="1348" delta="100"/>
                    </assert_contents>
                </element>
                <element name="resid" ftype="tabular">
                    <assert_contents>
                        <has_text text="A110"/>
                        <has_size value="24241" delta="100"/>
                    </assert_contents>
                </element>
                <element name="samp_frac" ftype="tabular">
                    <assert_contents>
                        <has_text text="x"/>
                        <has_size value="501" delta="100"/>
                    </assert_contents>
                </element>
                <element name="zero_ind" ftype="tabular">
                    <assert_contents>
                        <has_text text="structural_zero"/>
                        <has_size value="1113" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help>
**What it does**

Performs a differential abundance analysis for microbiome data. Microbiome data are typically subject
to two sources of biases: unequal sampling fractions (sample-specific biases) and differential sequencing
efficiencies (taxon-specific biases). ANCOMBC package includes methodologies that aim to correct these
biases and construct statistically consistent estimators.

A dataset containing a phyloseq object is a required input.  The phyloseq object must consist of a feature table (microbial observed abundance table), sample metadata, a taxonomy table (optional), and a phylogenetic tree (optional). The row names of the metadata must match the sample names of the feature table, and the row names of the taxonomy table must match the taxon (feature) names of the feature table.

The tool produces a collection consisting of the following items.

 * **feature_table** - a pre-processed (based on --zero_cut and --lib_cut) microbial observed abundance table
 * **zero_ind** - a logical matrix with TRUE indicating the taxon is identified as a structural zero for the specified group variable
 * **samp_frac** - a numeric vector of estimated sampling fractions in log scale (natural log) - if any sample contains missing values for any variable specified in the formula, the corresponding sampling fraction estimate for this sample will be NA since the sampling fraction is not estimable with the presence of missing values
 * **resid** - a matrix of residuals from the ANCOM-BC log-linear (natural log) model - rows are taxa and columns are samples
 * **delta_em** - estimated sample-specific biases through E-M algorithm
 * **delta_wls** - estimated sample-specific biases through weighted least squares (WLS) algorithm
 * **res_beta** - a table of coefficients (log fold changes) obtained from the ANCOM-BC log-linear (natural log) model
 * **res_se** - a table of standard errors (SEs) of beta
 * **res_W** - a table of test statistics. W = beta/se
 * **res_p_val** - a table of p-values obtained from two-sided Z-test using the test statistic W
 * **res_q_val** - a table of adjusted p-values obtained by applying p_adj_method to p_val
 * **res_diff_abn** - a table of logical values; TRUE if the taxon has q_val less than alpha
 * **res_global** - a table containing the ANCOM-BC global test result for the variable specified in the group
    </help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Wed, 01 Nov 2023 08:48:16 +0000
parents	939c59ab61cf
children