# HG changeset patch
# User iuc
# Date 1658007604 0
# Node ID 939c59ab61cf81f8eb45e4870d778c378af9f86a
# planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ancombc commit 045979180e44c683b5e0760f802af66c05abcae8
# diff -r 000000000000 -r 939c59ab61cf ancombc.R
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/ancombc.R Sat Jul 16 21:40:04 2022 +0000
# @@ -0,0 +1,85 @@
#!/usr/bin/env Rscript

# Command-line wrapper around ancombc(): parses the options declared below,
# reads a phyloseq object from an RDS file, runs the analysis and writes each
# component of the result as a separate file into --output_dir.

suppressPackageStartupMessages(library("ANCOMBC"))
suppressPackageStartupMessages(library("data.table"))
suppressPackageStartupMessages(library("optparse"))

# Options without an explicit `type` are delivered by optparse as character
# strings. Every numeric argument of ancombc() therefore declares its type
# here; --max_iter and --alpha previously reached ancombc() as strings.
option_list <- list(
  make_option(c("--phyloseq"), action = "store", dest = "phyloseq",
              help = "File containing a phyloseq object"),
  make_option(c("--formula"), action = "store", dest = "formula",
              help = "Formula"),
  make_option(c("--p_adj_method"), action = "store", dest = "p_adj_method",
              help = "Method to adjust p-values"),
  make_option(c("--zero_cut"), action = "store", dest = "zero_cut",
              type = "double", help = "Minimum taxa prevalence"),
  make_option(c("--lib_cut"), action = "store", dest = "lib_cut",
              type = "integer",
              help = "Threshold for filtering samples based on library sizes"),
  make_option(c("--group"), action = "store", dest = "group",
              help = "Name of the group variable in the metadata"),
  make_option(c("--struc_zero"), action = "store", dest = "struc_zero",
              help = "Detect structural zeros based on group"),
  make_option(c("--neg_lb"), action = "store", dest = "neg_lb",
              help = "Classify a taxon as a structural zero using its asymptotic lower bound"),
  make_option(c("--tol"), action = "store", dest = "tol", type = "double",
              help = "Iteration convergence tolerance for the E-M algorithm"),
  make_option(c("--max_iter"), action = "store", dest = "max_iter",
              type = "integer",
              help = "Maximum number of iterations for the E-M algorithm"),
  make_option(c("--conserve"), action = "store", dest = "conserve",
              help = "Use a conservative variance estimator for the test statistic"),
  make_option(c("--alpha"), action = "store", dest = "alpha",
              type = "double", help = "Level of significance"),
  make_option(c("--global"), action = "store", dest = "global",
              help = "Perform global test"),
  make_option(c("--output_dir"), action = "store", dest = "output_dir",
              help = "Output directory")
)

parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
args <- parse_args(parser, positional_arguments = TRUE)
opt <- args$options

# Fail early with a readable message instead of an obscure error from
# readRDS(), ancombc() or the writers further down.
required_opts <- c("phyloseq", "formula", "output_dir")
missing_opts <- required_opts[
  vapply(required_opts, function(name) is.null(opt[[name]]), logical(1))
]
if (length(missing_opts) > 0) {
  stop(
    "Missing required option(s): ",
    paste0("--", missing_opts, collapse = ", "),
    call. = FALSE
  )
}

# Map a command-line flag value to a logical.
#
# val: the option value as parsed by optparse (a string, or NULL when the
#      option was not supplied).
# Returns TRUE only for the exact string "true"; anything else, including
# NULL and NA, yields FALSE.
get_boolean_value <- function(val) {
  isTRUE(val == "true")
}

# Build the path of `file_name` inside directory `dir`, joined with "/".
get_file_path <- function(dir, file_name) {
  file.path(dir, file_name)
}

# Write `data_frame` to `dir`/`file_name` as unquoted, tab-separated text
# that includes both the row names and the column header.
write_data_frame <- function(dir, file_name, data_frame) {
  file_path <- get_file_path(dir, file_name)
  write.table(
    data_frame,
    file = file_path,
    quote = FALSE,
    row.names = TRUE,
    col.names = TRUE,
    sep = "\t"
  )
}

# Convert boolean values to boolean.
struc_zero <- get_boolean_value(opt$struc_zero)
neg_lb <- get_boolean_value(opt$neg_lb)
conserve <- get_boolean_value(opt$conserve)
global <- get_boolean_value(opt$global)

# Construct a phyloseq object.
phyloseq_obj <- readRDS(opt$phyloseq)

# Construct an ANCOM-BC object.
ancombc_obj <- ancombc(
  phyloseq = phyloseq_obj,
  formula = opt$formula,
  p_adj_method = opt$p_adj_method,
  zero_cut = opt$zero_cut,
  lib_cut = opt$lib_cut,
  group = opt$group,
  struc_zero = struc_zero,
  neg_lb = neg_lb,
  tol = opt$tol,
  max_iter = opt$max_iter,
  conserve = conserve,
  alpha = opt$alpha,
  global = global
)

res <- ancombc_obj$res

# Write the outputs.
write_data_frame(opt$output_dir, "feature_table.tabular", ancombc_obj$feature_table)
write_data_frame(opt$output_dir, "zero_ind.tabular", ancombc_obj$zero_ind)
# write.csv2() ignores `col.names` and `sep` (with a warning) and always emits
# ';'-separated values with decimal commas. write.table() is used instead so
# the file really is header-less, without row names, and uses "." decimals,
# as the original arguments requested.
write.table(
  ancombc_obj$samp_frac,
  file = get_file_path(opt$output_dir, "samp_frac.tabular"),
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE,
  sep = "\t"
)
write_data_frame(opt$output_dir, "resid.tabular", ancombc_obj$resid)
write(ancombc_obj$delta_em, file = get_file_path(opt$output_dir, "delta_em.tabular"))
write(ancombc_obj$delta_wls, file = get_file_path(opt$output_dir, "delta_wls.tabular"))
write_data_frame(opt$output_dir, "res_beta.tabular", res$beta)
write_data_frame(opt$output_dir, "res_se.tabular", res$se)
write_data_frame(opt$output_dir, "res_W.tabular", res$W)
write_data_frame(opt$output_dir, "res_p_val.tabular", res$p_val)
write_data_frame(opt$output_dir, "res_q_val.tabular", res$q_val)
write_data_frame(opt$output_dir, "res_diff_abn.tabular", res$diff_abn)
write_data_frame(opt$output_dir, "res_global.tabular", ancombc_obj$res_global)
diff -r 000000000000 -r 939c59ab61cf ancombc.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ancombc.xml Sat Jul 16 21:40:04 2022 +0000 @@ -0,0 +1,175 @@ + + differential abundance analysis + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Performs a differential abundance analysis for microbiome data. Microbiome data are typically subject +to two sources of biases: unequal sampling fractions (sample-specific biases) and differential sequencing +efficiencies (taxon-specific biases). ANCOMBC package includes methodologies that aim to correct these +biases and construct statistically consistent estimators. + +A dataset containing a phyloseq object is a required input. 
The phyloseq object must consist of a feature table (microbial observed abundance table), sample metadata, a taxonomy table (optional), and a phylogenetic tree (optional). The row names of the metadata must match the sample names of the feature table, and the row names of the taxonomy table must match the taxon (feature) names of the feature table. + +The tool produces a collection consisting of the following items. + + * **feature_table** - a pre-processed (based on --zero_cut and --lib_cut) microbial observed abundance table + * **zero_ind** - a logical matrix with TRUE indicating the taxon is identified as a structural zero for the specified group variable + * **samp_frac** - a numeric vector of estimated sampling fractions in log scale (natural log) - if any sample contains missing values for any variable specified in the formula, the corresponding sampling fraction estimate for this sample will be NA since the sampling fraction is not estimable with the presence of missing values + * **resid** - a matrix of residuals from the ANCOM-BC log-linear (natural log) model - rows are taxa and columns are samples + * **delta_em** - estimated sample-specific biases through the E-M algorithm + * **delta_wls** - estimated sample-specific biases through the weighted least squares (WLS) algorithm + * **res_beta** - a table of log fold changes (lfc) obtained from the ANCOM-BC log-linear (natural log) model + * **res_se** - a table of standard errors (SEs) of lfc + * **res_W** - a table of test statistics. 
W = lfc/se + * **res_p_val** - a table of p-values obtained from a two-sided Z-test using the test statistic W + * **res_q_val** - a table of adjusted p-values obtained by applying p_adj_method to p_val + * **res_diff_abn** - a table of logical values; TRUE if the taxon has q_val less than alpha + * **res_global** - a table containing the ANCOM-BC global test result for the variable specified in the group + + + + diff -r 000000000000 -r 939c59ab61cf macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Sat Jul 16 21:40:04 2022 +0000 @@ -0,0 +1,29 @@ + + 1.4.0 + 0 + 21.01 + + + bioconductor-ancombc + r-data.table + r-optparse + + + + + + + + + + + + + + + 10.1038/s41467-020-17041-7 + 10.3402/mehd.v26.27663 + + + + diff -r 000000000000 -r 939c59ab61cf test-data/input1.phyloseq Binary file test-data/input1.phyloseq has changed