Mercurial > repos > iuc > phyloseq_from_dada2
changeset 9:6000b8a8dd9d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit a5ae2f86b2955290a4c81ab234f02cc51020f970
author | iuc |
---|---|
date | Thu, 13 Mar 2025 09:49:14 +0000 |
parents | 0b9d4ec77c4f |
children | |
files | macros.xml phyloseq_plot_bar.R static/images/asv_fill_plot.png static/images/facet_plot.png static/images/standard_plot.png test-data/expected_output_01.png test-data/expected_output_02.png test-data/expected_output_03.png test-data/expected_output_04.png test-data/expected_output_05.png test-data/expected_output_06.png |
diffstat | 11 files changed, 102 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue Feb 04 14:39:44 2025 +0000 +++ b/macros.xml Thu Mar 13 09:49:14 2025 +0000 @@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">1.50.0</token> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> <token name="@PROFILE@">21.01</token> <xml name="bio_tools"> <xrefs>
--- a/phyloseq_plot_bar.R Tue Feb 04 14:39:44 2025 +0000 +++ b/phyloseq_plot_bar.R Thu Mar 13 09:49:14 2025 +0000 @@ -4,6 +4,7 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("phyloseq")) suppressPackageStartupMessages(library("ggplot2")) +suppressPackageStartupMessages(library("dplyr")) # Define options option_list <- list( @@ -17,7 +18,7 @@ ), make_option(c("--fill"), action = "store", dest = "fill", default = NULL, - help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)" + help = "Variable for fill color (e.g., 'Genus', 'Order'). Use 'ASV' as argument to show each OTU/ASV." ), make_option(c("--facet"), action = "store", dest = "facet", default = NULL, @@ -43,6 +44,10 @@ action = "store_true", dest = "normalize", default = FALSE, help = "Normalize abundances to sum to 100% (optional)" ), + make_option(c("--normalize_x"), + action = "store_true", dest = "normalize_x", default = FALSE, + help = "Normalize x groups to sum up to 100%" + ), make_option(c("--width"), action = "store", dest = "width", default = 10, type = "numeric", help = "Width of the output plot in inches" @@ -53,19 +58,21 @@ ), make_option(c("--device"), action = "store", dest = "device", default = "pdf", - help = "Output format (e.g., 'pdf', 'png', 'jpeg')" + help = "Output format (e.g., 'pdf', 'png', 'jpg')" ), make_option(c("--nolines"), - type = "logical", default = FALSE, + action = "store_true", dest = "nolines", default = FALSE, help = "Remove borders (lines) around bars (TRUE/FALSE)" ) ) + # Parse arguments parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) args <- parse_args(parser, positional_arguments = TRUE) opt <- args$options + # Validate required options if (is.null(opt$input) || opt$input == "") { stop("Error: Input file is required.") @@ -74,10 +81,28 @@ stop("Error: Output file is required.") } +if (is.null(opt$fill) || opt$fill == "") { + print(paste("No fill chosen using ASV")) + opt$fill <- "ASV" +} + # Load phyloseq object print(paste("Trying to read:", opt$input)) physeq <- readRDS(opt$input) +## Allow to use OTU as tax group +# Extract rownames (taxids) from the tax_table and add them as a new column +taxids <- rownames(tax_table(physeq)) + +# Get the number of columns in the tax_table +num_columns <- ncol(tax_table(physeq)) + +# Add the taxids as a new last column in the tax_table +tax_table(physeq) <- cbind(tax_table(physeq), taxid = taxids) + +# Rename the last column to 'ASV' / OTU does conflict with phyloseq logic +colnames(tax_table(physeq))[num_columns + 1] <- "ASV" + # Normalize to relative abundances if requested if (opt$normalize) { print("Normalizing abundances to sum to 100%...") @@ -85,16 +110,22 @@ } # Debug: Check available taxonomic ranks + +tax_ranks <- colnames(tax_table(physeq)) +sample_vars <- colnames(sample_data(physeq)) + print("Available taxonomic ranks:") -print(colnames(tax_table(physeq))) +print(tax_ranks) + +print("Available metadata:") +print(sample_vars) # Handle missing or unassigned taxa for all ranks if (opt$keepNonAssigned) { # Replace NA or empty values with 'Not Assigned' for all ranks - tax_ranks <- colnames(tax_table(physeq)) for (rank in tax_ranks) { - if (rank %in% colnames(tax_table(physeq))) { + if (rank %in% tax_ranks) { # replace NA or empty values with 'Not Assigned' tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" } @@ -124,7 +155,9 @@ otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] + # Group non-top OTUs as 'Others' if requested if (opt$keepOthers) { + # Update the tax_table to assign 'Others' to non-top taxa tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" physeq <- physeq_agg } else { @@ -132,30 +165,76 @@ } } + +# normalize x groups if needed +if (opt$x %in% sample_vars) { + if (opt$normalize_x && !is.null(opt$x) && opt$x != "") { + physeq_agg <- merge_samples(physeq, opt$x) + + physeq <- transform_sample_counts(physeq_agg, function(x) (x / sum(x) * 100)) + opt$x <- NULL # set to Null since we do not need x for downstream now + opt$facet <- NULL # set to Null since facetting does not work with normalize x + warning(paste("normalize x does not work with facetting")) + } +} else { + warning(paste("x", opt$x, "not found in sample data. Skipping normalize_x.")) +} + + +# Check if the facet variable is valid and exists +facet_var <- NULL +if (!is.null(opt$facet) && opt$facet != "") { + if (opt$facet %in% sample_vars || opt$facet %in% tax_ranks) { + facet_var <- opt$facet # Store facet variable for later + } else { + warning(paste("Facet variable", opt$facet, "not found in sample data or tax ranks. Skipping faceting.")) + } +} + +# Determine if faceting is needed +facet_formula <- if (!is.null(facet_var)) as.formula(paste("~", facet_var)) else NULL + +# Define color based on the `nolines` option +plot_color <- ifelse(opt$nolines, NA, "black") + # Generate bar plot if (!is.null(opt$x) && opt$x != "") { - p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + - geom_bar(aes(fill = !!sym(opt$fill)), - stat = "identity", position = "stack", - color = ifelse(opt$nolines, NA, "black") + p <- plot_bar(physeq, + x = opt$x, + fill = opt$fill + ) + facet_wrap(facet_formula, scales = "free_x") + + geom_bar( + stat = "identity", + position = "stack", + aes(fill = !!sym(opt$fill)), + color = plot_color ) } else { - p <- plot_bar(physeq, fill = opt$fill) + - geom_bar(aes(fill = !!sym(opt$fill)), - stat = "identity", position = "stack", - color = ifelse(opt$nolines, NA, "black") + p <- plot_bar(physeq, + fill = opt$fill + ) + facet_wrap(facet_formula, scales = "free_x") + + geom_bar( + stat = "identity", + position = "stack", + aes(fill = !!sym(opt$fill)), + color = plot_color ) } -# Optional: Add faceting if specified -if (!is.null(opt$facet) && opt$facet != "") { - sample_vars <- colnames(sample_data(physeq)) - if (opt$facet %in% sample_vars) { - p <- p + facet_wrap(as.formula(paste("~", opt$facet))) - } else { - warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting.")) - } + +# Reorder fill levels to ensure "Not Assigned" and "Others" are at the bottom if they exist +fill_values <- unique(p$data[[opt$fill]]) # Get unique fill values +new_levels <- setdiff(fill_values, c("Not Assigned", "Others")) # Exclude "Not Assigned" and "Others" + +if ("Not Assigned" %in% fill_values) { + new_levels <- c("Not Assigned", new_levels) # Place "Not Assigned" at the bottom if it exists } +if ("Others" %in% fill_values) { + new_levels <- c("Others", new_levels) # Place "Others" at the bottom if it exists +} + +# Apply the new levels to the fill variable in the plot data +p$data[[opt$fill]] <- factor(p$data[[opt$fill]], levels = new_levels) # Save to output file ggsave(