changeset 9:6000b8a8dd9d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit a5ae2f86b2955290a4c81ab234f02cc51020f970
author iuc
date Thu, 13 Mar 2025 09:49:14 +0000
parents 0b9d4ec77c4f
children
files macros.xml phyloseq_plot_bar.R static/images/asv_fill_plot.png static/images/facet_plot.png static/images/standard_plot.png test-data/expected_output_01.png test-data/expected_output_02.png test-data/expected_output_03.png test-data/expected_output_04.png test-data/expected_output_05.png test-data/expected_output_06.png
diffstat 11 files changed, 102 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Tue Feb 04 14:39:44 2025 +0000
+++ b/macros.xml	Thu Mar 13 09:49:14 2025 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">1.50.0</token>
-    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@VERSION_SUFFIX@">3</token>
     <token name="@PROFILE@">21.01</token>
     <xml name="bio_tools">
         <xrefs>
--- a/phyloseq_plot_bar.R	Tue Feb 04 14:39:44 2025 +0000
+++ b/phyloseq_plot_bar.R	Thu Mar 13 09:49:14 2025 +0000
@@ -4,6 +4,7 @@
 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("phyloseq"))
 suppressPackageStartupMessages(library("ggplot2"))
+suppressPackageStartupMessages(library("dplyr"))
 
 # Define options
 option_list <- list(
@@ -17,7 +18,7 @@
     ),
     make_option(c("--fill"),
         action = "store", dest = "fill", default = NULL,
-        help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)"
+        help = "Variable for fill color (e.g., 'Genus', 'Order'). Use 'ASV' as argument to show each OTU/ASV."
     ),
     make_option(c("--facet"),
         action = "store", dest = "facet", default = NULL,
@@ -43,6 +44,10 @@
         action = "store_true", dest = "normalize", default = FALSE,
         help = "Normalize abundances to sum to 100% (optional)"
     ),
+    make_option(c("--normalize_x"),
+        action = "store_true", dest = "normalize_x", default = FALSE,
+        help = "Normalize x groups to sum up to 100%"
+    ),
     make_option(c("--width"),
         action = "store", dest = "width", default = 10,
         type = "numeric", help = "Width of the output plot in inches"
@@ -53,19 +58,21 @@
     ),
     make_option(c("--device"),
         action = "store", dest = "device", default = "pdf",
-        help = "Output format (e.g., 'pdf', 'png', 'jpeg')"
+        help = "Output format (e.g., 'pdf', 'png', 'jpg')"
     ),
     make_option(c("--nolines"),
-        type = "logical", default = FALSE,
+        action = "store_true", dest = "nolines", default = FALSE,
         help = "Remove borders (lines) around bars (TRUE/FALSE)"
     )
 )
 
+
 # Parse arguments
 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
 args <- parse_args(parser, positional_arguments = TRUE)
 opt <- args$options
 
+
 # Validate required options
 if (is.null(opt$input) || opt$input == "") {
     stop("Error: Input file is required.")
@@ -74,10 +81,28 @@
     stop("Error: Output file is required.")
 }
 
+if (is.null(opt$fill) || opt$fill == "") {
+    print(paste("No fill chosen using ASV"))
+    opt$fill <- "ASV"
+}
+
 # Load phyloseq object
 print(paste("Trying to read:", opt$input))
 physeq <- readRDS(opt$input)
 
+## Allow using the OTU/ASV identifier as a taxonomic group
+# Extract rownames (taxids) from the tax_table and add them as a new column
+taxids <- rownames(tax_table(physeq))
+
+# Get the number of columns in the tax_table
+num_columns <- ncol(tax_table(physeq))
+
+# Add the taxids as a new last column in the tax_table
+tax_table(physeq) <- cbind(tax_table(physeq), taxid = taxids)
+
+# Rename the last column to 'ASV' (the name 'OTU' conflicts with phyloseq logic)
+colnames(tax_table(physeq))[num_columns + 1] <- "ASV"
+
 # Normalize to relative abundances if requested
 if (opt$normalize) {
     print("Normalizing abundances to sum to 100%...")
@@ -85,16 +110,22 @@
 }
 
 # Debug: Check available taxonomic ranks
+
+tax_ranks <- colnames(tax_table(physeq))
+sample_vars <- colnames(sample_data(physeq))
+
 print("Available taxonomic ranks:")
-print(colnames(tax_table(physeq)))
+print(tax_ranks)
+
+print("Available metadata:")
+print(sample_vars)
 
 # Handle missing or unassigned taxa for all ranks
 if (opt$keepNonAssigned) {
     # Replace NA or empty values with 'Not Assigned' for all ranks
-    tax_ranks <- colnames(tax_table(physeq))
 
     for (rank in tax_ranks) {
-        if (rank %in% colnames(tax_table(physeq))) {
+        if (rank %in% tax_ranks) {
             # replace NA or empty values with 'Not Assigned'
             tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned"
         }
@@ -124,7 +155,9 @@
 
     otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]
 
+    # Group non-top OTUs as 'Others' if requested
     if (opt$keepOthers) {
+        # Update the tax_table to assign 'Others' to non-top taxa
         tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others"
         physeq <- physeq_agg
     } else {
@@ -132,30 +165,76 @@
     }
 }
 
+
+# Normalize x groups to 100% if --normalize_x was requested
+if (opt$x %in% sample_vars) {
+    if (opt$normalize_x && !is.null(opt$x) && opt$x != "") {
+        physeq_agg <- merge_samples(physeq, opt$x)
+
+        physeq <- transform_sample_counts(physeq_agg, function(x) (x / sum(x) * 100))
+        opt$x <- NULL # set to Null since we do not need x for downstream now
+        opt$facet <- NULL # set to Null since facetting does not work with normalize x
+        warning(paste("normalize x does not work with facetting"))
+    }
+} else {
+    warning(paste("x", opt$x, "not found in sample data. Skipping normalize_x."))
+}
+
+
+# Check if the facet variable is valid and exists
+facet_var <- NULL
+if (!is.null(opt$facet) && opt$facet != "") {
+    if (opt$facet %in% sample_vars || opt$facet %in% tax_ranks) {
+        facet_var <- opt$facet # Store facet variable for later
+    } else {
+        warning(paste("Facet variable", opt$facet, "not found in sample data or tax ranks. Skipping faceting."))
+    }
+}
+
+# Build the facet formula if a valid facet variable was given (NULL otherwise)
+facet_formula <- if (!is.null(facet_var)) as.formula(paste("~", facet_var)) else NULL
+
+# Define the bar border color based on the `nolines` option
+plot_color <- ifelse(opt$nolines, NA, "black")
+
 # Generate bar plot
 if (!is.null(opt$x) && opt$x != "") {
-    p <- plot_bar(physeq, x = opt$x, fill = opt$fill) +
-        geom_bar(aes(fill = !!sym(opt$fill)),
-            stat = "identity", position = "stack",
-            color = ifelse(opt$nolines, NA, "black")
+    p <- plot_bar(physeq,
+        x = opt$x,
+        fill = opt$fill
+    ) + facet_wrap(facet_formula, scales = "free_x") +
+        geom_bar(
+            stat = "identity",
+            position = "stack",
+            aes(fill = !!sym(opt$fill)),
+            color = plot_color
         )
 } else {
-    p <- plot_bar(physeq, fill = opt$fill) +
-        geom_bar(aes(fill = !!sym(opt$fill)),
-            stat = "identity", position = "stack",
-            color = ifelse(opt$nolines, NA, "black")
+    p <- plot_bar(physeq,
+        fill = opt$fill
+    ) + facet_wrap(facet_formula, scales = "free_x") +
+        geom_bar(
+            stat = "identity",
+            position = "stack",
+            aes(fill = !!sym(opt$fill)),
+            color = plot_color
         )
 }
 
-# Optional: Add faceting if specified
-if (!is.null(opt$facet) && opt$facet != "") {
-    sample_vars <- colnames(sample_data(physeq))
-    if (opt$facet %in% sample_vars) {
-        p <- p + facet_wrap(as.formula(paste("~", opt$facet)))
-    } else {
-        warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting."))
-    }
+
+# Reorder fill levels to ensure "Not Assigned" and "Others" are at the bottom if they exist
+fill_values <- unique(p$data[[opt$fill]]) # Get unique fill values
+new_levels <- setdiff(fill_values, c("Not Assigned", "Others")) # Exclude "Not Assigned" and "Others"
+
+if ("Not Assigned" %in% fill_values) {
+    new_levels <- c("Not Assigned", new_levels) # Place "Not Assigned" at the bottom if it exists
 }
+if ("Others" %in% fill_values) {
+    new_levels <- c("Others", new_levels) # Place "Others" at the bottom if it exists
+}
+
+# Apply the new levels to the fill variable in the plot data
+p$data[[opt$fill]] <- factor(p$data[[opt$fill]], levels = new_levels)
 
 # Save to output file
 ggsave(
Binary file static/images/asv_fill_plot.png has changed
Binary file static/images/facet_plot.png has changed
Binary file static/images/standard_plot.png has changed
Binary file test-data/expected_output_01.png has changed
Binary file test-data/expected_output_02.png has changed
Binary file test-data/expected_output_03.png has changed
Binary file test-data/expected_output_04.png has changed
Binary file test-data/expected_output_05.png has changed
Binary file test-data/expected_output_06.png has changed