changeset 6:8a9c340debc8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit a5ae2f86b2955290a4c81ab234f02cc51020f970
author iuc
date Thu, 13 Mar 2025 09:49:02 +0000
parents 94563110275d
children e9442bd54200
files macros.xml phyloseq_plot_bar.R phyloseq_plot_bar.xml static/images/asv_fill_plot.png static/images/facet_plot.png static/images/standard_plot.png test-data/expected_output_01.png test-data/expected_output_02.png test-data/expected_output_03.png test-data/expected_output_04.png test-data/expected_output_05.png test-data/expected_output_06.png
diffstat 12 files changed, 218 insertions(+), 83 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Tue Feb 04 14:39:08 2025 +0000
+++ b/macros.xml	Thu Mar 13 09:49:02 2025 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">1.50.0</token>
-    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@VERSION_SUFFIX@">3</token>
     <token name="@PROFILE@">21.01</token>
     <xml name="bio_tools">
         <xrefs>
--- a/phyloseq_plot_bar.R	Tue Feb 04 14:39:08 2025 +0000
+++ b/phyloseq_plot_bar.R	Thu Mar 13 09:49:02 2025 +0000
@@ -4,6 +4,7 @@
 suppressPackageStartupMessages(library("optparse"))
 suppressPackageStartupMessages(library("phyloseq"))
 suppressPackageStartupMessages(library("ggplot2"))
+suppressPackageStartupMessages(library("dplyr"))
 
 # Define options
 option_list <- list(
@@ -17,7 +18,7 @@
     ),
     make_option(c("--fill"),
         action = "store", dest = "fill", default = NULL,
-        help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)"
+        help = "Variable for fill color (e.g., 'Genus', 'Order'). Use 'ASV' as argument to show each OTU/ASV."
     ),
     make_option(c("--facet"),
         action = "store", dest = "facet", default = NULL,
@@ -43,6 +44,10 @@
         action = "store_true", dest = "normalize", default = FALSE,
         help = "Normalize abundances to sum to 100% (optional)"
     ),
+    make_option(c("--normalize_x"),
+        action = "store_true", dest = "normalize_x", default = FALSE,
+        help = "Normalize x groups to sum up to 100%"
+    ),
     make_option(c("--width"),
         action = "store", dest = "width", default = 10,
         type = "numeric", help = "Width of the output plot in inches"
@@ -53,19 +58,21 @@
     ),
     make_option(c("--device"),
         action = "store", dest = "device", default = "pdf",
-        help = "Output format (e.g., 'pdf', 'png', 'jpeg')"
+        help = "Output format (e.g., 'pdf', 'png', 'jpg')"
     ),
     make_option(c("--nolines"),
-        type = "logical", default = FALSE,
+        action = "store_true", dest = "nolines", default = FALSE,
         help = "Remove borders (lines) around bars (TRUE/FALSE)"
     )
 )
 
+
 # Parse arguments
 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
 args <- parse_args(parser, positional_arguments = TRUE)
 opt <- args$options
 
+
 # Validate required options
 if (is.null(opt$input) || opt$input == "") {
     stop("Error: Input file is required.")
@@ -74,10 +81,28 @@
     stop("Error: Output file is required.")
 }
 
+if (is.null(opt$fill) || opt$fill == "") {
+    print(paste("No fill chosen using ASV"))
+    opt$fill <- "ASV"
+}
+
 # Load phyloseq object
 print(paste("Trying to read:", opt$input))
 physeq <- readRDS(opt$input)
 
+## Allow using the OTU/ASV identifier as a taxonomic group
+# Extract rownames (taxids) from the tax_table and add them as a new column
+taxids <- rownames(tax_table(physeq))
+
+# Get the number of columns in the tax_table
+num_columns <- ncol(tax_table(physeq))
+
+# Add the taxids as a new last column in the tax_table
+tax_table(physeq) <- cbind(tax_table(physeq), taxid = taxids)
+
+# Rename the last column to 'ASV' (the name 'OTU' conflicts with phyloseq logic)
+colnames(tax_table(physeq))[num_columns + 1] <- "ASV"
+
 # Normalize to relative abundances if requested
 if (opt$normalize) {
     print("Normalizing abundances to sum to 100%...")
@@ -85,16 +110,22 @@
 }
 
 # Debug: Check available taxonomic ranks
+
+tax_ranks <- colnames(tax_table(physeq))
+sample_vars <- colnames(sample_data(physeq))
+
 print("Available taxonomic ranks:")
-print(colnames(tax_table(physeq)))
+print(tax_ranks)
+
+print("Available metadata:")
+print(sample_vars)
 
 # Handle missing or unassigned taxa for all ranks
 if (opt$keepNonAssigned) {
     # Replace NA or empty values with 'Not Assigned' for all ranks
-    tax_ranks <- colnames(tax_table(physeq))
 
     for (rank in tax_ranks) {
-        if (rank %in% colnames(tax_table(physeq))) {
+        if (rank %in% tax_ranks) {
             # replace NA or empty values with 'Not Assigned'
             tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned"
         }
@@ -124,7 +155,9 @@
 
     otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]
 
+    # Group non-top OTUs as 'Others' if requested
     if (opt$keepOthers) {
+        # Update the tax_table to assign 'Others' to non-top taxa
         tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others"
         physeq <- physeq_agg
     } else {
@@ -132,30 +165,76 @@
     }
 }
 
+
+# normalize x groups if needed
+if (opt$x %in% sample_vars) {
+    if (opt$normalize_x && !is.null(opt$x) && opt$x != "") {
+        physeq_agg <- merge_samples(physeq, opt$x)
+
+        physeq <- transform_sample_counts(physeq_agg, function(x) (x / sum(x) * 100))
+        opt$x <- NULL # set to Null since we do not need x for downstream now
+        opt$facet <- NULL # set to Null since facetting does not work with normalize x
+        warning(paste("normalize x does not work with facetting"))
+    }
+} else {
+    warning(paste("x", opt$x, "not found in sample data. Skipping normalize_x."))
+}
+
+
+# Check if the facet variable is valid and exists
+facet_var <- NULL
+if (!is.null(opt$facet) && opt$facet != "") {
+    if (opt$facet %in% sample_vars || opt$facet %in% tax_ranks) {
+        facet_var <- opt$facet # Store facet variable for later
+    } else {
+        warning(paste("Facet variable", opt$facet, "not found in sample data or tax ranks. Skipping faceting."))
+    }
+}
+
+# Determine if faceting is needed
+facet_formula <- if (!is.null(facet_var)) as.formula(paste("~", facet_var)) else NULL
+
+# Define color based on the `nolines` option
+plot_color <- ifelse(opt$nolines, NA, "black")
+
 # Generate bar plot
 if (!is.null(opt$x) && opt$x != "") {
-    p <- plot_bar(physeq, x = opt$x, fill = opt$fill) +
-        geom_bar(aes(fill = !!sym(opt$fill)),
-            stat = "identity", position = "stack",
-            color = ifelse(opt$nolines, NA, "black")
+    p <- plot_bar(physeq,
+        x = opt$x,
+        fill = opt$fill
+    ) + facet_wrap(facet_formula, scales = "free_x") +
+        geom_bar(
+            stat = "identity",
+            position = "stack",
+            aes(fill = !!sym(opt$fill)),
+            color = plot_color
         )
 } else {
-    p <- plot_bar(physeq, fill = opt$fill) +
-        geom_bar(aes(fill = !!sym(opt$fill)),
-            stat = "identity", position = "stack",
-            color = ifelse(opt$nolines, NA, "black")
+    p <- plot_bar(physeq,
+        fill = opt$fill
+    ) + facet_wrap(facet_formula, scales = "free_x") +
+        geom_bar(
+            stat = "identity",
+            position = "stack",
+            aes(fill = !!sym(opt$fill)),
+            color = plot_color
         )
 }
 
-# Optional: Add faceting if specified
-if (!is.null(opt$facet) && opt$facet != "") {
-    sample_vars <- colnames(sample_data(physeq))
-    if (opt$facet %in% sample_vars) {
-        p <- p + facet_wrap(as.formula(paste("~", opt$facet)))
-    } else {
-        warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting."))
-    }
+
+# Reorder fill levels to ensure "Not Assigned" and "Others" are at the bottom if they exist
+fill_values <- unique(p$data[[opt$fill]]) # Get unique fill values
+new_levels <- setdiff(fill_values, c("Not Assigned", "Others")) # Exclude "Not Assigned" and "Others"
+
+if ("Not Assigned" %in% fill_values) {
+    new_levels <- c("Not Assigned", new_levels) # Place "Not Assigned" at the bottom if it exists
 }
+if ("Others" %in% fill_values) {
+    new_levels <- c("Others", new_levels) # Place "Others" at the bottom if it exists
+}
+
+# Apply the new levels to the fill variable in the plot data
+p$data[[opt$fill]] <- factor(p$data[[opt$fill]], levels = new_levels)
 
 # Save to output file
 ggsave(
--- a/phyloseq_plot_bar.xml	Tue Feb 04 14:39:08 2025 +0000
+++ b/phyloseq_plot_bar.xml	Thu Mar 13 09:49:02 2025 +0000
@@ -10,45 +10,46 @@
 --input '$input'
 --x '$x'
 --fill '$fill'
---facet '${facet}'
+--facet '$facet'
+--topX '$topX'
+'$keepOthers'
+'$keepNonAssigned'
+'$normalize'
+'$normalize_x'
+'$nolines'
+--width '$width'
+--height '$height'
+--device '$device'
 --output '$output'
---topX '${topX}'
---keepOthers '${keepOthers}'
---keepNonAssigned '${keepNonAssigned}'
---normalize '${normalize}'
---width '${width}'
---height '${height}'
---device '${device}'
---nolines '${nolines}'
   ]]></command>
   <inputs>
     <expand macro="phyloseq_input"/>
-    <param name="x" type="text" optional="true" label="X-axis variable" help="Variable for the x-axis (e.g., Sample, Phylum). If not specified, the Samples are taken." />
-    <param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Order)." />
+    <param name="x" type="text" optional="true" label="X-axis variable" help="Variable for the x-axis (Metadata columns). If not specified, the Samples are taken." />
+    <param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Species). Use 'ASV' as argument to show each OTU/ASV." />
     <param name="facet" type="text" optional="true" label="Facet by variable" help="Variable to facet the chart by (e.g., SampleType)." />
     <param name="topX" value="10" type="integer" optional="true" label="Top X" help="Only show the ranks with the top X abundance." />
-    <param name="keepOthers" type="boolean" label="Keep 'Others'" help="Keep OTUs which are not in top X as 'Others'." />
-    <param name="keepNonAssigned" type="boolean" label="Keep Non Assigned" help="Keep OTUs that are not assigned at this rank and label as 'Not Assigned'." />
-    <param name="normalize" type="boolean" label="Normalize" help="Normalize abundances to sum to 100%. Normalization is performed before Top X selection." />
+    <param name="keepOthers" type="boolean" truevalue="--keepOthers" falsevalue="" label="Keep 'Others'" help="Keep OTUs which are not in top X as 'Others'." />
+    <param name="keepNonAssigned" type="boolean" truevalue="--keepNonAssigned" falsevalue="" label="Keep 'Not Assigned'" help="Keep OTUs that are not assigned at this rank and label as 'Not Assigned'." />
+    <param name="normalize" type="boolean" truevalue="--normalize" falsevalue="" label="Normalize" help="Normalize abundances to sum to 100%. Normalization is performed before Top X selection." />
+    <param name="normalize_x" type="boolean" truevalue="--normalize_x" falsevalue="" label="Normalize x" help="Normalize abundances of the x group to sum to 100%. Normalization is performed after Top X selection. Ignores faceting!" />
+    <param name="nolines" type="boolean" truevalue="--nolines" falsevalue="" label="Remove Separation Lines" help="Do not add OTU separation lines in the bars." />
     <param name="width" type="float" value="10" optional="true" label="Plot Width" help="Width of the output plot in inches." />
     <param name="height" type="float" value="8" optional="true" label="Plot Height" help="Height of the output plot in inches." />
-    <param name="device" type="select" value="pdf" label="Output Device" help="Device to use for the output file. Options include pdf, png, and others.">
+    <param name="device" type="select" value="pdf" label="Output Device" help="File format to use for the output file. ">
       <option value="pdf">PDF</option>
       <option value="png">PNG</option>
-      <option value="jpeg">JPEG</option>
+      <option value="jpg">JPEG</option>
       <option value="tiff">TIFF</option>
     </param>
-    <param name="nolines" type="boolean" label="Remove Separation Lines" help="Do not add separation lines between bars in the chart." />
   </inputs>
   <outputs>
     <data name="output" format="pdf" label="Bar Chart (${device})">
       <change_format>
-        <when format="pdf" value="pdf"/>
-        <when format="png" value="png"/>
-        <when format="jpeg" value="jpeg"/>
-        <when format="tiff" value="tiff"/>
+        <when input="device" format="pdf" value="pdf"/>
+        <when input="device" format="png" value="png"/>
+        <when input="device" format="jpg" value="jpg"/>
+        <when input="device" format="tiff" value="tiff"/>
       </change_format>
-
     </data>
   </outputs>
 
@@ -58,13 +59,8 @@
       <param name="input" value="output.phyloseq" ftype="phyloseq"/>
       <param name="x" value="Property"/>
       <param name="fill" value="Phylum"/>
-      <param name="device" value="pdf"/>
-      <output name="output" ftype="pdf">
-        <assert_contents>
-          <has_text text="%PDF"/>
-          <has_text text="%%EOF"/>
-        </assert_contents>
-      </output>
+      <param name="device" value="png"/>
+      <output name="output" file="expected_output_01.png" ftype="png" compare="image_diff"/>
     </test>
 
     <!-- Test 2: TopX filtering and normalization -->
@@ -72,34 +68,31 @@
       <param name="input" value="output.phyloseq" ftype="phyloseq"/>
       <param name="x" value="Property"/>
       <param name="fill" value="Genus"/>
-      <param name="facet" value="SampleType"/>
+      <param name="facet" value="Number"/>
       <param name="topX" value="10"/>
       <param name="normalize" value="true"/>
-      <output name="output" ftype="pdf">
-        <assert_contents>
-          <has_text text="%PDF"/>
-          <has_text text="%%EOF"/>
-        </assert_contents>
-      </output>
+      <param name="device" value="png"/>
+      <output name="output" file="expected_output_02.png" ftype="png" compare="image_diff"/>
     </test>
 
-    <!-- Test 3: without OTU lines, Others and non assigned -->
-      <test>
+
+    <!-- Test 3: without OTU lines, with Others, without Not Assigned -->
+    <test>
       <param name="input" value="output.phyloseq" ftype="phyloseq"/>
       <param name="x" value="Sample"/>
       <param name="fill" value="Genus"/>
       <param name="facet" value=""/>
       <param name="topX" value="10"/>
       <param name="normalize" value="false"/>
-      <param name="keepOthers" value="false"/>
+      <param name="keepOthers" value="true"/>
       <param name="keepNonAssigned" value="false"/>
       <param name="nolines" value="true"/>
-      <param name="device" value="pdf"/>
-      <output name="output" file="expected_output.pdf" ftype="pdf" compare="sim_size"/>
+      <param name="device" value="png"/>
+      <output name="output" file="expected_output_03.png" ftype="png" compare="image_diff"/>
     </test>
 
-    <!-- Test 4: with normalization, Others and non assigned -->
-      <test>
+    <!-- Test 4: with normalization, Others and non assigned --> 
+    <test>
       <param name="input" value="output.phyloseq" ftype="phyloseq"/>
       <param name="x" value="Sample"/>
       <param name="fill" value="Genus"/>
@@ -109,32 +102,95 @@
       <param name="keepOthers" value="true"/>
       <param name="keepNonAssigned" value="true"/>
       <param name="nolines" value="false"/>
-      <param name="device" value="pdf"/>
-      <output name="output" file="expected_output_normalize.pdf" ftype="pdf" compare="sim_size"/>
+      <param name="device" value="png"/>
+      <output name="output" file="expected_output_04.png" ftype="png" compare="image_diff"/>
     </test>
+
+    <!-- Test 5: with normalization, Others and non assigned, normalization x --> 
+    <test>
+      <param name="input" value="output.phyloseq" ftype="phyloseq"/>
+      <param name="x" value="Property"/>
+      <param name="fill" value="Genus"/>
+      <param name="facet" value=""/>
+      <param name="topX" value="10"/>
+      <param name="normalize" value="true"/>
+      <param name="keepOthers" value="true"/>
+      <param name="keepNonAssigned" value="true"/>
+      <param name="normalize_x" value="true"/>
+      <param name="nolines" value="false"/>
+      <param name="device" value="png"/>
+      <output name="output" file="expected_output_05.png" ftype="png" compare="image_diff"/>
+    </test>
+
+    <!-- Test 6: Minimal -->
+    <test>
+      <param name="input" value="output.phyloseq" ftype="phyloseq"/>
+      <param name="device" value="png"/>
+      <output name="output" file="expected_output_06.png" ftype="png" compare="image_diff"/>
+    </test>
+
   </tests>
 
-  <help>
-    **Description**
+<help>
+<![CDATA[
+**Description**
+
+This tool generates bar charts from a phyloseq object using the 'plot_bar' function in R.
+It allows filtering, normalization, and customization of plots for visualizing microbial community composition.
+
+**Inputs**
+
+- **Input**: A phyloseq object in RDS format.
+
+- **X-axis variable**: The variable to use for the x-axis (Metadata columns).
+
+- **Fill variable**: (Optional) The variable to use for the bar fill colors (e.g., Genus, Species).
 
-    This tool generates bar charts from a phyloseq object using the `plot_bar` function.
+- **Facet by variable**: (Optional) A variable to facet the bar chart (e.g., SampleType).
+
+- **Top X**: Display only the top X most abundant taxa.
+
+- **Keep 'Others'**: Group non-top taxa as 'Others'.
+
+- **Keep 'Not Assigned'**: Retain unassigned taxa labeled as 'Not Assigned'.
 
-    **Inputs**
+- **Normalize**: Normalize abundances to sum to 100%.
+
+- **Remove Separation Lines**: Do not add OTU separation lines in the bars.
+
+- **Width and Height**: Dimensions of the plot in inches (default: 10x8).
+
+- **Device**: Output format (e.g., pdf, png, jpg, tiff).
 
-    - **Input**: A phyloseq object in RDS format.
-    - **X-axis variable**: The variable to use for the x-axis (e.g., Sample, Phylum).
-    - **Fill variable**: (Optional) The variable to use for the bar fill colors (e.g., Genus, Order).
-    - **Facet by variable**: (Optional) A variable to facet the bar chart (e.g., SampleType).
-    - **Width and Height**: Dimensions of the plot in inches (default: 10x8).
-    - **Device**: Output format (e.g., pdf, png, jpeg, tiff).
+**Outputs**
+
+- A file containing the bar chart in the specified format.
+
+**Usage Notes**
+
+Ensure that the input file is a valid phyloseq object in RDS format.
+
+**Examples**
+
+- *Basic Bar Plot with Others and Not Assigned and with separation lines*
 
-    **Outputs**
+  .. image:: $PATH_TO_IMAGES/standard_plot.png  
+     :width: 515  
+     :height: 395  
 
-    - A file containing the bar chart in the specified format.
+- *Faceted Plot*
 
-    **Usage Notes**
+  .. image:: $PATH_TO_IMAGES/facet_plot.png  
+     :width: 515  
+     :height: 395  
+
+- *ASV as fill input and removed separation lines*
 
-    Ensure that the input file is a valid phyloseq object in RDS format.
-  </help>
+  .. image:: $PATH_TO_IMAGES/asv_fill_plot.png  
+     :width: 515  
+     :height: 395  
+]]>
+</help>
+
   <expand macro="citations"/>
 </tool>
Binary file static/images/asv_fill_plot.png has changed
Binary file static/images/facet_plot.png has changed
Binary file static/images/standard_plot.png has changed
Binary file test-data/expected_output_01.png has changed
Binary file test-data/expected_output_02.png has changed
Binary file test-data/expected_output_03.png has changed
Binary file test-data/expected_output_04.png has changed
Binary file test-data/expected_output_05.png has changed
Binary file test-data/expected_output_06.png has changed