changeset 5:94563110275d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d6888da7aba38b97f6cb827355f2de436565684a
author iuc
date Tue, 04 Feb 2025 14:39:08 +0000
parents 9302992205ad
children 8a9c340debc8
files macros.xml phyloseq_plot_bar.R phyloseq_plot_bar.xml test-data/expected_output.pdf test-data/expected_output_normalize.pdf
diffstat 5 files changed, 81 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Sat Jan 25 17:11:49 2025 +0000
+++ b/macros.xml	Tue Feb 04 14:39:08 2025 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">1.50.0</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@VERSION_SUFFIX@">2</token>
     <token name="@PROFILE@">21.01</token>
     <xml name="bio_tools">
         <xrefs>
--- a/phyloseq_plot_bar.R	Sat Jan 25 17:11:49 2025 +0000
+++ b/phyloseq_plot_bar.R	Tue Feb 04 14:39:08 2025 +0000
@@ -54,6 +54,10 @@
     make_option(c("--device"),
         action = "store", dest = "device", default = "pdf",
         help = "Output format (e.g., 'pdf', 'png', 'jpeg')"
+    ),
+    make_option(c("--nolines"),
+        type = "logical", default = FALSE,
+        help = "Remove borders (lines) around bars (TRUE/FALSE)"
     )
 )
 
@@ -80,70 +84,79 @@
     physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x))
 }
 
+# Debug: Check available taxonomic ranks
+print("Available taxonomic ranks:")
+print(colnames(tax_table(physeq)))
+
+# Handle missing or unassigned taxa for all ranks
 if (opt$keepNonAssigned) {
-    # Add synthetic "Not Assigned" for missing/NA taxa
-    tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x))
+    # Replace NA or empty values with 'Not Assigned' for all ranks.
+    # is.na() alone would miss "" entries, which the previous apply()-based
+    # code also relabelled, so both conditions are checked for every rank.
+    tax_ranks <- colnames(tax_table(physeq))
+
+    for (rank in tax_ranks) {
+        rank_values <- as(tax_table(physeq), "matrix")[, rank]
+        tax_table(physeq)[, rank][is.na(rank_values) | rank_values == ""] <- "Not Assigned"
+    }
 }
-# Check if the 'x' and 'fill' variables are valid
-sample_vars <- colnames(sample_data(physeq))
 
-# If topX is provided, filter the phyloseq object to show only top X taxa
+# Filter to top X taxa if requested
 if (!is.null(opt$topX) && opt$topX != "") {
     topX <- as.numeric(opt$topX)
     if (is.na(topX) || topX <= 0) {
         stop("Error: topX should be a positive number.")
     }
 
-    # Aggregate the data at the selected rank (e.g., Phylum)
-    tax_rank <- opt$fill # Adjust as necessary
-    physeq_agg <- tax_glom(physeq, taxrank = tax_rank)
+    tax_rank <- opt$fill
+    if (!tax_rank %in% colnames(tax_table(physeq))) {
+        stop(paste("Error: Tax rank", tax_rank, "not found in tax_table."))
+    }
 
-    # Get the abundance of each taxon at the selected rank
+    physeq_agg <- tax_glom(physeq, taxrank = tax_rank)
     taxa_abundance <- taxa_sums(physeq_agg)
-
-    # Summarize the abundance at each taxonomic rank (grouping by taxonomic name)
     tax_table_agg <- tax_table(physeq_agg)
     taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum)
-
-    # Identify the top X taxa by summed abundance
     top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX]
 
-    print("Only plotting taxa in TopX taxa group:")
+    print("Top taxa:")
     print(top_taxa)
 
-    # Get the OTUs corresponding to the top taxa
     otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]
 
     if (opt$keepOthers) {
-        # Label taxa not in top_taxa as "Others"
-        tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others"
+        tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others"
         physeq <- physeq_agg
     } else {
-        # Subset the phyloseq object to keep only the top X taxa
-        physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg)
-        physeq <- physeq_filtered
+        physeq <- prune_taxa(otus_in_top_taxa, physeq_agg)
     }
 }
 
 # Generate bar plot
 if (!is.null(opt$x) && opt$x != "") {
     p <- plot_bar(physeq, x = opt$x, fill = opt$fill) +
-        geom_bar(aes(color = NULL, fill = !!sym(opt$fill)), stat = "identity", position = "stack")
+        geom_bar(aes(fill = !!sym(opt$fill)),
+            stat = "identity", position = "stack",
+            color = ifelse(opt$nolines, NA, "black")
+        )
 } else {
     p <- plot_bar(physeq, fill = opt$fill) +
-        geom_bar(aes(color = NULL, fill = !!sym(opt$fill)), stat = "identity", position = "stack")
+        geom_bar(aes(fill = !!sym(opt$fill)),
+            stat = "identity", position = "stack",
+            color = ifelse(opt$nolines, NA, "black")
+        )
 }
 
-# Only facet if the facet variable is provided and exists in the sample data
+# Optional: Add faceting if specified
 if (!is.null(opt$facet) && opt$facet != "") {
+    sample_vars <- colnames(sample_data(physeq))
     if (opt$facet %in% sample_vars) {
         p <- p + facet_wrap(as.formula(paste("~", opt$facet)))
     } else {
-        warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped."))
+        warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting."))
     }
 }
 
-
 # Save to output file
 ggsave(
     filename = opt$output,
--- a/phyloseq_plot_bar.xml	Sat Jan 25 17:11:49 2025 +0000
+++ b/phyloseq_plot_bar.xml	Tue Feb 04 14:39:08 2025 +0000
@@ -19,6 +19,7 @@
 --width '${width}'
 --height '${height}'
 --device '${device}'
+--nolines '${nolines}'
   ]]></command>
   <inputs>
     <expand macro="phyloseq_input"/>
@@ -37,10 +38,20 @@
       <option value="jpeg">JPEG</option>
       <option value="tiff">TIFF</option>
     </param>
+    <param name="nolines" type="boolean" label="Remove Separation Lines" help="Do not add separation lines between bars in the chart." />
   </inputs>
   <outputs>
-    <data name="output" format="pdf" label="Bar Chart (PDF)" />
+    <data name="output" format="pdf" label="Bar Chart (${device})">
+      <!-- Switch the output datatype to match the selected 'device' parameter -->
+      <change_format>
+        <when input="device" value="pdf" format="pdf"/>
+        <when input="device" value="png" format="png"/>
+        <when input="device" value="jpeg" format="jpeg"/>
+        <when input="device" value="tiff" format="tiff"/>
+      </change_format>
+    </data>
   </outputs>
+
   <tests>
     <!-- Test 1: Basic functionality with x and fill variables -->
     <test>
@@ -71,6 +82,36 @@
         </assert_contents>
       </output>
     </test>
+
+    <!-- Test 3: no bar border lines, without 'Others' and without non-assigned taxa -->
+      <test>
+      <param name="input" value="output.phyloseq" ftype="phyloseq"/>
+      <param name="x" value="Sample"/>
+      <param name="fill" value="Genus"/>
+      <param name="facet" value=""/>
+      <param name="topX" value="10"/>
+      <param name="normalize" value="false"/>
+      <param name="keepOthers" value="false"/>
+      <param name="keepNonAssigned" value="false"/>
+      <param name="nolines" value="true"/>
+      <param name="device" value="pdf"/>
+      <output name="output" file="expected_output.pdf" ftype="pdf" compare="sim_size"/>
+    </test>
+
+    <!-- Test 4: with normalization, keeping 'Others' and non-assigned taxa -->
+      <test>
+      <param name="input" value="output.phyloseq" ftype="phyloseq"/>
+      <param name="x" value="Sample"/>
+      <param name="fill" value="Genus"/>
+      <param name="facet" value=""/>
+      <param name="topX" value="10"/>
+      <param name="normalize" value="true"/>
+      <param name="keepOthers" value="true"/>
+      <param name="keepNonAssigned" value="true"/>
+      <param name="nolines" value="false"/>
+      <param name="device" value="pdf"/>
+      <output name="output" file="expected_output_normalize.pdf" ftype="pdf" compare="sim_size"/>
+    </test>
   </tests>
 
   <help>
Binary file test-data/expected_output.pdf has changed
Binary file test-data/expected_output_normalize.pdf has changed