Mercurial > repos > iuc > phyloseq_plot_bar
changeset 5:94563110275d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d6888da7aba38b97f6cb827355f2de436565684a
author | iuc |
---|---|
date | Tue, 04 Feb 2025 14:39:08 +0000 |
parents | 9302992205ad |
children | 8a9c340debc8 |
files | macros.xml phyloseq_plot_bar.R phyloseq_plot_bar.xml test-data/expected_output.pdf test-data/expected_output_normalize.pdf |
diffstat | 5 files changed, 81 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Sat Jan 25 17:11:49 2025 +0000 +++ b/macros.xml Tue Feb 04 14:39:08 2025 +0000 @@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">1.50.0</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@VERSION_SUFFIX@">2</token> <token name="@PROFILE@">21.01</token> <xml name="bio_tools"> <xrefs>
--- a/phyloseq_plot_bar.R Sat Jan 25 17:11:49 2025 +0000 +++ b/phyloseq_plot_bar.R Tue Feb 04 14:39:08 2025 +0000 @@ -54,6 +54,10 @@ make_option(c("--device"), action = "store", dest = "device", default = "pdf", help = "Output format (e.g., 'pdf', 'png', 'jpeg')" + ), + make_option(c("--nolines"), + type = "logical", default = FALSE, + help = "Remove borders (lines) around bars (TRUE/FALSE)" ) ) @@ -80,70 +84,79 @@ physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) } +# Debug: Check available taxonomic ranks +print("Available taxonomic ranks:") +print(colnames(tax_table(physeq))) + +# Handle missing or unassigned taxa for all ranks if (opt$keepNonAssigned) { - # Add synthetic "Not Assigned" for missing/NA taxa - tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x)) + # Replace NA or empty values with 'Not Assigned' for all ranks + tax_ranks <- colnames(tax_table(physeq)) + + for (rank in tax_ranks) { + if (rank %in% colnames(tax_table(physeq))) { + # replace NA or empty values with 'Not Assigned' + tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" + } + } } -# Check if the 'x' and 'fill' variables are valid -sample_vars <- colnames(sample_data(physeq)) -# If topX is provided, filter the phyloseq object to show only top X taxa +# Filter to top X taxa if requested if (!is.null(opt$topX) && opt$topX != "") { topX <- as.numeric(opt$topX) if (is.na(topX) || topX <= 0) { stop("Error: topX should be a positive number.") } - # Aggregate the data at the selected rank (e.g., Phylum) - tax_rank <- opt$fill # Adjust as necessary - physeq_agg <- tax_glom(physeq, taxrank = tax_rank) + tax_rank <- opt$fill + if (!tax_rank %in% colnames(tax_table(physeq))) { + stop(paste("Error: Tax rank", tax_rank, "not found in tax_table.")) + } - # Get the abundance of each taxon at the selected rank + physeq_agg <- tax_glom(physeq, taxrank = tax_rank) taxa_abundance <- taxa_sums(physeq_agg) - - # Summarize the abundance at each taxonomic rank (grouping by taxonomic name) tax_table_agg <- tax_table(physeq_agg) taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum) - - # Identify the top X taxa by summed abundance top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX] - print("Only plotting taxa in TopX taxa group:") + print("Top taxa:") print(top_taxa) - # Get the OTUs corresponding to the top taxa otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] if (opt$keepOthers) { - # Label taxa not in top_taxa as "Others" - tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others" + tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" physeq <- physeq_agg } else { - # Subset the phyloseq object to keep only the top X taxa - physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg) - physeq <- physeq_filtered + physeq <- prune_taxa(otus_in_top_taxa, physeq_agg) } } # Generate bar plot if (!is.null(opt$x) && opt$x != "") { p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + - geom_bar(aes(color = NULL, fill = !!sym(opt$fill)), stat = "identity", position = "stack") + geom_bar(aes(fill = !!sym(opt$fill)), + stat = "identity", position = "stack", + color = ifelse(opt$nolines, NA, "black") + ) } else { p <- plot_bar(physeq, fill = opt$fill) + - geom_bar(aes(color = NULL, fill = !!sym(opt$fill)), stat = "identity", position = "stack") + geom_bar(aes(fill = !!sym(opt$fill)), + stat = "identity", position = "stack", + color = ifelse(opt$nolines, NA, "black") + ) } -# Only facet if the facet variable is provided and exists in the sample data +# Optional: Add faceting if specified if (!is.null(opt$facet) && opt$facet != "") { + sample_vars <- colnames(sample_data(physeq)) if (opt$facet %in% sample_vars) { p <- p + facet_wrap(as.formula(paste("~", opt$facet))) } else { - warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped.")) + warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting.")) } } - # Save to output file ggsave( filename = opt$output,
--- a/phyloseq_plot_bar.xml Sat Jan 25 17:11:49 2025 +0000 +++ b/phyloseq_plot_bar.xml Tue Feb 04 14:39:08 2025 +0000 @@ -19,6 +19,7 @@ --width '${width}' --height '${height}' --device '${device}' +--nolines '${nolines}' ]]></command> <inputs> <expand macro="phyloseq_input"/> @@ -37,10 +38,20 @@ <option value="jpeg">JPEG</option> <option value="tiff">TIFF</option> </param> + <param name="nolines" type="boolean" label="Remove Separation Lines" help="Do not add separation lines between bars in the chart." /> </inputs> <outputs> - <data name="output" format="pdf" label="Bar Chart (PDF)" /> + <data name="output" format="pdf" label="Bar Chart (${device})"> + <change_format> + <when format="pdf" value="pdf"/> + <when format="png" value="png"/> + <when format="jpeg" value="jpeg"/> + <when format="tiff" value="tiff"/> + </change_format> + + </data> </outputs> + <tests> <!-- Test 1: Basic functionality with x and fill variables --> <test> @@ -71,6 +82,36 @@ </assert_contents> </output> </test> + + <!-- Test 3: without OTU lines, Others and non assigned --> + <test> + <param name="input" value="output.phyloseq" ftype="phyloseq"/> + <param name="x" value="Sample"/> + <param name="fill" value="Genus"/> + <param name="facet" value=""/> + <param name="topX" value="10"/> + <param name="normalize" value="false"/> + <param name="keepOthers" value="false"/> + <param name="keepNonAssigned" value="false"/> + <param name="nolines" value="true"/> + <param name="device" value="pdf"/> + <output name="output" file="expected_output.pdf" ftype="pdf" compare="sim_size"/> + </test> + + <!-- Test 4: with normalization, Others and non assigned --> + <test> + <param name="input" value="output.phyloseq" ftype="phyloseq"/> + <param name="x" value="Sample"/> + <param name="fill" value="Genus"/> + <param name="facet" value=""/> + <param name="topX" value="10"/> + <param name="normalize" value="true"/> + <param name="keepOthers" value="true"/> + <param name="keepNonAssigned" value="true"/> + <param name="nolines" value="false"/> + <param name="device" value="pdf"/> + <output name="output" file="expected_output_normalize.pdf" ftype="pdf" compare="sim_size"/> + </test> </tests> <help>