Mercurial > repos > iuc > phyloseq_plot_bar
changeset 3:d0fa6a43c838 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 26f87cc62468c9c329b33246af4f14e2659856f9
author | iuc |
---|---|
date | Fri, 10 Jan 2025 14:58:52 +0000 |
parents | 0aacbbcd86eb |
children | 9302992205ad |
files | phyloseq_plot_bar.R phyloseq_plot_bar.xml |
diffstat | 2 files changed, 140 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/phyloseq_plot_bar.R Tue Jan 07 17:58:06 2025 +0000 +++ b/phyloseq_plot_bar.R Fri Jan 10 14:58:52 2025 +0000 @@ -26,6 +26,34 @@ make_option(c("--output"), action = "store", dest = "output", help = "Output file (PDF)" + ), + make_option(c("--topX"), + action = "store", dest = "topX", default = NULL, + help = "Show only the top X taxa based on abundance (e.g., '10') (optional)" + ), + make_option(c("--keepOthers"), + action = "store_true", dest = "keepOthers", default = FALSE, + help = "Keep taxa not in topX and label them as 'Others' (optional)" + ), + make_option(c("--keepNonAssigned"), + action = "store_true", dest = "keepNonAssigned", default = FALSE, + help = "Keep taxa labeled as 'Not Assigned' (optional)" + ), + make_option(c("--normalize"), + action = "store_true", dest = "normalize", default = FALSE, + help = "Normalize abundances to sum to 100% (optional)" + ), + make_option(c("--width"), + action = "store", dest = "width", default = 10, + type = "numeric", help = "Width of the output plot in inches" + ), + make_option(c("--height"), + action = "store", dest = "height", default = 8, + type = "numeric", help = "Height of the output plot in inches" + ), + make_option(c("--device"), + action = "store", dest = "device", default = "pdf", + help = "Output format (e.g., 'pdf', 'png', 'jpeg')" ) ) @@ -38,9 +66,6 @@ if (is.null(opt$input) || opt$input == "") { stop("Error: Input file is required.") } -if (is.null(opt$x) || opt$x == "") { - stop("Error: X-axis variable is required.") -} if (is.null(opt$output) || opt$output == "") { stop("Error: Output file is required.") } @@ -49,14 +74,63 @@ print(paste("Trying to read:", opt$input)) physeq <- readRDS(opt$input) +# Normalize to relative abundances if requested +if (opt$normalize) { + print("Normalizing abundances to sum to 100%...") + physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) +} + +if (opt$keepNonAssigned) { + # Add synthetic "Not Assigned" for missing/NA taxa + tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x)) +} # Check if the 'x' and 'fill' variables are valid sample_vars <- colnames(sample_data(physeq)) -if (!opt$x %in% sample_vars) { - stop(paste("Error: X-axis variable", opt$x, "does not exist in the sample data.")) + +# If topX is provided, filter the phyloseq object to show only top X taxa +if (!is.null(opt$topX) && opt$topX != "") { + topX <- as.numeric(opt$topX) + if (is.na(topX) || topX <= 0) { + stop("Error: topX should be a positive number.") + } + + # Aggregate the data at the selected rank (e.g., Phylum) + tax_rank <- opt$fill # Adjust as necessary + physeq_agg <- tax_glom(physeq, taxrank = tax_rank) + + # Get the abundance of each taxon at the selected rank + taxa_abundance <- taxa_sums(physeq_agg) + + # Summarize the abundance at each taxonomic rank (grouping by taxonomic name) + tax_table_agg <- tax_table(physeq_agg) + taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum) + + # Identify the top X taxa by summed abundance + top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX] + + print("Only plotting taxa in TopX taxa group:") + print(top_taxa) + + # Get the OTUs corresponding to the top taxa + otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] + + if (opt$keepOthers) { + # Label taxa not in top_taxa as "Others" + tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others" + physeq <- physeq_agg + } else { + # Subset the phyloseq object to keep only the top X taxa + physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg) + physeq <- physeq_filtered + } } # Generate bar plot -p <- plot_bar(physeq, x = opt$x, fill = opt$fill) +if (!is.null(opt$x) && opt$x != "") { + p <- plot_bar(physeq, x = opt$x, fill = opt$fill) +} else { + p <- plot_bar(physeq, fill = opt$fill) # If no x is provided, don't include x +} # Only facet if the facet variable is provided and exists in the sample data if (!is.null(opt$facet) && opt$facet != "") { @@ -67,8 +141,11 @@ } } -# Save to output file using PDF device -print(paste("Saving plot to:", opt$output)) -pdf(file = opt$output, width = 10, height = 8) -print(p) -dev.off() +# Save to output file +ggsave( + filename = opt$output, + plot = p, + width = opt$width, + height = opt$height, + device = opt$device +)
--- a/phyloseq_plot_bar.xml Tue Jan 07 17:58:06 2025 +0000 +++ b/phyloseq_plot_bar.xml Fri Jan 10 14:58:52 2025 +0000 @@ -12,44 +12,66 @@ --fill '$fill' --facet '${facet}' --output '$output' +--topX '${topX}' +--keepOthers '${keepOthers}' +--keepNonAssigned '${keepNonAssigned}' +--normalize '${normalize}' +--width '${width}' +--height '${height}' +--device '${device}' ]]></command> <inputs> <expand macro="phyloseq_input"/> - <param name="x" type="text" label="X-axis variable" help="Variable for the x-axis (e.g., Sample, Phylum)" /> - <param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Order)" /> - <param name="facet" type="text" optional="true" label="Facet by variable" help="Optional: Variable to facet the chart by (e.g., SampleType)" /> + <param name="x" type="text" optional="true" label="X-axis variable" help="Variable for the x-axis (e.g., Sample, Phylum). If not specified, the Samples are taken." /> + <param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Order)." /> + <param name="facet" type="text" optional="true" label="Facet by variable" help="Variable to facet the chart by (e.g., SampleType)." /> + <param name="topX" value="10" type="integer" optional="true" label="Top X" help="Only show the ranks with the top X abundance." /> + <param name="keepOthers" type="boolean" label="Keep 'Others'" help="Keep OTUs which are not in top X as 'Others'." /> + <param name="keepNonAssigned" type="boolean" label="Keep Non Assigned" help="Keep OTUs that are not assigned at this rank and label as 'Not Assigned'." /> + <param name="normalize" type="boolean" label="Normalize" help="Normalize abundances to sum to 100%. Normalization is performed before Top X selection." /> + <param name="width" type="float" value="10" optional="true" label="Plot Width" help="Width of the output plot in inches." /> + <param name="height" type="float" value="8" optional="true" label="Plot Height" help="Height of the output plot in inches." /> + <param name="device" type="select" value="pdf" label="Output Device" help="Device to use for the output file. Options include pdf, png, and others."> + <option value="pdf">PDF</option> + <option value="png">PNG</option> + <option value="jpeg">JPEG</option> + <option value="tiff">TIFF</option> + </param> </inputs> <outputs> <data name="output" format="pdf" label="Bar Chart (PDF)" /> </outputs> - <tests> - <!-- Test 1: Default parameters --> + <tests> + <!-- Test 1: Basic functionality with x and fill variables --> <test> - <param name="input" value="output.phyloseq" ftype="phyloseq"/> - <param name="x" value="Property"/> - <param name="fill" value="Number"/> - <param name="facet" value="Property"/> - <output name="output" ftype="pdf"> - <assert_contents> - <has_text text="%PDF"/> - <has_text text="%%EOF"/> - </assert_contents> - </output> + <param name="input" value="output.phyloseq" ftype="phyloseq"/> + <param name="x" value="Property"/> + <param name="fill" value="Phylum"/> + <param name="device" value="pdf"/> + <output name="output" ftype="pdf"> + <assert_contents> + <has_text text="%PDF"/> + <has_text text="%%EOF"/> + </assert_contents> + </output> </test> - <!-- Test 2: Valid parameters without facet --> + + <!-- Test 2: TopX filtering and normalization --> <test> - <param name="input" value="output.phyloseq" ftype="phyloseq"/> - <param name="x" value="Property"/> - <param name="fill" value="Number"/> - <param name="facet" value=""/> - <output name="output" ftype="pdf"> - <assert_contents> - <has_text text="%PDF"/> - <has_text text="%%EOF"/> - </assert_contents> - </output> + <param name="input" value="output.phyloseq" ftype="phyloseq"/> + <param name="x" value="Property"/> + <param name="fill" value="Genus"/> + <param name="facet" value="SampleType"/> + <param name="topX" value="10"/> + <param name="normalize" value="true"/> + <output name="output" ftype="pdf"> + <assert_contents> + <has_text text="%PDF"/> + <has_text text="%%EOF"/> + </assert_contents> + </output> </test> -</tests> + </tests> <help> **Description** @@ -62,10 +84,12 @@ - **X-axis variable**: The variable to use for the x-axis (e.g., Sample, Phylum). - **Fill variable**: (Optional) The variable to use for the bar fill colors (e.g., Genus, Order). - **Facet by variable**: (Optional) A variable to facet the bar chart (e.g., SampleType). + - **Width and Height**: Dimensions of the plot in inches (default: 10x8). + - **Device**: Output format (e.g., pdf, png, jpeg, tiff). **Outputs** - - A PDF file containing the bar chart. + - A file containing the bar chart in the specified format. **Usage Notes** @@ -73,4 +97,3 @@ </help> <expand macro="citations"/> </tool> -