Mercurial > repos > iuc > phyloseq_plot_bar
changeset 6:8a9c340debc8 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit a5ae2f86b2955290a4c81ab234f02cc51020f970
author | iuc |
---|---|
date | Thu, 13 Mar 2025 09:49:02 +0000 |
parents | 94563110275d |
children | e9442bd54200 |
files | macros.xml phyloseq_plot_bar.R phyloseq_plot_bar.xml static/images/asv_fill_plot.png static/images/facet_plot.png static/images/standard_plot.png test-data/expected_output_01.png test-data/expected_output_02.png test-data/expected_output_03.png test-data/expected_output_04.png test-data/expected_output_05.png test-data/expected_output_06.png |
diffstat | 12 files changed, 218 insertions(+), 83 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue Feb 04 14:39:08 2025 +0000 +++ b/macros.xml Thu Mar 13 09:49:02 2025 +0000 @@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">1.50.0</token> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> <token name="@PROFILE@">21.01</token> <xml name="bio_tools"> <xrefs>
--- a/phyloseq_plot_bar.R Tue Feb 04 14:39:08 2025 +0000 +++ b/phyloseq_plot_bar.R Thu Mar 13 09:49:02 2025 +0000 @@ -4,6 +4,7 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("phyloseq")) suppressPackageStartupMessages(library("ggplot2")) +suppressPackageStartupMessages(library("dplyr")) # Define options option_list <- list( @@ -17,7 +18,7 @@ ), make_option(c("--fill"), action = "store", dest = "fill", default = NULL, - help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)" + help = "Variable for fill color (e.g., 'Genus', 'Order'). Use 'ASV' as argument to show each OTU/ASV." ), make_option(c("--facet"), action = "store", dest = "facet", default = NULL, @@ -43,6 +44,10 @@ action = "store_true", dest = "normalize", default = FALSE, help = "Normalize abundances to sum to 100% (optional)" ), + make_option(c("--normalize_x"), + action = "store_true", dest = "normalize_x", default = FALSE, + help = "Normalize x groups to sum up to 100%" + ), make_option(c("--width"), action = "store", dest = "width", default = 10, type = "numeric", help = "Width of the output plot in inches" @@ -53,19 +58,21 @@ ), make_option(c("--device"), action = "store", dest = "device", default = "pdf", - help = "Output format (e.g., 'pdf', 'png', 'jpeg')" + help = "Output format (e.g., 'pdf', 'png', 'jpg')" ), make_option(c("--nolines"), - type = "logical", default = FALSE, + action = "store_true", dest = "nolines", default = FALSE, help = "Remove borders (lines) around bars (TRUE/FALSE)" ) ) + # Parse arguments parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) args <- parse_args(parser, positional_arguments = TRUE) opt <- args$options + # Validate required options if (is.null(opt$input) || opt$input == "") { stop("Error: Input file is required.") @@ -74,10 +81,28 @@ stop("Error: Output file is required.") } +if (is.null(opt$fill) || opt$fill == "") { + print(paste("No fill chosen using ASV")) + opt$fill <- "ASV" +} + # Load phyloseq object print(paste("Trying to read:", opt$input)) physeq <- readRDS(opt$input) +## Allow to use OTU as tax group +# Extract rownames (taxids) from the tax_table and add them as a new column +taxids <- rownames(tax_table(physeq)) + +# Get the number of columns in the tax_table +num_columns <- ncol(tax_table(physeq)) + +# Add the taxids as a new last column in the tax_table +tax_table(physeq) <- cbind(tax_table(physeq), taxid = taxids) + +# Rename the last column to 'ASV' / OTU does conflict with phyloseq logic +colnames(tax_table(physeq))[num_columns + 1] <- "ASV" + # Normalize to relative abundances if requested if (opt$normalize) { print("Normalizing abundances to sum to 100%...") @@ -85,16 +110,22 @@ } # Debug: Check available taxonomic ranks + +tax_ranks <- colnames(tax_table(physeq)) +sample_vars <- colnames(sample_data(physeq)) + print("Available taxonomic ranks:") -print(colnames(tax_table(physeq))) +print(tax_ranks) + +print("Available metadata:") +print(sample_vars) # Handle missing or unassigned taxa for all ranks if (opt$keepNonAssigned) { # Replace NA or empty values with 'Not Assigned' for all ranks - tax_ranks <- colnames(tax_table(physeq)) for (rank in tax_ranks) { - if (rank %in% colnames(tax_table(physeq))) { + if (rank %in% tax_ranks) { # replace NA or empty values with 'Not Assigned' tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" } @@ -124,7 +155,9 @@ otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] + # Group non-top OTUs as 'Others' if requested if (opt$keepOthers) { + # Update the tax_table to assign 'Others' to non-top taxa tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" physeq <- physeq_agg } else { @@ -132,30 +165,76 @@ } } + +# normalize x groups if needed +if (opt$x %in% sample_vars) { + if (opt$normalize_x && !is.null(opt$x) && opt$x != "") { + physeq_agg <- merge_samples(physeq, opt$x) + + physeq <- transform_sample_counts(physeq_agg, function(x) (x / sum(x) * 100)) + opt$x <- NULL # set to Null since we do not need x for downstream now + opt$facet <- NULL # set to Null since facetting does not work with normalize x + warning(paste("normalize x does not work with facetting")) + } +} else { + warning(paste("x", opt$x, "not found in sample data. Skipping normalize_x.")) +} + + +# Check if the facet variable is valid and exists +facet_var <- NULL +if (!is.null(opt$facet) && opt$facet != "") { + if (opt$facet %in% sample_vars || opt$facet %in% tax_ranks) { + facet_var <- opt$facet # Store facet variable for later + } else { + warning(paste("Facet variable", opt$facet, "not found in sample data or tax ranks. Skipping faceting.")) + } +} + +# Determine if faceting is needed +facet_formula <- if (!is.null(facet_var)) as.formula(paste("~", facet_var)) else NULL + +# Define color based on the `nolines` option +plot_color <- ifelse(opt$nolines, NA, "black") + # Generate bar plot if (!is.null(opt$x) && opt$x != "") { - p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + - geom_bar(aes(fill = !!sym(opt$fill)), - stat = "identity", position = "stack", - color = ifelse(opt$nolines, NA, "black") + p <- plot_bar(physeq, + x = opt$x, + fill = opt$fill + ) + facet_wrap(facet_formula, scales = "free_x") + + geom_bar( + stat = "identity", + position = "stack", + aes(fill = !!sym(opt$fill)), + color = plot_color ) } else { - p <- plot_bar(physeq, fill = opt$fill) + - geom_bar(aes(fill = !!sym(opt$fill)), - stat = "identity", position = "stack", - color = ifelse(opt$nolines, NA, "black") + p <- plot_bar(physeq, + fill = opt$fill + ) + facet_wrap(facet_formula, scales = "free_x") + + geom_bar( + stat = "identity", + position = "stack", + aes(fill = !!sym(opt$fill)), + color = plot_color ) } -# Optional: Add faceting if specified -if (!is.null(opt$facet) && opt$facet != "") { - sample_vars <- colnames(sample_data(physeq)) - if (opt$facet %in% sample_vars) { - p <- p + facet_wrap(as.formula(paste("~", opt$facet))) - } else { - warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting.")) - } + +# Reorder fill levels to ensure "Not Assigned" and "Others" are at the bottom if they exist +fill_values <- unique(p$data[[opt$fill]]) # Get unique fill values +new_levels <- setdiff(fill_values, c("Not Assigned", "Others")) # Exclude "Not Assigned" and "Others" + +if ("Not Assigned" %in% fill_values) { + new_levels <- c("Not Assigned", new_levels) # Place "Not Assigned" at the bottom if it exists } +if ("Others" %in% fill_values) { + new_levels <- c("Others", new_levels) # Place "Others" at the bottom if it exists +} + +# Apply the new levels to the fill variable in the plot data +p$data[[opt$fill]] <- factor(p$data[[opt$fill]], levels = new_levels) # Save to output file ggsave(
--- a/phyloseq_plot_bar.xml Tue Feb 04 14:39:08 2025 +0000 +++ b/phyloseq_plot_bar.xml Thu Mar 13 09:49:02 2025 +0000 @@ -10,45 +10,46 @@ --input '$input' --x '$x' --fill '$fill' ---facet '${facet}' +--facet '$facet' +--topX '$topX' +'$keepOthers' +'$keepNonAssigned' +'$normalize' +'$normalize_x' +'$nolines' +--width '$width' +--height '$height' +--device '$device' --output '$output' ---topX '${topX}' ---keepOthers '${keepOthers}' ---keepNonAssigned '${keepNonAssigned}' ---normalize '${normalize}' ---width '${width}' ---height '${height}' ---device '${device}' ---nolines '${nolines}' ]]></command> <inputs> <expand macro="phyloseq_input"/> - <param name="x" type="text" optional="true" label="X-axis variable" help="Variable for the x-axis (e.g., Sample, Phylum). If not specified, the Samples are taken." /> - <param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Order)." /> + <param name="x" type="text" optional="true" label="X-axis variable" help="Variable for the x-axis (Metadata columns). If not specified, the Samples are taken." /> + <param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Species). Use 'ASV' as argument to show each OTU/ASV." /> <param name="facet" type="text" optional="true" label="Facet by variable" help="Variable to facet the chart by (e.g., SampleType)." /> <param name="topX" value="10" type="integer" optional="true" label="Top X" help="Only show the ranks with the top X abundance." /> - <param name="keepOthers" type="boolean" label="Keep 'Others'" help="Keep OTUs which are not in top X as 'Others'." /> - <param name="keepNonAssigned" type="boolean" label="Keep Non Assigned" help="Keep OTUs that are not assigned at this rank and label as 'Not Assigned'." /> - <param name="normalize" type="boolean" label="Normalize" help="Normalize abundances to sum to 100%. Normalization is performed before Top X selection." /> + <param name="keepOthers" type="boolean" truevalue="--keepOthers" falsevalue="" label="Keep 'Others'" help="Keep OTUs which are not in top X as 'Others'." /> + <param name="keepNonAssigned" type="boolean" truevalue="--keepNonAssigned" falsevalue="" label="Keep 'Not Assigned'" help="Keep OTUs that are not assigned at this rank and label as 'Not Assigned'." /> + <param name="normalize" type="boolean" truevalue="--normalize" falsevalue="" label="Normalize" help="Normalize abundances to sum to 100%. Normalization is performed before Top X selection." /> + <param name="normalize_x" type="boolean" truevalue="--normalize_x" falsevalue="" label="Normalize x" help="Normalize abundances of the x group to sum to 100%. Normalization is performed before Top X selection. Ignores facetting!" /> + <param name="nolines" type="boolean" truevalue="--nolines" falsevalue="" label="Remove Separation Lines" help="Do not add OTU separation lines in the bars." /> <param name="width" type="float" value="10" optional="true" label="Plot Width" help="Width of the output plot in inches." /> <param name="height" type="float" value="8" optional="true" label="Plot Height" help="Height of the output plot in inches." /> - <param name="device" type="select" value="pdf" label="Output Device" help="Device to use for the output file. Options include pdf, png, and others."> + <param name="device" type="select" value="pdf" label="Output Device" help="File format to use for the output file. "> <option value="pdf">PDF</option> <option value="png">PNG</option> - <option value="jpeg">JPEG</option> + <option value="jpg">JEG</option> <option value="tiff">TIFF</option> </param> - <param name="nolines" type="boolean" label="Remove Separation Lines" help="Do not add separation lines between bars in the chart." /> </inputs> <outputs> <data name="output" format="pdf" label="Bar Chart (${device})"> <change_format> - <when format="pdf" value="pdf"/> - <when format="png" value="png"/> - <when format="jpeg" value="jpeg"/> - <when format="tiff" value="tiff"/> + <when input="device" format="pdf" value="pdf"/> + <when input="device" format="png" value="png"/> + <when input="device" format="jpg" value="jpg"/> + <when input="device" format="tiff" value="tiff"/> </change_format> - </data> </outputs> @@ -58,13 +59,8 @@ <param name="input" value="output.phyloseq" ftype="phyloseq"/> <param name="x" value="Property"/> <param name="fill" value="Phylum"/> - <param name="device" value="pdf"/> - <output name="output" ftype="pdf"> - <assert_contents> - <has_text text="%PDF"/> - <has_text text="%%EOF"/> - </assert_contents> - </output> + <param name="device" value="png"/> + <output name="output" file="expected_output_01.png" ftype="png" compare="image_diff"/> </test> <!-- Test 2: TopX filtering and normalization --> @@ -72,34 +68,31 @@ <param name="input" value="output.phyloseq" ftype="phyloseq"/> <param name="x" value="Property"/> <param name="fill" value="Genus"/> - <param name="facet" value="SampleType"/> + <param name="facet" value="Number"/> <param name="topX" value="10"/> <param name="normalize" value="true"/> - <output name="output" ftype="pdf"> - <assert_contents> - <has_text text="%PDF"/> - <has_text text="%%EOF"/> - </assert_contents> - </output> + <param name="device" value="png"/> + <output name="output" file="expected_output_02.png" ftype="png" compare="image_diff"/> </test> - <!-- Test 3: without OTU lines, Others and non assigned --> - <test> + + <!-- Test 3: without OTU lines, Others and non assigned --> + <test> <param name="input" value="output.phyloseq" ftype="phyloseq"/> <param name="x" value="Sample"/> <param name="fill" value="Genus"/> <param name="facet" value=""/> <param name="topX" value="10"/> <param name="normalize" value="false"/> - <param name="keepOthers" value="false"/> + <param name="keepOthers" value="true"/> <param name="keepNonAssigned" value="false"/> <param name="nolines" value="true"/> - <param name="device" value="pdf"/> - <output name="output" file="expected_output.pdf" ftype="pdf" compare="sim_size"/> + <param name="device" value="png"/> + <output name="output" file="expected_output_03.png" ftype="png" compare="image_diff"/> </test> - <!-- Test 4: with normalization, Others and non assigned --> - <test> + <!-- Test 4: with normalization, Others and non assigned --> + <test> <param name="input" value="output.phyloseq" ftype="phyloseq"/> <param name="x" value="Sample"/> <param name="fill" value="Genus"/> @@ -109,32 +102,95 @@ <param name="keepOthers" value="true"/> <param name="keepNonAssigned" value="true"/> <param name="nolines" value="false"/> - <param name="device" value="pdf"/> - <output name="output" file="expected_output_normalize.pdf" ftype="pdf" compare="sim_size"/> + <param name="device" value="png"/> + <output name="output" file="expected_output_04.png" ftype="png" compare="image_diff"/> </test> + + <!-- Test 5: with normalization, Others and non assigned, normalization x --> + <test> + <param name="input" value="output.phyloseq" ftype="phyloseq"/> + <param name="x" value="Property"/> + <param name="fill" value="Genus"/> + <param name="facet" value=""/> + <param name="topX" value="10"/> + <param name="normalize" value="true"/> + <param name="keepOthers" value="true"/> + <param name="keepNonAssigned" value="true"/> + <param name="normalize_x" value="true"/> + <param name="nolines" value="false"/> + <param name="device" value="png"/> + <output name="output" file="expected_output_05.png" ftype="png" compare="image_diff"/> + </test> + + <!-- Test 6: Minimal --> + <test> + <param name="input" value="output.phyloseq" ftype="phyloseq"/> + <param name="device" value="png"/> + <output name="output" file="expected_output_06.png" ftype="png" compare="image_diff"/> + </test> + </tests> - <help> - **Description** +<help> +<![CDATA[ +**Description** + +This tool generates bar charts from a phyloseq object using the 'plot_bar' function in R. +It allows filtering, normalization, and customization of plots for visualizing microbial community composition. + +**Inputs** + +- **Input**: A phyloseq object in RDS format. + +- **X-axis variable**: The variable to use for the x-axis (Metadata columns). + +- **Fill variable**: (Optional) The variable to use for the bar fill colors (e.g., Genus, Species). - This tool generates bar charts from a phyloseq object using the `plot_bar` function. +- **Facet by variable**: (Optional) A variable to facet the bar chart (e.g., SampleType). + +- **Top X**: Display only the top X most abundant taxa. + +- **Keep 'Others'**: Group non-top taxa as 'Others'. + +- **Keep 'Not Assigned'**: Retain unassigned taxa labeled as 'Not Assigned'. - **Inputs** +- **Normalize**: Normalize abundances to sum to 100%. + +- **Remove Separation Lines**: Do not add OTU separation lines in the bars. + +- **Width and Height**: Dimensions of the plot in inches (default: 10x8). + +- **Device**: Output format (e.g., pdf, png, jpg, tiff). - - **Input**: A phyloseq object in RDS format. - - **X-axis variable**: The variable to use for the x-axis (e.g., Sample, Phylum). - - **Fill variable**: (Optional) The variable to use for the bar fill colors (e.g., Genus, Order). - - **Facet by variable**: (Optional) A variable to facet the bar chart (e.g., SampleType). - - **Width and Height**: Dimensions of the plot in inches (default: 10x8). - - **Device**: Output format (e.g., pdf, png, jpeg, tiff). +**Outputs** + +- A file containing the bar chart in the specified format. + +**Usage Notes** + +Ensure that the input file is a valid phyloseq object in RDS format. + +**Examples** + +- *Basic Bar Plot with Others and Not Assigned and with separation lines* - **Outputs** + .. image:: $PATH_TO_IMAGES/standard_plot.png + :width: 515 + :height: 395 - - A file containing the bar chart in the specified format. +- *Faceted Plot* - **Usage Notes** + .. image:: $PATH_TO_IMAGES/facet_plot.png + :width: 515 + :height: 395 + +- *ASV as fill input and removed separation lines* - Ensure that the input file is a valid phyloseq object in RDS format. - </help> + .. image:: $PATH_TO_IMAGES/asv_fill_plot.png + :width: 515 + :height: 395 +]]> +</help> + <expand macro="citations"/> </tool>