comparison phyloseq_plot_bar.R @ 9:6000b8a8dd9d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit a5ae2f86b2955290a4c81ab234f02cc51020f970
author iuc
date Thu, 13 Mar 2025 09:49:14 +0000
parents 0b9d4ec77c4f
children
comparison
equal deleted inserted replaced
8:0b9d4ec77c4f 9:6000b8a8dd9d
2 2
3 # Load libraries 3 # Load libraries
4 suppressPackageStartupMessages(library("optparse")) 4 suppressPackageStartupMessages(library("optparse"))
5 suppressPackageStartupMessages(library("phyloseq")) 5 suppressPackageStartupMessages(library("phyloseq"))
6 suppressPackageStartupMessages(library("ggplot2")) 6 suppressPackageStartupMessages(library("ggplot2"))
7 suppressPackageStartupMessages(library("dplyr"))
7 8
8 # Define options 9 # Define options
9 option_list <- list( 10 option_list <- list(
10 make_option(c("--input"), 11 make_option(c("--input"),
11 action = "store", dest = "input", 12 action = "store", dest = "input",
15 action = "store", dest = "x", 16 action = "store", dest = "x",
16 help = "Variable for x-axis (e.g., 'Sample', 'Phylum')" 17 help = "Variable for x-axis (e.g., 'Sample', 'Phylum')"
17 ), 18 ),
18 make_option(c("--fill"), 19 make_option(c("--fill"),
19 action = "store", dest = "fill", default = NULL, 20 action = "store", dest = "fill", default = NULL,
20 help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)" 21 help = "Variable for fill color (e.g., 'Genus', 'Order'). Use 'ASV' as argument to show each OTU/ASV."
21 ), 22 ),
22 make_option(c("--facet"), 23 make_option(c("--facet"),
23 action = "store", dest = "facet", default = NULL, 24 action = "store", dest = "facet", default = NULL,
24 help = "Facet by variable (optional)" 25 help = "Facet by variable (optional)"
25 ), 26 ),
41 ), 42 ),
42 make_option(c("--normalize"), 43 make_option(c("--normalize"),
43 action = "store_true", dest = "normalize", default = FALSE, 44 action = "store_true", dest = "normalize", default = FALSE,
44 help = "Normalize abundances to sum to 100% (optional)" 45 help = "Normalize abundances to sum to 100% (optional)"
45 ), 46 ),
47 make_option(c("--normalize_x"),
48 action = "store_true", dest = "normalize_x", default = FALSE,
49 help = "Normalize x groups to sum up to 100%"
50 ),
46 make_option(c("--width"), 51 make_option(c("--width"),
47 action = "store", dest = "width", default = 10, 52 action = "store", dest = "width", default = 10,
48 type = "numeric", help = "Width of the output plot in inches" 53 type = "numeric", help = "Width of the output plot in inches"
49 ), 54 ),
50 make_option(c("--height"), 55 make_option(c("--height"),
51 action = "store", dest = "height", default = 8, 56 action = "store", dest = "height", default = 8,
52 type = "numeric", help = "Height of the output plot in inches" 57 type = "numeric", help = "Height of the output plot in inches"
53 ), 58 ),
54 make_option(c("--device"), 59 make_option(c("--device"),
55 action = "store", dest = "device", default = "pdf", 60 action = "store", dest = "device", default = "pdf",
56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')" 61 help = "Output format (e.g., 'pdf', 'png', 'jpg')"
57 ), 62 ),
58 make_option(c("--nolines"), 63 make_option(c("--nolines"),
59 type = "logical", default = FALSE, 64 action = "store_true", dest = "nolines", default = FALSE,
60 help = "Remove borders (lines) around bars (TRUE/FALSE)" 65 help = "Remove borders (lines) around bars (TRUE/FALSE)"
61 ) 66 )
62 ) 67 )
68
63 69
64 # Parse arguments 70 # Parse arguments
65 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) 71 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
66 args <- parse_args(parser, positional_arguments = TRUE) 72 args <- parse_args(parser, positional_arguments = TRUE)
67 opt <- args$options 73 opt <- args$options
68 74
75
69 # Validate required options 76 # Validate required options
70 if (is.null(opt$input) || opt$input == "") { 77 if (is.null(opt$input) || opt$input == "") {
71 stop("Error: Input file is required.") 78 stop("Error: Input file is required.")
72 } 79 }
73 if (is.null(opt$output) || opt$output == "") { 80 if (is.null(opt$output) || opt$output == "") {
74 stop("Error: Output file is required.") 81 stop("Error: Output file is required.")
75 } 82 }
76 83
84 if (is.null(opt$fill) || opt$fill == "") {
85 print(paste("No fill chosen using ASV"))
86 opt$fill <- "ASV"
87 }
88
77 # Load phyloseq object 89 # Load phyloseq object
78 print(paste("Trying to read:", opt$input)) 90 print(paste("Trying to read:", opt$input))
79 physeq <- readRDS(opt$input) 91 physeq <- readRDS(opt$input)
92
93 ## Allow to use OTU as tax group
94 # Extract rownames (taxids) from the tax_table and add them as a new column
95 taxids <- rownames(tax_table(physeq))
96
97 # Get the number of columns in the tax_table
98 num_columns <- ncol(tax_table(physeq))
99
100 # Add the taxids as a new last column in the tax_table
101 tax_table(physeq) <- cbind(tax_table(physeq), taxid = taxids)
102
103 # Rename the last column to 'ASV' / OTU does conflict with phyloseq logic
104 colnames(tax_table(physeq))[num_columns + 1] <- "ASV"
80 105
81 # Normalize to relative abundances if requested 106 # Normalize to relative abundances if requested
82 if (opt$normalize) { 107 if (opt$normalize) {
83 print("Normalizing abundances to sum to 100%...") 108 print("Normalizing abundances to sum to 100%...")
84 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) 109 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x))
85 } 110 }
86 111
87 # Debug: Check available taxonomic ranks 112 # Debug: Check available taxonomic ranks
113
114 tax_ranks <- colnames(tax_table(physeq))
115 sample_vars <- colnames(sample_data(physeq))
116
88 print("Available taxonomic ranks:") 117 print("Available taxonomic ranks:")
89 print(colnames(tax_table(physeq))) 118 print(tax_ranks)
119
120 print("Available metadata:")
121 print(sample_vars)
90 122
91 # Handle missing or unassigned taxa for all ranks 123 # Handle missing or unassigned taxa for all ranks
92 if (opt$keepNonAssigned) { 124 if (opt$keepNonAssigned) {
93 # Replace NA or empty values with 'Not Assigned' for all ranks 125 # Replace NA or empty values with 'Not Assigned' for all ranks
94 tax_ranks <- colnames(tax_table(physeq))
95 126
96 for (rank in tax_ranks) { 127 for (rank in tax_ranks) {
97 if (rank %in% colnames(tax_table(physeq))) { 128 if (rank %in% tax_ranks) {
98 # replace NA or empty values with 'Not Assigned' 129 # replace NA or empty values with 'Not Assigned'
99 tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" 130 tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned"
100 } 131 }
101 } 132 }
102 } 133 }
122 print("Top taxa:") 153 print("Top taxa:")
123 print(top_taxa) 154 print(top_taxa)
124 155
125 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] 156 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]
126 157
158 # Group non-top OTUs as 'Others' if requested
127 if (opt$keepOthers) { 159 if (opt$keepOthers) {
160 # Update the tax_table to assign 'Others' to non-top taxa
128 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" 161 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others"
129 physeq <- physeq_agg 162 physeq <- physeq_agg
130 } else { 163 } else {
131 physeq <- prune_taxa(otus_in_top_taxa, physeq_agg) 164 physeq <- prune_taxa(otus_in_top_taxa, physeq_agg)
132 } 165 }
133 } 166 }
134 167
168
169 # normalize x groups if needed
170 if (opt$x %in% sample_vars) {
171 if (opt$normalize_x && !is.null(opt$x) && opt$x != "") {
172 physeq_agg <- merge_samples(physeq, opt$x)
173
174 physeq <- transform_sample_counts(physeq_agg, function(x) (x / sum(x) * 100))
175 opt$x <- NULL # set to Null since we do not need x for downstream now
176 opt$facet <- NULL # set to Null since facetting does not work with normalize x
177 warning(paste("normalize x does not work with facetting"))
178 }
179 } else {
180 warning(paste("x", opt$x, "not found in sample data. Skipping normalize_x."))
181 }
182
183
184 # Check if the facet variable is valid and exists
185 facet_var <- NULL
186 if (!is.null(opt$facet) && opt$facet != "") {
187 if (opt$facet %in% sample_vars || opt$facet %in% tax_ranks) {
188 facet_var <- opt$facet # Store facet variable for later
189 } else {
190 warning(paste("Facet variable", opt$facet, "not found in sample data or tax ranks. Skipping faceting."))
191 }
192 }
193
194 # Determine if faceting is needed
195 facet_formula <- if (!is.null(facet_var)) as.formula(paste("~", facet_var)) else NULL
196
197 # Define color based on the `nolines` option
198 plot_color <- ifelse(opt$nolines, NA, "black")
199
135 # Generate bar plot 200 # Generate bar plot
136 if (!is.null(opt$x) && opt$x != "") { 201 if (!is.null(opt$x) && opt$x != "") {
137 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + 202 p <- plot_bar(physeq,
138 geom_bar(aes(fill = !!sym(opt$fill)), 203 x = opt$x,
139 stat = "identity", position = "stack", 204 fill = opt$fill
140 color = ifelse(opt$nolines, NA, "black") 205 ) + facet_wrap(facet_formula, scales = "free_x") +
206 geom_bar(
207 stat = "identity",
208 position = "stack",
209 aes(fill = !!sym(opt$fill)),
210 color = plot_color
141 ) 211 )
142 } else { 212 } else {
143 p <- plot_bar(physeq, fill = opt$fill) + 213 p <- plot_bar(physeq,
144 geom_bar(aes(fill = !!sym(opt$fill)), 214 fill = opt$fill
145 stat = "identity", position = "stack", 215 ) + facet_wrap(facet_formula, scales = "free_x") +
146 color = ifelse(opt$nolines, NA, "black") 216 geom_bar(
217 stat = "identity",
218 position = "stack",
219 aes(fill = !!sym(opt$fill)),
220 color = plot_color
147 ) 221 )
148 } 222 }
149 223
150 # Optional: Add faceting if specified 224
151 if (!is.null(opt$facet) && opt$facet != "") { 225 # Reorder fill levels to ensure "Not Assigned" and "Others" are at the bottom if they exist
152 sample_vars <- colnames(sample_data(physeq)) 226 fill_values <- unique(p$data[[opt$fill]]) # Get unique fill values
153 if (opt$facet %in% sample_vars) { 227 new_levels <- setdiff(fill_values, c("Not Assigned", "Others")) # Exclude "Not Assigned" and "Others"
154 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) 228
155 } else { 229 if ("Not Assigned" %in% fill_values) {
156 warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting.")) 230 new_levels <- c("Not Assigned", new_levels) # Place "Not Assigned" at the bottom if it exists
157 } 231 }
158 } 232 if ("Others" %in% fill_values) {
233 new_levels <- c("Others", new_levels) # Place "Others" at the bottom if it exists
234 }
235
236 # Apply the new levels to the fill variable in the plot data
237 p$data[[opt$fill]] <- factor(p$data[[opt$fill]], levels = new_levels)
159 238
160 # Save to output file 239 # Save to output file
161 ggsave( 240 ggsave(
162 filename = opt$output, 241 filename = opt$output,
163 plot = p, 242 plot = p,