Mercurial > repos > iuc > phyloseq_from_dada2
comparison phyloseq_plot_bar.R @ 9:6000b8a8dd9d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit a5ae2f86b2955290a4c81ab234f02cc51020f970
author | iuc |
---|---|
date | Thu, 13 Mar 2025 09:49:14 +0000 |
parents | 0b9d4ec77c4f |
children |
comparison
equal
deleted
inserted
replaced
8:0b9d4ec77c4f | 9:6000b8a8dd9d |
---|---|
2 | 2 |
3 # Load libraries | 3 # Load libraries |
4 suppressPackageStartupMessages(library("optparse")) | 4 suppressPackageStartupMessages(library("optparse")) |
5 suppressPackageStartupMessages(library("phyloseq")) | 5 suppressPackageStartupMessages(library("phyloseq")) |
6 suppressPackageStartupMessages(library("ggplot2")) | 6 suppressPackageStartupMessages(library("ggplot2")) |
7 suppressPackageStartupMessages(library("dplyr")) | |
7 | 8 |
8 # Define options | 9 # Define options |
9 option_list <- list( | 10 option_list <- list( |
10 make_option(c("--input"), | 11 make_option(c("--input"), |
11 action = "store", dest = "input", | 12 action = "store", dest = "input", |
15 action = "store", dest = "x", | 16 action = "store", dest = "x", |
16 help = "Variable for x-axis (e.g., 'Sample', 'Phylum')" | 17 help = "Variable for x-axis (e.g., 'Sample', 'Phylum')" |
17 ), | 18 ), |
18 make_option(c("--fill"), | 19 make_option(c("--fill"), |
19 action = "store", dest = "fill", default = NULL, | 20 action = "store", dest = "fill", default = NULL, |
20 help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)" | 21 help = "Variable for fill color (e.g., 'Genus', 'Order'). Use 'ASV' as argument to show each OTU/ASV." |
21 ), | 22 ), |
22 make_option(c("--facet"), | 23 make_option(c("--facet"), |
23 action = "store", dest = "facet", default = NULL, | 24 action = "store", dest = "facet", default = NULL, |
24 help = "Facet by variable (optional)" | 25 help = "Facet by variable (optional)" |
25 ), | 26 ), |
41 ), | 42 ), |
42 make_option(c("--normalize"), | 43 make_option(c("--normalize"), |
43 action = "store_true", dest = "normalize", default = FALSE, | 44 action = "store_true", dest = "normalize", default = FALSE, |
44 help = "Normalize abundances to sum to 100% (optional)" | 45 help = "Normalize abundances to sum to 100% (optional)" |
45 ), | 46 ), |
47 make_option(c("--normalize_x"), | |
48 action = "store_true", dest = "normalize_x", default = FALSE, | |
49 help = "Normalize x groups to sum up to 100%" | |
50 ), | |
46 make_option(c("--width"), | 51 make_option(c("--width"), |
47 action = "store", dest = "width", default = 10, | 52 action = "store", dest = "width", default = 10, |
48 type = "numeric", help = "Width of the output plot in inches" | 53 type = "numeric", help = "Width of the output plot in inches" |
49 ), | 54 ), |
50 make_option(c("--height"), | 55 make_option(c("--height"), |
51 action = "store", dest = "height", default = 8, | 56 action = "store", dest = "height", default = 8, |
52 type = "numeric", help = "Height of the output plot in inches" | 57 type = "numeric", help = "Height of the output plot in inches" |
53 ), | 58 ), |
54 make_option(c("--device"), | 59 make_option(c("--device"), |
55 action = "store", dest = "device", default = "pdf", | 60 action = "store", dest = "device", default = "pdf", |
56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')" | 61 help = "Output format (e.g., 'pdf', 'png', 'jpg')" |
57 ), | 62 ), |
58 make_option(c("--nolines"), | 63 make_option(c("--nolines"), |
59 type = "logical", default = FALSE, | 64 action = "store_true", dest = "nolines", default = FALSE, |
60 help = "Remove borders (lines) around bars (TRUE/FALSE)" | 65 help = "Remove borders (lines) around bars (TRUE/FALSE)" |
61 ) | 66 ) |
62 ) | 67 ) |
68 | |
63 | 69 |
64 # Parse arguments | 70 # Parse arguments |
65 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) | 71 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) |
66 args <- parse_args(parser, positional_arguments = TRUE) | 72 args <- parse_args(parser, positional_arguments = TRUE) |
67 opt <- args$options | 73 opt <- args$options |
68 | 74 |
75 | |
69 # Validate required options | 76 # Validate required options |
70 if (is.null(opt$input) || opt$input == "") { | 77 if (is.null(opt$input) || opt$input == "") { |
71 stop("Error: Input file is required.") | 78 stop("Error: Input file is required.") |
72 } | 79 } |
73 if (is.null(opt$output) || opt$output == "") { | 80 if (is.null(opt$output) || opt$output == "") { |
74 stop("Error: Output file is required.") | 81 stop("Error: Output file is required.") |
75 } | 82 } |
76 | 83 |
84 if (is.null(opt$fill) || opt$fill == "") { | |
85 print(paste("No fill chosen using ASV")) | |
86 opt$fill <- "ASV" | |
87 } | |
88 | |
77 # Load phyloseq object | 89 # Load phyloseq object |
78 print(paste("Trying to read:", opt$input)) | 90 print(paste("Trying to read:", opt$input)) |
79 physeq <- readRDS(opt$input) | 91 physeq <- readRDS(opt$input) |
92 | |
93 ## Allow to use OTU as tax group | |
94 # Extract rownames (taxids) from the tax_table and add them as a new column | |
95 taxids <- rownames(tax_table(physeq)) | |
96 | |
97 # Get the number of columns in the tax_table | |
98 num_columns <- ncol(tax_table(physeq)) | |
99 | |
100 # Add the taxids as a new last column in the tax_table | |
101 tax_table(physeq) <- cbind(tax_table(physeq), taxid = taxids) | |
102 | |
103 # Rename the last column to 'ASV' / OTU does conflict with phyloseq logic | |
104 colnames(tax_table(physeq))[num_columns + 1] <- "ASV" | |
80 | 105 |
81 # Normalize to relative abundances if requested | 106 # Normalize to relative abundances if requested |
82 if (opt$normalize) { | 107 if (opt$normalize) { |
83 print("Normalizing abundances to sum to 100%...") | 108 print("Normalizing abundances to sum to 100%...") |
84 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) | 109 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) |
85 } | 110 } |
86 | 111 |
87 # Debug: Check available taxonomic ranks | 112 # Debug: Check available taxonomic ranks |
113 | |
114 tax_ranks <- colnames(tax_table(physeq)) | |
115 sample_vars <- colnames(sample_data(physeq)) | |
116 | |
88 print("Available taxonomic ranks:") | 117 print("Available taxonomic ranks:") |
89 print(colnames(tax_table(physeq))) | 118 print(tax_ranks) |
119 | |
120 print("Available metadata:") | |
121 print(sample_vars) | |
90 | 122 |
91 # Handle missing or unassigned taxa for all ranks | 123 # Handle missing or unassigned taxa for all ranks |
92 if (opt$keepNonAssigned) { | 124 if (opt$keepNonAssigned) { |
93 # Replace NA or empty values with 'Not Assigned' for all ranks | 125 # Replace NA or empty values with 'Not Assigned' for all ranks |
94 tax_ranks <- colnames(tax_table(physeq)) | |
95 | 126 |
96 for (rank in tax_ranks) { | 127 for (rank in tax_ranks) { |
97 if (rank %in% colnames(tax_table(physeq))) { | 128 if (rank %in% tax_ranks) { |
98 # replace NA or empty values with 'Not Assigned' | 129 # replace NA or empty values with 'Not Assigned' |
99 tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" | 130 tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" |
100 } | 131 } |
101 } | 132 } |
102 } | 133 } |
122 print("Top taxa:") | 153 print("Top taxa:") |
123 print(top_taxa) | 154 print(top_taxa) |
124 | 155 |
125 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] | 156 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] |
126 | 157 |
158 # Group non-top OTUs as 'Others' if requested | |
127 if (opt$keepOthers) { | 159 if (opt$keepOthers) { |
160 # Update the tax_table to assign 'Others' to non-top taxa | |
128 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" | 161 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" |
129 physeq <- physeq_agg | 162 physeq <- physeq_agg |
130 } else { | 163 } else { |
131 physeq <- prune_taxa(otus_in_top_taxa, physeq_agg) | 164 physeq <- prune_taxa(otus_in_top_taxa, physeq_agg) |
132 } | 165 } |
133 } | 166 } |
134 | 167 |
168 | |
169 # normalize x groups if needed | |
170 if (opt$x %in% sample_vars) { | |
171 if (opt$normalize_x && !is.null(opt$x) && opt$x != "") { | |
172 physeq_agg <- merge_samples(physeq, opt$x) | |
173 | |
174 physeq <- transform_sample_counts(physeq_agg, function(x) (x / sum(x) * 100)) | |
175 opt$x <- NULL # set to Null since we do not need x for downstream now | |
176 opt$facet <- NULL # set to Null since facetting does not work with normalize x | |
177 warning(paste("normalize x does not work with facetting")) | |
178 } | |
179 } else { | |
180 warning(paste("x", opt$x, "not found in sample data. Skipping normalize_x.")) | |
181 } | |
182 | |
183 | |
184 # Check if the facet variable is valid and exists | |
185 facet_var <- NULL | |
186 if (!is.null(opt$facet) && opt$facet != "") { | |
187 if (opt$facet %in% sample_vars || opt$facet %in% tax_ranks) { | |
188 facet_var <- opt$facet # Store facet variable for later | |
189 } else { | |
190 warning(paste("Facet variable", opt$facet, "not found in sample data or tax ranks. Skipping faceting.")) | |
191 } | |
192 } | |
193 | |
194 # Determine if faceting is needed | |
195 facet_formula <- if (!is.null(facet_var)) as.formula(paste("~", facet_var)) else NULL | |
196 | |
197 # Define color based on the `nolines` option | |
198 plot_color <- ifelse(opt$nolines, NA, "black") | |
199 | |
135 # Generate bar plot | 200 # Generate bar plot |
136 if (!is.null(opt$x) && opt$x != "") { | 201 if (!is.null(opt$x) && opt$x != "") { |
137 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + | 202 p <- plot_bar(physeq, |
138 geom_bar(aes(fill = !!sym(opt$fill)), | 203 x = opt$x, |
139 stat = "identity", position = "stack", | 204 fill = opt$fill |
140 color = ifelse(opt$nolines, NA, "black") | 205 ) + facet_wrap(facet_formula, scales = "free_x") + |
206 geom_bar( | |
207 stat = "identity", | |
208 position = "stack", | |
209 aes(fill = !!sym(opt$fill)), | |
210 color = plot_color | |
141 ) | 211 ) |
142 } else { | 212 } else { |
143 p <- plot_bar(physeq, fill = opt$fill) + | 213 p <- plot_bar(physeq, |
144 geom_bar(aes(fill = !!sym(opt$fill)), | 214 fill = opt$fill |
145 stat = "identity", position = "stack", | 215 ) + facet_wrap(facet_formula, scales = "free_x") + |
146 color = ifelse(opt$nolines, NA, "black") | 216 geom_bar( |
217 stat = "identity", | |
218 position = "stack", | |
219 aes(fill = !!sym(opt$fill)), | |
220 color = plot_color | |
147 ) | 221 ) |
148 } | 222 } |
149 | 223 |
150 # Optional: Add faceting if specified | 224 |
151 if (!is.null(opt$facet) && opt$facet != "") { | 225 # Reorder fill levels to ensure "Not Assigned" and "Others" are at the bottom if they exist |
152 sample_vars <- colnames(sample_data(physeq)) | 226 fill_values <- unique(p$data[[opt$fill]]) # Get unique fill values |
153 if (opt$facet %in% sample_vars) { | 227 new_levels <- setdiff(fill_values, c("Not Assigned", "Others")) # Exclude "Not Assigned" and "Others" |
154 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) | 228 |
155 } else { | 229 if ("Not Assigned" %in% fill_values) { |
156 warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting.")) | 230 new_levels <- c("Not Assigned", new_levels) # Place "Not Assigned" at the bottom if it exists |
157 } | 231 } |
158 } | 232 if ("Others" %in% fill_values) { |
233 new_levels <- c("Others", new_levels) # Place "Others" at the bottom if it exists | |
234 } | |
235 | |
236 # Apply the new levels to the fill variable in the plot data | |
237 p$data[[opt$fill]] <- factor(p$data[[opt$fill]], levels = new_levels) | |
159 | 238 |
160 # Save to output file | 239 # Save to output file |
161 ggsave( | 240 ggsave( |
162 filename = opt$output, | 241 filename = opt$output, |
163 plot = p, | 242 plot = p, |