comparison phyloseq_plot_bar.R @ 6:a20bc31f2821 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 26f87cc62468c9c329b33246af4f14e2659856f9
author iuc
date Fri, 10 Jan 2025 14:58:27 +0000
parents 1a6c2cc92c6e
children
comparison
equal deleted inserted replaced
5:ba9c14a1064e 6:a20bc31f2821
24 help = "Facet by variable (optional)" 24 help = "Facet by variable (optional)"
25 ), 25 ),
26 make_option(c("--output"), 26 make_option(c("--output"),
27 action = "store", dest = "output", 27 action = "store", dest = "output",
28 help = "Output file (PDF)" 28 help = "Output file (PDF)"
29 ),
30 make_option(c("--topX"),
31 action = "store", dest = "topX", default = NULL,
32 help = "Show only the top X taxa based on abundance (e.g., '10') (optional)"
33 ),
34 make_option(c("--keepOthers"),
35 action = "store_true", dest = "keepOthers", default = FALSE,
36 help = "Keep taxa not in topX and label them as 'Others' (optional)"
37 ),
38 make_option(c("--keepNonAssigned"),
39 action = "store_true", dest = "keepNonAssigned", default = FALSE,
40 help = "Keep taxa labeled as 'Not Assigned' (optional)"
41 ),
42 make_option(c("--normalize"),
43 action = "store_true", dest = "normalize", default = FALSE,
44 help = "Normalize abundances to sum to 100% (optional)"
45 ),
46 make_option(c("--width"),
47 action = "store", dest = "width", default = 10,
48 type = "numeric", help = "Width of the output plot in inches"
49 ),
50 make_option(c("--height"),
51 action = "store", dest = "height", default = 8,
52 type = "numeric", help = "Height of the output plot in inches"
53 ),
54 make_option(c("--device"),
55 action = "store", dest = "device", default = "pdf",
56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')"
29 ) 57 )
30 ) 58 )
31 59
32 # Parse arguments 60 # Parse arguments
33 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) 61 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
36 64
37 # Validate required options 65 # Validate required options
38 if (is.null(opt$input) || opt$input == "") { 66 if (is.null(opt$input) || opt$input == "") {
39 stop("Error: Input file is required.") 67 stop("Error: Input file is required.")
40 } 68 }
41 if (is.null(opt$x) || opt$x == "") {
42 stop("Error: X-axis variable is required.")
43 }
44 if (is.null(opt$output) || opt$output == "") { 69 if (is.null(opt$output) || opt$output == "") {
45 stop("Error: Output file is required.") 70 stop("Error: Output file is required.")
46 } 71 }
47 72
48 # Load phyloseq object 73 # Load phyloseq object
49 print(paste("Trying to read:", opt$input)) 74 print(paste("Trying to read:", opt$input))
50 physeq <- readRDS(opt$input) 75 physeq <- readRDS(opt$input)
51 76
77 # Normalize to relative abundances if requested
78 if (opt$normalize) {
79 print("Normalizing abundances to sum to 100%...")
80 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x))
81 }
82
83 if (opt$keepNonAssigned) {
84 # Add synthetic "Not Assigned" for missing/NA taxa
85 tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x))
86 }
52 # Check if the 'x' and 'fill' variables are valid 87 # Check if the 'x' and 'fill' variables are valid
53 sample_vars <- colnames(sample_data(physeq)) 88 sample_vars <- colnames(sample_data(physeq))
54 if (!opt$x %in% sample_vars) { 89
55 stop(paste("Error: X-axis variable", opt$x, "does not exist in the sample data.")) 90 # If topX is provided, filter the phyloseq object to show only top X taxa
91 if (!is.null(opt$topX) && opt$topX != "") {
92 topX <- as.numeric(opt$topX)
93 if (is.na(topX) || topX <= 0) {
94 stop("Error: topX should be a positive number.")
95 }
96
97 # Aggregate the data at the selected rank (e.g., Phylum)
98 tax_rank <- opt$fill # Adjust as necessary
99 physeq_agg <- tax_glom(physeq, taxrank = tax_rank)
100
101 # Get the abundance of each taxon at the selected rank
102 taxa_abundance <- taxa_sums(physeq_agg)
103
104 # Summarize the abundance at each taxonomic rank (grouping by taxonomic name)
105 tax_table_agg <- tax_table(physeq_agg)
106 taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum)
107
108 # Identify the top X taxa by summed abundance
109 top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX]
110
111 print("Only plotting taxa in TopX taxa group:")
112 print(top_taxa)
113
114 # Get the OTUs corresponding to the top taxa
115 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]
116
117 if (opt$keepOthers) {
118 # Label taxa not in top_taxa as "Others"
119 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others"
120 physeq <- physeq_agg
121 } else {
122 # Subset the phyloseq object to keep only the top X taxa
123 physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg)
124 physeq <- physeq_filtered
125 }
56 } 126 }
57 127
58 # Generate bar plot 128 # Generate bar plot
59 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) 129 if (!is.null(opt$x) && opt$x != "") {
130 p <- plot_bar(physeq, x = opt$x, fill = opt$fill)
131 } else {
132 p <- plot_bar(physeq, fill = opt$fill) # If no x is provided, don't include x
133 }
60 134
61 # Only facet if the facet variable is provided and exists in the sample data 135 # Only facet if the facet variable is provided and exists in the sample data
62 if (!is.null(opt$facet) && opt$facet != "") { 136 if (!is.null(opt$facet) && opt$facet != "") {
63 if (opt$facet %in% sample_vars) { 137 if (opt$facet %in% sample_vars) {
64 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) 138 p <- p + facet_wrap(as.formula(paste("~", opt$facet)))
65 } else { 139 } else {
66 warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped.")) 140 warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped."))
67 } 141 }
68 } 142 }
69 143
70 # Save to output file using PDF device 144 # Save to output file
71 print(paste("Saving plot to:", opt$output)) 145 ggsave(
72 pdf(file = opt$output, width = 10, height = 8) 146 filename = opt$output,
73 print(p) 147 plot = p,
74 dev.off() 148 width = opt$width,
149 height = opt$height,
150 device = opt$device
151 )