annotate phyloseq_tax_glom.R @ 10:7ed99cb39c9d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
author iuc
date Fri, 04 Apr 2025 10:15:49 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
1 suppressPackageStartupMessages(library("phyloseq"))
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
2 suppressPackageStartupMessages(library("dplyr"))
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
3 suppressPackageStartupMessages(library("optparse"))
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
4
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
# Command-line interface.
#   -i/--input            phyloseq object serialized as RDS
#   -r/--rank             taxonomic rank to aggregate at
#   --exclude_otu_ids     flag; omit the OTU ID column from the output
#   --single_rank         flag; emit only the chosen rank column
#   --exclude_na_values   flag; forwarded to tax_glom as NArm
option_list <- list(
  make_option(
    c("-i", "--input"),
    type = "character",
    help = "Path to the phyloseq RDS file",
    metavar = "FILE"
  ),
  make_option(
    c("-r", "--rank"),
    type = "character",
    help = "Taxonomic rank for aggregation"
  ),
  make_option(
    "--exclude_otu_ids",
    action = "store_true",
    default = FALSE,
    help = "Exclude OTU IDs from output"
  ),
  make_option(
    "--single_rank",
    action = "store_true",
    default = FALSE,
    help = "Only output the specified rank column"
  ),
  make_option(
    "--exclude_na_values",
    action = "store_true",
    default = FALSE,
    help = "Exclude NA values during tax_glom"
  )
)
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
13
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
# Read the command line into `opt` using the options declared above.
parser <- OptionParser(option_list = option_list)
opt <- parse_args(parser)

# Both the input file and the rank have no default, so they come back as NULL
# when the caller omits them.
missing_required <- is.null(opt$input) || is.null(opt$rank)
if (missing_required) {
  stop("Error: --input and --rank are required arguments.")
}

# --single_rank is only accepted together with --exclude_otu_ids.
if (opt$single_rank) {
  if (!opt$exclude_otu_ids) {
    stop("Error: --single_rank can only be used if --exclude_otu_ids is also specified.")
  }
}
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
25
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
# Deserialize the phyloseq object supplied via --input.
physeq <- readRDS(opt$input)

# Report the ranks present in the object and its size before aggregation.
available_ranks <- rank_names(physeq)
cat("Available taxonomic ranks:\n")
print(available_ranks)

n_taxa_before <- ntaxa(physeq)
cat("Original number of OTUs:", n_taxa_before, "\n")

# Agglomerate taxa at the requested rank; the --exclude_na_values flag is
# passed straight through as tax_glom's NArm argument.
physeq_agg <- tax_glom(
  physeq,
  taxrank = opt$rank,
  NArm = opt$exclude_na_values
)

# Report the size after aggregation.
n_taxa_after <- ntaxa(physeq_agg)
cat("Number of taxa after agglomeration at", opt$rank, "level:", n_taxa_after, "\n")
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
41
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
# Build the combined output table: taxonomy columns (optionally preceded by an
# "OTU ID" column) followed by one abundance column per sample.

# Taxonomy of the agglomerated object as a data frame, one row per taxon.
tax_table_agg <- as.data.frame(tax_table(physeq_agg))

# Force every taxonomy column to character so missing ranks stay NA.
tax_table_agg[] <- lapply(tax_table_agg, as.character)

# Prepend the taxon identifiers (the row names) unless the caller opted out.
if (!opt$exclude_otu_ids) {
  tax_table_agg <- cbind("OTU ID" = rownames(tax_table_agg), tax_table_agg)
}

# Abundance table. phyloseq may store it either taxa-by-samples or
# samples-by-taxa; the taxonomy table is always one row per taxon. Orient the
# counts as taxa-by-samples first, otherwise cbind() below would pair taxonomy
# rows with sample rows (or fail on differing row counts).
otu_counts <- otu_table(physeq_agg)
if (!taxa_are_rows(otu_counts)) {
  otu_counts <- t(otu_counts)
}
otu_table_agg <- as.data.frame(otu_counts)

# Append the per-sample counts to the taxonomy columns. Rows are paired by
# position, which relies on both components listing taxa in the same order.
otu_table_agg <- cbind(tax_table_agg, otu_table_agg)

# Downstream steps operate on `tax_table_agg`, so expose the merged table
# under that name.
tax_table_agg <- otu_table_agg
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
60
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
# With --single_rank, collapse the table to one row per value of the chosen
# rank, summing every numeric (count) column.
if (opt$single_rank) {
  # Drop everything except the chosen rank column and the numeric columns.
  rank_and_counts <- select(
    tax_table_agg,
    all_of(opt$rank),
    where(is.numeric)
  )

  # Group rows sharing the same rank value, then total the counts per group.
  by_rank <- group_by(rank_and_counts, across(all_of(opt$rank)))
  tax_table_agg <- summarise(
    by_rank,
    across(where(is.numeric), sum),
    .groups = "drop"
  )
}
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
70
7ed99cb39c9d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff changeset
# Write the final table as tab-separated text into the working directory.
# The file name embeds the requested rank; the header row is written, row
# names and quoting are not.
output_file <- paste0("physeq_", opt$rank, "_table.tsv")
write.table(
  tax_table_agg,
  file = output_file,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)
diff changeset
73 write.table(tax_table_agg, file = output_file, sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)