Repository 'phyloseq_plot_richness'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/phyloseq_plot_richness

Changeset 10:7ed99cb39c9d (2025-04-04)
Previous changeset 9:10a7732528b2 (2025-03-13)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
added:
phyloseq_tax_glom.R
test-data/SRR1770594.phyloseq
test-data/tax_glom_output0.tabular
test-data/tax_glom_output1.tabular
test-data/tax_glom_output2.tabular
test-data/tax_glom_output3.tabular
test-data/tax_glom_output4.tabular
test-data/tax_glom_output5.tabular
b
diff -r 10a7732528b2 -r 7ed99cb39c9d phyloseq_tax_glom.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_tax_glom.R Fri Apr 04 10:15:49 2025 +0000
[
@@ -0,0 +1,73 @@
+suppressPackageStartupMessages(library("phyloseq"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("optparse"))
+
+# Define command-line options
+option_list <- list(
+    make_option(c("-i", "--input"), type = "character", help = "Path to the phyloseq RDS file", metavar = "FILE"),
+    make_option(c("-r", "--rank"), type = "character", help = "Taxonomic rank for aggregation"),
+    make_option("--exclude_otu_ids", action = "store_true", default = FALSE, help = "Exclude OTU IDs from output"),
+    make_option("--single_rank", action = "store_true", default = FALSE, help = "Only output the specified rank column"),
+    make_option("--exclude_na_values", action = "store_true", default = FALSE, help = "Exclude NA values during tax_glom")
+)
+
+# Parse arguments
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# Validate arguments: the input file and the rank are both mandatory
+if (is.null(opt$input) || is.null(opt$rank)) {
+    stop("Error: --input and --rank are required arguments.")
+}
+
+# --single_rank is only accepted together with --exclude_otu_ids
+if (opt$single_rank && !opt$exclude_otu_ids) {
+    stop("Error: --single_rank can only be used if --exclude_otu_ids is also specified.")
+}
+
+# Load the phyloseq object
+physeq <- readRDS(opt$input)
+
+# Print available taxonomic ranks
+cat("Available taxonomic ranks:\n")
+print(rank_names(physeq))
+
+# Print original number of OTUs
+cat("Original number of OTUs:", ntaxa(physeq), "\n")
+
+# Agglomerate taxa at the requested rank; NArm is driven by --exclude_na_values
+physeq_agg <- tax_glom(physeq, taxrank = opt$rank, NArm = opt$exclude_na_values)
+
+# Print new number of taxa after agglomeration
+cat("Number of taxa after agglomeration at", opt$rank, "level:", ntaxa(physeq_agg), "\n")
+
+# Taxonomy as a data frame of character columns so NA values are preserved
+tax_df <- as.data.frame(tax_table(physeq_agg))
+tax_df[] <- lapply(tax_df, as.character)
+
+# Add OTU ID column unless excluded
+if (!opt$exclude_otu_ids) {
+    tax_df <- cbind("OTU ID" = rownames(tax_df), tax_df)
+}
+
+# Abundances must be taxa x samples to line up row-wise with the taxonomy;
+# phyloseq may also store them samples x taxa, so transpose when needed
+otu_counts <- otu_table(physeq_agg)
+if (!taxa_are_rows(otu_counts)) {
+    otu_counts <- t(otu_counts)
+}
+otu_df <- as.data.frame(otu_counts)
+
+# One row per taxon: taxonomy columns followed by the per-sample counts
+result <- cbind(tax_df, otu_df)
+
+if (opt$single_rank) {
+    # Keep only the rank column and the numeric counts, then sum counts per rank value
+    result <- result %>%
+        select(all_of(opt$rank), where(is.numeric)) %>%
+        group_by(across(all_of(opt$rank))) %>%
+        summarise(across(where(is.numeric), sum), .groups = "drop")
+}
+
+# Save the output as a TSV file
+output_file <- paste0("physeq_", opt$rank, "_table.tsv")
+write.table(result, file = output_file, sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
b
diff -r 10a7732528b2 -r 7ed99cb39c9d test-data/SRR1770594.phyloseq
b
Binary file test-data/SRR1770594.phyloseq has changed
b
diff -r 10a7732528b2 -r 7ed99cb39c9d test-data/tax_glom_output0.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output0.tabular Fri Apr 04 10:15:49 2025 +0000
b
@@ -0,0 +1,18 @@
+OTU ID Superkingdom Kingdom Phylum Class Order Family Genus sa1
+55939 sk__Archaea k__ p__Thaumarchaeota c__Nitrososphaeria NA NA NA 1
+220170 sk__Archaea k__ p__Thaumarchaeota c__Nitrososphaeria o__Nitrososphaerales f__Nitrososphaeraceae NA 1
+107861 sk__Bacteria k__ p__Acidobacteria c__Acidobacteriia o__Bryobacterales f__Bryobacteraceae g__Bryobacter 2
+196447 sk__Bacteria k__ p__Acidobacteria c__Thermoanaerobaculia o__Thermoanaerobaculales f__Thermoanaerobaculaceae NA 1
+23596 sk__Bacteria k__ p__Actinobacteria NA NA NA NA 1
+126258 sk__Bacteria k__ p__Actinobacteria c__Acidimicrobiia NA NA NA 2
+105940 sk__Bacteria k__ p__Actinobacteria c__Actinobacteria NA NA NA 1
+8251 sk__Bacteria k__ p__Actinobacteria c__Actinobacteria o__Micromonosporales f__Micromonosporaceae NA 2
+58206 sk__Bacteria k__ p__Actinobacteria c__Rubrobacteria o__Gaiellales NA NA 1
+127789 sk__Bacteria k__ p__Bacteroidetes c__Chitinophagia o__Chitinophagales NA NA 1
+173872 sk__Bacteria k__ p__Bacteroidetes c__Chitinophagia o__Chitinophagales f__Chitinophagaceae g__Flavisolibacter 1
+176861 sk__Bacteria k__ p__Bacteroidetes c__Cytophagia o__Cytophagales NA NA 1
+166810 sk__Bacteria k__ p__Bacteroidetes c__Cytophagia o__Cytophagales f__Microscillaceae NA 1
+93849 sk__Bacteria k__ p__Cyanobacteria NA NA NA NA 1
+111794 sk__Bacteria k__ p__Proteobacteria c__Alphaproteobacteria o__Rhizobiales f__Bradyrhizobiaceae NA 1
+83155 sk__Bacteria k__ p__Verrucomicrobia c__Verrucomicrobiae NA NA NA 2
+125725 sk__Eukaryota k__ p__ c__Bigyra o__Amphifilida f__ g__Sorodiplophrys 1
b
diff -r 10a7732528b2 -r 7ed99cb39c9d test-data/tax_glom_output1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output1.tabular Fri Apr 04 10:15:49 2025 +0000
b
@@ -0,0 +1,4 @@
+OTU ID Superkingdom Kingdom Phylum Class Order Family Genus sa1
+107861 sk__Bacteria k__ p__Acidobacteria c__Acidobacteriia o__Bryobacterales f__Bryobacteraceae g__Bryobacter 2
+173872 sk__Bacteria k__ p__Bacteroidetes c__Chitinophagia o__Chitinophagales f__Chitinophagaceae g__Flavisolibacter 1
+125725 sk__Eukaryota k__ p__ c__Bigyra o__Amphifilida f__ g__Sorodiplophrys 1
b
diff -r 10a7732528b2 -r 7ed99cb39c9d test-data/tax_glom_output2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output2.tabular Fri Apr 04 10:15:49 2025 +0000
b
@@ -0,0 +1,4 @@
+Superkingdom Kingdom Phylum Class Order Family Genus sa1
+sk__Bacteria k__ p__Acidobacteria c__Acidobacteriia o__Bryobacterales f__Bryobacteraceae g__Bryobacter 2
+sk__Bacteria k__ p__Bacteroidetes c__Chitinophagia o__Chitinophagales f__Chitinophagaceae g__Flavisolibacter 1
+sk__Eukaryota k__ p__ c__Bigyra o__Amphifilida f__ g__Sorodiplophrys 1
b
diff -r 10a7732528b2 -r 7ed99cb39c9d test-data/tax_glom_output3.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output3.tabular Fri Apr 04 10:15:49 2025 +0000
b
@@ -0,0 +1,4 @@
+Genus sa1
+g__Bryobacter 2
+g__Flavisolibacter 1
+g__Sorodiplophrys 1
b
diff -r 10a7732528b2 -r 7ed99cb39c9d test-data/tax_glom_output4.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output4.tabular Fri Apr 04 10:15:49 2025 +0000
b
@@ -0,0 +1,25 @@
+OTU ID Kingdom Phylum Class Order Family Genus SRR14190457 SRR14190458
+ASV1 Bacteria Firmicutes Bacilli Lactobacillales Lactobacillaceae NA 914 534
+ASV2 Bacteria Bacteroidota Bacteroidia Bacteroidales Bacteroidaceae NA 488 215
+ASV3 Bacteria Actinobacteriota Actinobacteria Bifidobacteriales Bifidobacteriaceae NA 199 157
+ASV5 Bacteria Firmicutes Clostridia Lachnospirales Lachnospiraceae NA 531 419
+ASV9 Bacteria Firmicutes Bacilli Bacillales Bacillaceae NA 88 26
+ASV20 Bacteria Firmicutes Bacilli Erysipelotrichales Erysipelotrichaceae NA 101 99
+ASV28 Bacteria Firmicutes Clostridia Oscillospirales Butyricicoccaceae NA 55 47
+ASV29 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Sutterellaceae NA 54 47
+ASV31 Bacteria Firmicutes Negativicutes Veillonellales-Selenomonadales Veillonellaceae NA 119 83
+ASV34 Bacteria Bacteroidota Bacteroidia Bacteroidales Porphyromonadaceae NA 51 53
+ASV36 Bacteria Firmicutes Negativicutes Acidaminococcales Acidaminococcaceae NA 47 54
+ASV41 Bacteria Firmicutes Bacilli Mycoplasmatales Mycoplasmataceae NA 42 58
+ASV43 Bacteria Firmicutes Bacilli Lactobacillales Streptococcaceae NA 40 60
+ASV45 Bacteria Firmicutes Bacilli Staphylococcales Staphylococcaceae NA 38 62
+ASV47 Bacteria Bacteroidota Bacteroidia Bacteroidales Prevotellaceae NA 32 66
+ASV51 Bacteria Firmicutes Clostridia Oscillospirales Oscillospiraceae NA 28 71
+ASV52 Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae NA 28 71
+ASV55 Bacteria Actinobacteriota Actinobacteria Corynebacteriales Corynebacteriaceae NA 26 86
+ASV59 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae NA 22 104
+ASV60 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae NA 503 607
+ASV61 Bacteria Firmicutes Clostridia Peptostreptococcales-Tissierellales Peptostreptococcaceae NA 22 128
+ASV62 Bacteria Actinobacteriota Actinobacteria Propionibacteriales Propionibacteriaceae NA 16 129
+ASV63 Bacteria Bacteroidota Bacteroidia Bacteroidales Muribaculaceae NA 15 136
+ASV64 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae NA 11 178
b
diff -r 10a7732528b2 -r 7ed99cb39c9d test-data/tax_glom_output5.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output5.tabular Fri Apr 04 10:15:49 2025 +0000
b
@@ -0,0 +1,25 @@
+Family SRR14190457 SRR14190458
+Acidaminococcaceae 47 54
+Bacillaceae 88 26
+Bacteroidaceae 488 215
+Bifidobacteriaceae 199 157
+Butyricicoccaceae 55 47
+Chitinophagaceae 11 178
+Corynebacteriaceae 26 86
+Enterobacteriaceae 503 607
+Erysipelotrichaceae 101 99
+Lachnospiraceae 531 419
+Lactobacillaceae 914 534
+Muribaculaceae 15 136
+Mycoplasmataceae 42 58
+Oscillospiraceae 28 71
+Peptostreptococcaceae 22 128
+Porphyromonadaceae 51 53
+Prevotellaceae 32 66
+Propionibacteriaceae 16 129
+Pseudomonadaceae 22 104
+Ruminococcaceae 28 71
+Staphylococcaceae 38 62
+Streptococcaceae 40 60
+Sutterellaceae 54 47
+Veillonellaceae 119 83