## Import packages
library(phyloseq)
library(ggplot2)
library(ape)
## Setting variables
## The ASV abundance matrix with taxonomy annotation file (biom format)
# biomfile <- ""
## The sample metadata file(TSV format)
# samplefile <- ""
## (optional) the ASV tree file (nwk format). Write "None" if you do not have any tree
# treefile <- ""
## The ordered taxonomic levels stored in BIOM. Each level is separated by one space.
## default : "Kingdom Phylum Class Order Family Genus Species"
# ranks <- ""
## Do you want to normalise your data ? "True" or "False"
# normalisation <- ""
## Create input and parameters dataframe
## (the code below also reads params$outputRdata, the path of the Rdata file to write)
# params <- data.frame( "biomfile" = biomfile, "samplefile" = samplefile, "treefile" = treefile, "ranks" = ranks, "normalisation" = normalisation)
## Import data
## Reads the BIOM file into a phyloseq object and the sample metadata (TSV,
## first column used as row names, column names kept as written in the file).
biomfile <- params$biomfile
data <- import_biom(biomfile)
sampledata <- read.csv(params$samplefile, sep = "\t", row.names = 1, check.names = FALSE)
## If the taxonomy starts with k__ it is in a Greengenes-like format:
## the import needs to be done with the parse_taxonomy_greengenes function and,
## in this case, the user specified rank names are ignored.
## Only the first cell of the taxonomy table is inspected.
tax <- tax_table(data)[[1]]
## isTRUE() keeps the condition well-defined when the first taxonomy cell is NA
## (a bare comparison would make `if` fail on a missing value).
if (isTRUE(grepl("k__", tax, fixed = TRUE))) {
  cat("Warning : Taxonomic affiliations come from Greengenes database, user specified ranks names are ignored.")
  data <- import_biom(biomfile, parseFunction = parse_taxonomy_greengenes)
} else {
  ## else, customize the rank names with the user specified ranks variable.
  ## Leading/trailing blanks and repeated spaces are tolerated so that they do
  ## not produce empty rank names.
  new_rank <- strsplit(trimws(as.character(params$ranks)), " +")[[1]]
  n_levels <- ncol(tax_table(data))
  ## Fail early with an explicit message instead of a dimnames length error.
  if (length(new_rank) != n_levels) {
    stop(
      sprintf(
        "%d rank name(s) provided (%s) but the taxonomy table has %d level(s).",
        length(new_rank), paste(new_rank, collapse = ", "), n_levels
      ),
      call. = FALSE
    )
  }
  colnames(tax_table(data)) <- new_rank
}
Warning : Taxonomic affiliations come from Greengenes database, user specified ranks names are ignored.
## Store the sample names in the metadata as the SampleID variable,
## then attach the metadata to the phyloseq object
sampledata[["SampleID"]] <- rownames(sampledata)
sample_data(data) <- sampledata
## Attach the tree to the object unless the tree file is declared as "None"
has_tree <- params$treefile != "None"
if (has_tree) {
  phy_tree(data) <- read.tree(params$treefile)
}
## Turn every sample variable into a factor whose levels follow the order of
## first appearance in the input sample file
for (variable in sample_variables(data)) {
  ordered_levels <- as.vector(unique(sampledata[[variable]]))
  sample_data(data)[, variable] <- factor(
    get_variable(data, variable),
    levels = ordered_levels
  )
}
## Discard samples whose total abundance is zero (their names are kept in
## empty_samples)
depth_per_sample <- sample_sums(data)
empty_samples <- sample_names(data)[which(depth_per_sample == 0)]
kept_samples <- sample_names(data)[which(depth_per_sample > 0)]
data <- prune_samples(kept_samples, data)
## Discard taxa whose total abundance is zero once the empty samples are gone
## (their names are kept in empty_taxa)
abundance_per_taxon <- taxa_sums(data)
empty_taxa <- taxa_names(data)[which(abundance_per_taxon == 0)]
kept_taxa <- taxa_names(data)[which(abundance_per_taxon > 0)]
data <- prune_taxa(kept_taxa, data)
## Optional abundance normalisation by rarefaction, with a fixed rngseed
if (params$normalisation) {
  data <- rarefy_even_depth(data, rngseed = 1121983)
}
## Write the phyloseq object to the Rdata file, then print its summary
save(data, file = params$outputRdata)
data
phyloseq-class experiment-level object
otu_table() OTU Table: [ 507 taxa and 64 samples ]
sample_data() Sample Data: [ 64 samples by 4 sample variables ]
tax_table() Taxonomy Table: [ 507 taxa by 7 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 507 tips and 506 internal nodes ]
## Report the names of the samples and taxa that were found empty, if any
if (length(empty_samples) > 0) {
  cat("Remove empty samples: ", paste(empty_samples, collapse = ","))
}
if (length(empty_taxa) > 0) {
  cat("Remove empty taxa: ", paste(empty_taxa, collapse = ","))
}
Remove empty taxa: otu_01781
## When normalisation was requested, print the sequence count of the first sample
if (params$normalisation) {
  first_sample_depth <- sample_sums(data)[[1]]
  cat(paste("Number of sequences in each sample after normalisation: ", first_sample_depth))
}
## Print the taxonomic rank names stored in the object
rank_list <- paste(rank_names(data), collapse = ", ")
cat(paste("Rank names : ", rank_list))
Rank names : Kingdom, Phylum, Class, Order, Family, Genus, Species
## Print the names of the sample variables stored in the object
variables <- sample_variables(data)
variable_list <- paste(variables, collapse = ", ")
cat(paste("Sample variables: ", variable_list))
Sample variables: EnvType, Description, FoodType, SampleID
## For each sample variable, print its levels on one line
for (var_name in variables) {
  var_levels <- levels(factor(get_variable(data, varName = var_name)))
  level_list <- paste(var_levels, collapse = ", ")
  cat(paste(var_name, ": ", level_list, "\n\n"))
}
EnvType : BoeufHache, VeauHache, DesLardons, MerguezVolaille, SaumonFume, FiletSaumon, FiletCabillaud, Crevette
Description : LOT1, LOT3, LOT4, LOT5, LOT6, LOT7, LOT8, LOT10, LOT2, LOT9
FoodType : Meat, Seafood
SampleID : BHT0.LOT01, BHT0.LOT03, BHT0.LOT04, BHT0.LOT05, BHT0.LOT06, BHT0.LOT07, BHT0.LOT08, BHT0.LOT10, VHT0.LOT01, VHT0.LOT02, VHT0.LOT03, VHT0.LOT04, VHT0.LOT06, VHT0.LOT07, VHT0.LOT08, VHT0.LOT10, DLT0.LOT01, DLT0.LOT03, DLT0.LOT04, DLT0.LOT05, DLT0.LOT06, DLT0.LOT07, DLT0.LOT08, DLT0.LOT10, MVT0.LOT01, MVT0.LOT03, MVT0.LOT05, MVT0.LOT06, MVT0.LOT07, MVT0.LOT08, MVT0.LOT09, MVT0.LOT10, SFT0.LOT01, SFT0.LOT02, SFT0.LOT03, SFT0.LOT04, SFT0.LOT05, SFT0.LOT06, SFT0.LOT07, SFT0.LOT08, FST0.LOT01, FST0.LOT02, FST0.LOT03, FST0.LOT05, FST0.LOT06, FST0.LOT07, FST0.LOT08, FST0.LOT10, FCT0.LOT01, FCT0.LOT02, FCT0.LOT03, FCT0.LOT05, FCT0.LOT06, FCT0.LOT07, FCT0.LOT08, FCT0.LOT10, CDT0.LOT02, CDT0.LOT04, CDT0.LOT05, CDT0.LOT06, CDT0.LOT07, CDT0.LOT08, CDT0.LOT09, CDT0.LOT10
## Draw the tree colored by the second taxonomic rank, or print a message
## when the tree file is declared as "None"
if (params$treefile == "None") {
  cat("There is no phylogenetic tree in the object you have provided.")
} else {
  color_rank <- rank_names(data)[2]
  tree_title <- paste("Phylogenetic tree colored by", color_rank)
  p <- plot_tree(data, color = color_rank) +
    ggtitle(tree_title) +
    theme(plot.title = element_text(hjust = 0.5))
  plot(p)
}
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.1.2 (2021-11-01)
os Ubuntu 24.04.2 LTS
system x86_64, linux-gnu
ui X11
language fr_FR:en
collate en_US.utf8
ctype en_US.utf8
tz Europe/Paris
date 2026-01-14
pandoc 2.19.2 @ /home/maria/miniforge3/envs/frogs@5.1.0/bin/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
ade4 1.7-22 2023-02-06 [1] CRAN (R 4.1.3)
ape * 5.7-1 2023-03-13 [1] CRAN (R 4.1.3)
Biobase 2.54.0 2021-10-26 [1] Bioconductor
BiocGenerics 0.40.0 2021-10-26 [1] Bioconductor
biomformat 1.22.0 2021-10-26 [1] Bioconductor
Biostrings 2.62.0 2021-10-26 [1] Bioconductor
bitops 1.0-7 2021-04-24 [1] CRAN (R 4.1.3)
bslib 0.5.0 2023-06-09 [1] CRAN (R 4.1.3)
cachem 1.0.8 2023-05-01 [1] CRAN (R 4.1.3)
cli 3.6.1 2023-03-23 [1] CRAN (R 4.1.3)
cluster 2.1.4 2022-08-22 [1] CRAN (R 4.1.3)
codetools 0.2-19 2023-02-01 [1] CRAN (R 4.1.3)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.1.3)
crayon 1.5.2 2022-09-29 [1] CRAN (R 4.1.3)
data.table 1.14.8 2023-02-17 [1] CRAN (R 4.1.3)
digest 0.6.31 2022-12-11 [1] CRAN (R 4.1.3)
dplyr 1.1.2 2023-04-20 [1] CRAN (R 4.1.3)
evaluate 0.21 2023-05-05 [1] CRAN (R 4.1.3)
fansi 1.0.4 2023-01-22 [1] CRAN (R 4.1.3)
farver 2.1.1 2022-07-06 [1] CRAN (R 4.1.3)
fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.1.3)
foreach 1.5.2 2022-02-02 [1] CRAN (R 4.1.3)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.1.3)
GenomeInfoDb 1.30.1 2022-01-30 [1] Bioconductor
GenomeInfoDbData 1.2.7 2026-01-14 [1] Bioconductor
ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.1.3)
glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.3)
gtable 0.3.3 2023-03-21 [1] CRAN (R 4.1.3)
highr 0.10 2022-12-22 [1] CRAN (R 4.1.3)
htmltools 0.5.5 2023-03-23 [1] CRAN (R 4.1.3)
igraph 1.3.5 2022-09-22 [1] CRAN (R 4.1.3)
IRanges 2.28.0 2021-10-26 [1] Bioconductor
iterators 1.0.14 2022-02-05 [1] CRAN (R 4.1.3)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.3)
jsonlite 1.8.5 2023-06-05 [1] CRAN (R 4.1.3)
knitr 1.43 2023-05-25 [1] CRAN (R 4.1.3)
labeling 0.4.2 2020-10-20 [1] CRAN (R 4.1.3)
lattice 0.21-8 2023-04-05 [1] CRAN (R 4.1.3)
lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.1.3)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.1.3)
MASS 7.3-58.3 2023-03-07 [1] CRAN (R 4.1.3)
Matrix 1.5-4.1 2023-05-18 [1] CRAN (R 4.1.3)
mgcv 1.8-42 2023-03-02 [1] CRAN (R 4.1.3)
multtest 2.50.0 2021-10-26 [1] Bioconductor
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.3)
nlme 3.1-162 2023-01-31 [1] CRAN (R 4.1.3)
permute 0.9-7 2022-01-27 [1] CRAN (R 4.1.3)
phyloseq * 1.38.0 2021-10-26 [1] Bioconductor
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.1.3)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.3)
plyr 1.8.8 2022-11-11 [1] CRAN (R 4.1.3)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.3)
Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.1.3)
RCurl 1.98-1.12 2023-03-27 [1] CRAN (R 4.1.3)
reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.1.3)
rhdf5 2.38.1 2022-03-10 [1] Bioconductor
rhdf5filters 1.6.0 2021-10-26 [1] Bioconductor
Rhdf5lib 1.16.0 2021-10-26 [1] Bioconductor
rlang 1.1.1 2023-04-28 [1] CRAN (R 4.1.3)
rmarkdown 2.22 2023-06-01 [1] CRAN (R 4.1.3)
rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.1.3)
S4Vectors 0.32.4 2022-03-24 [1] Bioconductor
sass 0.4.6 2023-05-03 [1] CRAN (R 4.1.3)
scales 1.2.1 2022-08-20 [1] CRAN (R 4.1.3)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.3)
stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.1)
stringr 1.5.0 2022-12-02 [1] CRAN (R 4.1.3)
survival 3.5-5 2023-03-12 [1] CRAN (R 4.1.3)
tibble 3.2.1 2023-03-20 [1] CRAN (R 4.1.3)
tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.1.3)
utf8 1.2.3 2023-01-31 [1] CRAN (R 4.1.3)
vctrs 0.6.2 2023-04-19 [1] CRAN (R 4.1.3)
vegan 2.6-4 2022-10-11 [1] CRAN (R 4.1.3)
withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.3)
xfun 0.39 2023-04-20 [1] CRAN (R 4.1.3)
XVector 0.34.0 2021-10-26 [1] Bioconductor
yaml 2.3.7 2023-01-23 [1] CRAN (R 4.1.3)
zlibbioc 1.40.0 2021-10-26 [1] Bioconductor
[1] /home/maria/miniforge3/envs/frogs@5.1.0/lib/R/library
──────────────────────────────────────────────────────────────────────────────