diff recup_liste_taxon.R @ 0:5cde56683579 draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 5d48df67919fbc9d77b98a8243d438c397f61a0e
author ecology
date Thu, 21 Mar 2024 14:05:01 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/recup_liste_taxon.R	Thu Mar 21 14:05:01 2024 +0000
@@ -0,0 +1,93 @@
+#This script allows us to create a file telling us for each taxon if we obtained a BRT model. As well as the list of taxa.
+
+#load packages
+library(dplyr, warn.conflicts = FALSE)
+library(taxonomyCleanr, warn.conflicts = FALSE)
+library(stringr, warn.conflicts = FALSE)
+ 
+#load arguments
+args = commandArgs(trailingOnly=TRUE) 
+
+if (length(args)==0){
+    stop("This tool needs at least one argument")
+}else{
+    data <- args[1]
+    preds <- args[2]
+    enviro <- args[3]
+}
+
+env = read.table(enviro, sep="\t", dec=".", header=T, na.strings = "-9999")
+occurrence_files = strsplit(data,",")
+preds_files = strsplit(preds,",")
+
+#########functions##########
+
+`%!in%` <- Negate(`%in%`)
+
+have_model = data.frame()
+pres = 0
+
+have.model <- function(taxon_phylum,noms_sp,comptage_sp,brt_phylum){
+  for (tax in taxon_phylum) {
+    if (tax %in% names(noms_sp)){
+      pres = sum(comptage_sp[tax])
+    }
+    if (tax %in% brt_phylum$spe  ) {
+      brt = c(tax,"Yes", pres)
+      have_model = rbind(have_model,brt, make.row.names = F)}
+    else {
+      brt = c(tax,"No", pres)
+      have_model = rbind(have_model,brt, make.row.names = F)}
+  }
+  colnames(have_model) = c("Taxa","Model","Occurences")
+  return(have_model)}
+
+##########Execution########
+brt = NULL
+for (j in 1:length(preds_files[[1]])){
+    brt <- rbind(brt,read.table(preds_files[[1]][j], sep="\t", header = TRUE, na.strings = "na"))
+}
+
+for (i in 1:length(occurrence_files[[1]])) {
+  occurrence <- NULL
+  cmpt <- NULL
+  taxon <- list()
+  
+  occurrence <- read.table(occurrence_files[[1]][i], sep = "\t", header = TRUE, na.strings = "na")
+  
+  taxon_names <- names(occurrence)
+  new_taxon <- taxon_names[!(taxon_names %in% names(env)) & taxon_names != "station"]
+  taxon <- c(taxon, new_taxon)
+  
+  cmpt <- occurrence[, new_taxon]
+  cmpt <- as.data.frame(cmpt)
+  
+  have_model <- have.model(taxon, occurrence, cmpt, brt)
+}
+
+#Taxa for which a model was obtained
+have_model2 = subset(have_model, have_model$`Model` != "No")
+have_model3 = subset(have_model, have_model$`Model` != "No")
+
+#Obtain a list of taxa (cleaned) that have obtained a BRT model (file that can be submitted to the match taxa tool of the WoRMS database to obtain their classification and be able to sort duplicates between taxonomic ranks)
+
+have_model2$Taxa <- as.character(trim_taxa(have_model2$Taxa))
+
+#Second clean-up (elimination of all taxa ending in sp1./sp2 etc which represents a duplicate)
+
+have_model2 <- have_model2 %>% filter(!str_ends(Taxa, "sp.1|sp[0-9]"))
+have_model3 <- have_model3 %>% filter(!str_ends(Taxa, "sp.1|sp[0-9]"))
+have_model <- have_model %>% filter(!str_ends(Taxa, "sp.1|sp[0-9]"))
+
+#extraction of the have_model object
+write.table(have_model,file = "have_model.tsv", sep="\t", quote = F, row.names = F)
+
+#getting list of taxa for next step if not using worms
+list_taxon = have_model3$Taxa
+write.table(list_taxon, file= "list_taxa.txt", quote = F, row.names = F, col.names = F)
+
+#getting the final list to submit to worms
+liste_taxon = have_model2$Taxa
+write.table(liste_taxon,file = "list_taxa_clean.txt", quote = F, row.names = F, col.names = F)
+
+