comparison recup_liste_taxon.R @ 0:5cde56683579 draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 5d48df67919fbc9d77b98a8243d438c397f61a0e
author ecology
date Thu, 21 Mar 2024 14:05:01 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5cde56683579
1 #This script creates a file stating, for each taxon, whether a BRT model was obtained, and also produces the list of taxa.
2
3 #load packages
4 library(dplyr, warn.conflicts = FALSE)
5 library(taxonomyCleanr, warn.conflicts = FALSE)
6 library(stringr, warn.conflicts = FALSE)
7
# Load arguments.
# Three positional arguments are required:
#   1. comma-separated list of occurrence files
#   2. comma-separated list of BRT prediction files
#   3. tab-separated environmental file
args <- commandArgs(trailingOnly = TRUE)

# All three arguments are used below, so reject anything shorter up front
# instead of failing later on an NA file name.
if (length(args) < 3) {
  stop(
    "This tool needs three arguments: occurrence files, prediction files ",
    "and the environmental file",
    call. = FALSE
  )
}

data <- args[1]
preds <- args[2]
enviro <- args[3]

# Environmental table: tab-separated, "-9999" is read as NA.
env <- read.table(enviro, sep = "\t", dec = ".", header = TRUE, na.strings = "-9999")

# Each element is a one-item list holding the vector of file paths.
occurrence_files <- strsplit(data, ",")
preds_files <- strsplit(preds, ",")
22
23 #########functions##########
24
# Negated membership test: TRUE where an element of x is absent from table.
`%!in%` <- function(x, table) {
  !(x %in% table)
}

# Script-level state: the running result table (empty until have.model()
# fills it) and the default occurrence count.
pres <- 0
have_model <- data.frame()
29
# Build (and extend) the table saying, for each taxon, whether a BRT model
# exists and how many occurrences were counted.
#
# Arguments:
#   taxon_phylum  list or vector of taxon names to report on
#   noms_sp       occurrence table; only its column names are consulted, to
#                 decide whether a taxon has occurrence data
#   comptage_sp   table of counts indexed by taxon name; the column of each
#                 known taxon is summed
#   brt_phylum    prediction table; a taxon has a model when its name appears
#                 in the `spe` column
#
# Returns a data frame with character columns "Taxa", "Model" ("Yes"/"No")
# and "Occurences". Rows are appended to the `have_model` table visible from
# the enclosing scope, if there is one, so successive calls accumulate.
have.model <- function(taxon_phylum, noms_sp, comptage_sp, brt_phylum) {
  # Rows accumulated by earlier calls; an empty table when none exists yet.
  previous <- get0("have_model", ifnotfound = data.frame())

  taxa <- as.character(unlist(taxon_phylum))

  # The count is recomputed for every taxon. A taxon without occurrence data
  # gets "0" rather than inheriting the count of the previous taxon.
  # Converting inside the closure keeps integer sums formatted as integers.
  occurrences <- vapply(
    taxa,
    function(tax) {
      if (tax %in% names(noms_sp)) {
        as.character(sum(comptage_sp[tax]))
      } else {
        "0"
      }
    },
    character(1),
    USE.NAMES = FALSE
  )

  # Index into c("No", "Yes") so an empty input yields an empty character
  # vector.
  model <- c("No", "Yes")[(taxa %in% brt_phylum$spe) + 1L]

  # Build the whole batch at once instead of growing a data frame row by row.
  batch <- data.frame(
    Taxa = taxa,
    Model = model,
    Occurences = occurrences,
    stringsAsFactors = FALSE
  )

  if (ncol(previous) > 0) {
    # Earlier rows are matched by position, so align their names first.
    names(previous) <- names(batch)
    batch <- rbind(previous, batch, make.row.names = FALSE)
  }

  batch
}
44
45 ##########Execution########
# Stack every prediction file into one table. do.call(rbind, ...) binds all
# tables at once and yields NULL when there is no file, matching the initial
# value of `brt`.
brt <- do.call(
  rbind,
  lapply(
    preds_files[[1]],
    read.table,
    sep = "\t", header = TRUE, na.strings = "na"
  )
)

# Process each occurrence file in turn; have.model() appends to the rows
# produced for the previous files. seq_along() runs zero times on empty
# input, where 1:length() would iterate over c(1, 0).
for (i in seq_along(occurrence_files[[1]])) {
  occurrence <- read.table(
    occurrence_files[[1]][i],
    sep = "\t", header = TRUE, na.strings = "na"
  )

  # Taxon columns are the ones that are neither environmental parameters nor
  # the "station" identifier.
  taxon_names <- names(occurrence)
  new_taxon <- taxon_names[!(taxon_names %in% names(env)) & taxon_names != "station"]
  taxon <- as.list(new_taxon)

  # drop = FALSE keeps a data frame with the taxon name as column name even
  # when there is a single taxon. Otherwise the column is renamed "cmpt" and
  # the lookup by taxon name in have.model() fails.
  cmpt <- occurrence[, new_taxon, drop = FALSE]

  have_model <- have.model(taxon, occurrence, cmpt, brt)
}
67
# Keep only the taxa for which a BRT model was obtained. Two copies are made:
# have_model2 will hold cleaned names, have_model3 the names as they are.
modelled <- subset(have_model, have_model$`Model` != "No")
have_model2 <- modelled
have_model3 <- modelled

# Cleaned taxon names, suitable for submission to the WoRMS "match taxa" tool
# to get their classification and sort duplicates between taxonomic ranks.
have_model2$Taxa <- as.character(trim_taxa(have_model2$Taxa))

# Second clean-up: discard taxa whose name ends in sp.1 / sp<digit>, which
# represent duplicates. The same rule is applied to all three tables.
drop_numbered_sp <- function(tbl) {
  filter(tbl, !str_ends(Taxa, "sp.1|sp[0-9]"))
}
have_model2 <- drop_numbered_sp(have_model2)
have_model3 <- drop_numbered_sp(have_model3)
have_model <- drop_numbered_sp(have_model)

# Export the full have_model table.
write.table(have_model, file = "have_model.tsv", sep = "\t", quote = FALSE, row.names = FALSE)

# Taxon list for the next step when WoRMS is not used.
list_taxon <- have_model3$Taxa
write.table(list_taxon, file = "list_taxa.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)

# Final cleaned taxon list to submit to WoRMS.
liste_taxon <- have_model2$Taxa
write.table(liste_taxon, file = "list_taxa_clean.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)
92
93