comparison recup_liste_taxon.R @ 0:3d750279158b draft

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 2a2ae892fa2dbc1eff9c6a59c3ad8f3c27c1c78d
author ecology
date Wed, 18 Oct 2023 09:58:34 +0000
parents
children b38b954b92b9
comparison
equal deleted inserted replaced
-1:000000000000 0:3d750279158b
1 # This script creates a file stating, for each taxon, whether a BRT model was obtained, and also writes the list of taxa.
2
3 #load packages
4 library(dplyr, warn.conflicts = FALSE)
5 library(taxonomyCleanr, warn.conflicts = FALSE)
6 library(stringr, warn.conflicts = FALSE)
7
#load arguments
# Positional command-line arguments:
#   1. data   - comma-separated paths of the occurrence files
#   2. preds  - comma-separated paths of the prediction files
#   3. enviro - path of the environmental table
args <- commandArgs(trailingOnly = TRUE)

# All three arguments are read below, so fail early with a clear message
# rather than letting a missing one surface later as NA.
if (length(args) < 3) {
  stop(
    "This tool needs three arguments: occurrence files, ",
    "prediction files and the environmental file"
  )
}
data <- args[1]
preds <- args[2]
enviro <- args[3]

# Environmental table; the literal string "na" marks missing values.
env <- read.table(enviro, header = TRUE, na.strings = "na")
# strsplit() returns a list; the file paths are in its first element.
occurrence_files <- strsplit(data, ",")
preds_files <- strsplit(preds, ",")
22
#########functions##########

# Negated membership operator: `x %!in% y` is TRUE where `x` is not in `y`.
`%!in%` <- Negate(`%in%`)

# Script-level accumulator extended by have.model(): the function reads the
# current value of `have_model`, appends its rows and returns the result; the
# caller is expected to store that result back into `have_model`.
have_model <- data.frame()
# Default occurrence count; kept as a global for backward compatibility.
pres <- 0

# Record, for each taxon, whether a BRT model exists and its occurrence count.
#
# Arguments:
#   taxon_phylum: taxa names to process (list or character vector).
#   noms_sp:      object whose names() are the taxa with occurrence data.
#   comptage_sp:  counts indexed by taxon name; `comptage_sp[tax]` is summed.
#   brt_phylum:   table whose `spe` column lists the taxa that have a model.
#
# Returns the global `have_model` table extended with one row per taxon, with
# character columns "Taxa", "Model" ("Yes" or "No") and "Occurences".
have.model <- function(taxon_phylum, noms_sp, comptage_sp, brt_phylum) {
  output_cols <- c("Taxa", "Model", "Occurences")
  known_taxa <- names(noms_sp)
  modelled_taxa <- brt_phylum$spe

  # Start from the rows accumulated by previous calls (if any).
  accumulated <- have_model
  if (ncol(accumulated) == 0) {
    accumulated <- data.frame(
      Taxa = character(),
      Model = character(),
      Occurences = character(),
      stringsAsFactors = FALSE
    )
  } else {
    colnames(accumulated) <- output_cols
  }

  # Build every row first, then bind once instead of growing inside a loop.
  rows <- lapply(taxon_phylum, function(tax) {
    # A taxon without occurrence data counts as 0 instead of silently
    # reusing the count of the previously processed taxon.
    n_occ <- if (tax %in% known_taxa) sum(comptage_sp[tax]) else 0
    model <- if (tax %in% modelled_taxa) "Yes" else "No"
    # c() coerces the count to character, so all columns are character.
    c(tax, model, n_occ)
  })

  # Nothing to add: return the accumulated table with the expected columns.
  if (length(rows) == 0) {
    return(accumulated)
  }

  new_rows <- as.data.frame(
    do.call(rbind, unname(rows)),
    stringsAsFactors = FALSE
  )
  colnames(new_rows) <- output_cols

  rbind(accumulated, new_rows, make.row.names = FALSE)
}
44
##########Execution########
# Read every prediction file (header row, "na" as missing value) and stack
# the tables row-wise. Iterating over the paths directly avoids the
# `1:length(x)` trap on an empty list, and binding once avoids growing the
# table inside a loop. `brt` is NULL when there is no prediction file.
brt <- do.call(
  rbind,
  lapply(preds_files[[1]], read.table, header = TRUE, na.strings = "na")
)
50
# Process each occurrence file and accumulate the per-taxon results in
# `have_model` (have.model() returns the accumulated table each time).
for (occurrence_file in occurrence_files[[1]]) {
  # Semicolon-separated table with decimal commas; "na" marks missing values.
  occurrence <- read.table(
    occurrence_file,
    dec = ",",
    sep = ";",
    header = TRUE,
    na.strings = "na"
  )

  # Taxon columns are those that are neither environmental variables nor
  # the "station" identifier.
  taxon_names <- names(occurrence)
  new_taxon <- taxon_names[
    !(taxon_names %in% names(env)) & taxon_names != "station"
  ]

  # A file without any taxon column contributes nothing.
  if (length(new_taxon) == 0) {
    next
  }

  # drop = FALSE keeps a data frame with the taxon as column name even when
  # there is a single taxon; otherwise the column would lose its name and
  # the lookup by taxon name in have.model() would fail.
  cmpt <- occurrence[, new_taxon, drop = FALSE]

  have_model <- have.model(as.list(new_taxon), occurrence, cmpt, brt)
}
67
#Taxa for which a model was obtained
# The Model column holds "Yes" or "No"; comparing against "N" never removed
# anything, so taxa without a model ended up in the taxa lists.
have_model2 <- subset(have_model, have_model$`Model` == "Yes")
# Same selection, kept with the original (untrimmed) taxon names.
have_model3 <- have_model2

#Obtain a cleaned list of the taxa that obtained a BRT model. This file can be
#submitted to the match taxa tool of the WoRMS database to get their
#classification and to sort duplicates between taxonomic ranks.

have_model2$Taxa <- as.character(trim_taxa(have_model2$Taxa))

#Second clean-up: remove the taxa whose name ends in "sp.1", "sp1", "sp2",
#etc., which represent duplicates.
# NOTE(review): the "." in "sp.1" is an unescaped regex wildcard, so it matches
# any character between "sp" and "1" - confirm whether a literal dot is meant.
duplicate_suffix <- "sp.1|sp[0-9]"

have_model2 <- have_model2 %>% filter(!str_ends(Taxa, duplicate_suffix))
have_model3 <- have_model3 %>% filter(!str_ends(Taxa, duplicate_suffix))
have_model <- have_model %>% filter(!str_ends(Taxa, duplicate_suffix))

#extraction of the have_model object
write.csv(have_model, file = "have_model.csv", quote = FALSE, row.names = FALSE)

#getting list of taxa for next if not using worms
list_taxon <- have_model3$Taxa
write.table(
  list_taxon,
  file = "list_taxa.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

#getting the final list to submit to worms
liste_taxon <- have_model2$Taxa
write.table(
  liste_taxon,
  file = "list_taxa_clean.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)
92
93