Mercurial > repos > ecology > ecoregion_geonearestneighbor
comparison recup_liste_taxon.R @ 0:5cde56683579 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 5d48df67919fbc9d77b98a8243d438c397f61a0e
author | ecology |
---|---|
date | Thu, 21 Mar 2024 14:05:01 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5cde56683579 |
---|---|
1 # This script creates a file stating, for each taxon, whether a BRT model was obtained, and also writes the list of taxa. | |
2 | |
3 #load packages | |
4 library(dplyr, warn.conflicts = FALSE) | |
5 library(taxonomyCleanr, warn.conflicts = FALSE) | |
6 library(stringr, warn.conflicts = FALSE) | |
7 | |
8 #load arguments | |
# Read the arguments given after the script name on the command line.
args <- commandArgs(trailingOnly = TRUE)

# The three arguments are all read below, so fail early with a clear message
# when any of them is missing instead of failing later on an NA path.
if (length(args) < 3) {
  stop("This tool needs three arguments: occurrence files, prediction files and the environment file")
} else {
  data <- args[1]    # comma-separated occurrence file paths (split below)
  preds <- args[2]   # comma-separated prediction file paths (split below)
  enviro <- args[3]  # path of the tab-separated environment table
}

# Environment table; the value "-9999" is read as NA.
env <- read.table(enviro, sep = "\t", dec = ".", header = TRUE, na.strings = "-9999")

# Each strsplit() result is a list of length one holding the vector of paths.
occurrence_files <- strsplit(data, ",")
preds_files <- strsplit(preds, ",")
22 | |
23 #########functions########## | |
24 | |
# Negated membership operator: `x %!in% y` is TRUE wherever `x %in% y` is FALSE.
`%!in%` <- function(x, table) {
  !(x %in% table)
}

# Script-level state read by have.model(): the (initially empty) result table
# and the default occurrence count.
have_model <- data.frame()
pres <- 0
29 | |
# Build the table telling, for each taxon, whether a BRT model was obtained.
#
# Arguments:
#   taxon_phylum  list or character vector of taxon names to report on.
#   noms_sp       table whose column names decide whether a taxon has
#                 occurrence data.
#   comptage_sp   table of counts with one column per taxon; the column of a
#                 taxon is summed to get its number of occurrences.
#   brt_phylum    table with a `spe` column listing the taxa that have a model.
#
# Returns a data frame with the character columns "Taxa", "Model" ("Yes"/"No")
# and "Occurences". When a script-level `have_model` table with three columns
# already exists, the new rows are appended to it (this is how the calling loop
# accumulates results over several occurrence files).
have.model <- function(taxon_phylum, noms_sp, comptage_sp, brt_phylum) {
  taxa <- as.character(unlist(taxon_phylum))

  # Previously accumulated results, if any. Looked up explicitly so that the
  # function also works when no such table has been defined.
  previous <- get0("have_model", envir = environment(), mode = "list",
                   ifnotfound = NULL)
  if (!is.data.frame(previous) || ncol(previous) != 3L) {
    previous <- NULL
  }

  # The count is computed per taxon: a taxon without occurrence data gets 0
  # rather than inheriting the count of the taxon processed just before it.
  occurrences <- vapply(taxa, function(tax) {
    if (tax %in% names(noms_sp)) {
      as.character(sum(comptage_sp[tax]))
    } else {
      "0"
    }
  }, character(1), USE.NAMES = FALSE)

  # All rows are built at once instead of growing the table with rbind() in a
  # loop. Columns stay character, as the row-wise rbind() of character vectors
  # produced before.
  new_rows <- data.frame(
    Taxa = taxa,
    Model = as.character(ifelse(taxa %in% brt_phylum$spe, "Yes", "No")),
    Occurences = occurrences,
    stringsAsFactors = FALSE
  )

  if (is.null(previous)) {
    return(new_rows)
  }
  colnames(previous) <- colnames(new_rows)
  rbind(previous, new_rows, make.row.names = FALSE)
}
44 | |
45 ##########Execution######## | |
# Read every prediction file listed in `preds_files` and stack the tables into
# `brt`. lapply() avoids the `1:length(x)` trap (which iterates over c(1, 0)
# when the list of files is empty) and the repeated copying caused by growing
# a table with rbind() inside a loop. With no file at all, `brt` is NULL.
brt <- do.call(
  rbind,
  lapply(preds_files[[1]], function(path) {
    read.table(path, sep = "\t", header = TRUE, na.strings = "na")
  })
)
50 | |
# For each occurrence file, find its taxon columns and add one row per taxon
# to `have_model`.
for (i in seq_along(occurrence_files[[1]])) {
  occurrence <- read.table(occurrence_files[[1]][i], sep = "\t", header = TRUE, na.strings = "na")

  # Taxon columns are those that are neither environment variables nor the
  # "station" column.
  taxon_names <- names(occurrence)
  new_taxon <- taxon_names[!(taxon_names %in% names(env)) & taxon_names != "station"]

  # Nothing to report for a file without any taxon column.
  if (length(new_taxon) == 0) {
    next
  }
  taxon <- as.list(new_taxon)

  # drop = FALSE keeps a data frame with the taxon name as column name even
  # when the file holds a single taxon; otherwise the selection collapses to a
  # bare vector and the per-taxon lookup in have.model() fails.
  cmpt <- occurrence[, new_taxon, drop = FALSE]

  have_model <- have.model(taxon, occurrence, cmpt, brt)
}
67 | |
# Taxa for which a model was obtained. Two copies are kept: `have_model3`
# keeps the raw names, `have_model2` receives the cleaned names.
have_model3 <- subset(have_model, have_model$`Model` != "No")
have_model2 <- have_model3

# Clean the taxon names of the taxa that obtained a BRT model. The cleaned
# list can be submitted to the match taxa tool of the WoRMS database to get
# their classification and sort out duplicates between taxonomic ranks.
have_model2$Taxa <- as.character(trim_taxa(have_model2$Taxa))

# Second clean-up: drop the taxa whose name ends in sp.1 / sp1 / sp2 etc.,
# which represent duplicates.
# NOTE(review): in the pattern, `.` is a regex wildcard and only `sp.1` is
# listed explicitly (not sp.2, sp.3, ...) - confirm this is intended.
drop_numbered_sp <- function(tbl) {
  tbl %>% filter(!str_ends(Taxa, "sp.1|sp[0-9]"))
}
have_model2 <- drop_numbered_sp(have_model2)
have_model3 <- drop_numbered_sp(have_model3)
have_model <- drop_numbered_sp(have_model)

# Export of the have_model table.
write.table(have_model, file = "have_model.tsv", sep = "\t", quote = FALSE, row.names = FALSE)

# List of taxa for the next step when WoRMS is not used.
list_taxon <- have_model3$Taxa
write.table(list_taxon, file = "list_taxa.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)

# Final (cleaned) list to submit to WoRMS.
liste_taxon <- have_model2$Taxa
write.table(liste_taxon, file = "list_taxa_clean.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)
92 | |
93 |