Mercurial > repos > ecology > claraguess
annotate nb_clust_G.R @ 1:f11879f7b554 draft
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
| author | ecology | 
|---|---|
| date | Wed, 28 May 2025 10:13:34 +0000 | 
| parents | adeb719a267f | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
1 # Script to determine the optimal number of clusters by optimizing the SIH index (average silhouette width), and to produce the files needed for the next clustering step | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
2 | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
3 #load packages | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
4 library(cluster) | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
5 library(dplyr) | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
6 library(tidyverse) | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
7 | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
# Load arguments ----
# Six positional arguments are read, in this order:
#   1. enviro    - path to the tab-separated environmental data file
#   2. taxa_list - path to the file listing the taxa kept for clustering
#   3. preds     - prediction file path(s), separated by commas
#   4. max_k     - maximum number of clusters to test (numeric)
#   5. metric    - dissimilarity metric handed to clara()
#   6. sample    - value handed to clara() as `samples` (numeric)
args <- commandArgs(trailingOnly = TRUE)

# All six values are used unconditionally further down, so a shorter call
# is rejected here instead of failing later with an obscure error.
if (length(args) < 6) {
  stop(
    "This tool needs 6 arguments: ",
    "enviro, taxa_list, preds, max_k, metric, sample",
    call. = FALSE
  )
}

enviro <- args[1]
taxa_list <- args[2]
preds <- args[3]
max_k <- as.numeric(args[4])
metric <- args[5]
sample <- as.numeric(args[6])

# as.numeric() turns non-numeric text into NA (with a warning only); stop
# with an explicit message rather than propagating NA into the clustering.
if (is.na(max_k) || is.na(sample)) {
  stop("Arguments 4 (max_k) and 6 (sample) must be numeric", call. = FALSE)
}
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
21 | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
# Load data ----

# Environmental table: tab-separated, with a header row; the sentinel
# string "-9999" is read as NA.
env.data <- read.table(
  file = enviro,
  header = TRUE,
  sep = "\t",
  dec = ".",
  na.strings = "-9999"
)

# List of modelled taxa used for clustering: space-separated, no header.
# The column is renamed "a" (presumably the file has a single column of
# taxon names - confirm against the upstream tool).
tv <- read.table(
  file = taxa_list,
  header = FALSE,
  sep = " ",
  dec = ".",
  na.strings = "NA"
)
names(tv) <- "a"
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
29 | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
################ Grouping of taxa if multiple prediction files entered ################

# `preds` holds one or more prediction file paths separated by commas.
data_split <- str_split(preds, ",")
pred_files <- data_split[[1]]

# Read every prediction file (tab-separated, with header) and stack them in
# a single rbind() call, which avoids re-copying the accumulated table on
# each iteration as a grow-in-loop rbind() does.
data.bio <- do.call(
  rbind,
  lapply(
    pred_files,
    read.table,
    dec = ".", sep = "\t", header = TRUE, na.strings = "NA"
  )
)

# Columns are renamed by position; this assumes every file provides these
# four columns in this order - TODO confirm against the upstream tool.
names(data.bio) <- c("lat", "long", "pred", "taxon")
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
42 | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
# Keep selected taxa ----
# Retain only the rows whose taxon is present in the taxa list (tv$a).
# %in% never yields NA, so the logical mask can index the rows directly.
data.bio <- data.bio[data.bio$taxon %in% tv$a, ]

# Save the filtered predictions as a tab-separated file, without quoting
# and without row names.
write.table(
  data.bio,
  file = "data_bio.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
47 | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
# Format data ----
# Reshape the long prediction vector into a table with one row per row of
# env.data and one column per taxon. matrix() fills column by column, so
# this relies on data.bio holding one contiguous run of nrow(env.data)
# predictions per taxon (presumably in the same site order as env.data -
# verify against the prediction step).
n_sites <- nrow(env.data)
taxa <- unique(data.bio$taxon)

# Without this check a size mismatch would be recycled/truncated by
# matrix() or would leave columns unnamed, producing a misaligned table.
if (n_sites == 0 ||
      nrow(data.bio) %% n_sites != 0 ||
      nrow(data.bio) / n_sites != length(taxa)) {
  stop(
    "Prediction data (", nrow(data.bio), " rows, ", length(taxa),
    " taxa) do not match the environmental data (", n_sites, " rows)",
    call. = FALSE
  )
}

test3 <- matrix(data.bio$pred, nrow = n_sites, ncol = length(taxa))
test3 <- data.frame(test3)
names(test3) <- taxa

# Save the table used for clustering as a tab-separated file.
write.table(
  test3,
  file = "data_to_clus.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
55 | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
# Max number of clusters to test: `max_k`, taken from the arguments.
# 2:max_k would count downwards (2, 1) for max_k < 2 and then fail inside
# the loop, so such values are rejected explicitly.
if (is.na(max_k) || max_k < 2) {
  stop("The maximum number of clusters must be at least 2", call. = FALSE)
}

# Initialization of the vector storing the SIH indices: position k holds
# the value obtained with k clusters (position 1 is never filled, stays 0).
sih_values <- rep(0, max_k)

# Loop invariants used for the clara() sub-sample size.
n_obs <- nrow(test3)
rows_per_obs <- nrow(data.bio) / n_obs

# Calculation of the SIH index for each number of clusters
for (k in 2:max_k) {
  # Clara execution; the sub-sample size grows with k and is capped at the
  # number of observations.
  clara_res <- clara(
    test3,
    k,
    metric = metric,
    samples = sample,
    sampsize = min(n_obs, rows_per_obs + 2 * k)
  )
  # SIH index = average silhouette width reported by clara()
  sih_values[k] <- clara_res$silinfo$avg.width
}
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
69 | 
| 
 
adeb719a267f
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
 
ecology 
parents:  
diff
changeset
 | 
# Plot SIH index chart by number of clusters ----
# Draw the SIH index against the number of clusters (2 to max_k) as
# connected points and save the figure to "Indices_SIH.png".
k_range <- 2:max_k
png("Indices_SIH.png")
plot(
  k_range,
  sih_values[k_range],
  type = "b",
  xlab = "Number of clusters",
  ylab = "SIH index"
)
dev.off()
