changeset 2:f8962f1c832a draft

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 5d48df67919fbc9d77b98a8243d438c397f61a0e
author ecology
date Thu, 21 Mar 2024 14:04:25 +0000
parents fc621f3f8226
children a56c413f3a98
files BRT_model.xml GeoNN.R recup_liste_taxon.R test-data/ceamarc_env.tsv test-data/fish_wide.tsv
diffstat 5 files changed, 121 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/BRT_model.xml	Wed Jan 24 15:52:56 2024 +0000
+++ b/BRT_model.xml	Thu Mar 21 14:04:25 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="ecoregion_brt_analysis" name="BRT prediction tool" version="0.1.0+galaxy0" profile="22.05">
+<tool id="ecoregion_brt_analysis" name="BRT tool prediction" version="0.1.0+galaxy0" profile="22.05">
     <description>for species distribution modelling</description>
     <requirements>
        <requirement type="package" version="4.3.0">r-base</requirement>
@@ -30,7 +30,7 @@
               <option value=".">Dot</option>
               <option value=",">Comma</option>
       </param>
-      <param name="abioticname" type="data_column" label="Choose column(s) where your abiotic parameter are in your environment data file." data_ref="enviro" multiple="true"/>
+      <param name="abioticname" type="data_column" label="Choose column(s) where your abiotic parameter are in your environment data file." data_ref="enviro" multiple="true" use_header_names="true"/>
     </inputs>
     <outputs>
       <collection name="outputpred" type="list" label="Prediction files">
@@ -64,8 +64,8 @@
 **What it does ?**
 ==================
 
-This Galaxy tool is made to characterize the distribution of each taxon by giving a probability indicator taxon presence for each environmental layer pixel. To do this, the boosted regression trees (BRT) method (Elith *et al*., 2008) is used to fit the relationship between the presence of a single taxon and the environmental conditions where the taxon has been
-detected. Two steps are performed in this script: the creation of the taxon distribution model and the use of this model to obtain a predictive index. The prediction index obtained from each BRT model for each pixel of the environmental layers is an approximation of the probability of detection of the presence of the taxon.
+This Galaxy tool is made to characterize the distribution of each taxon by giving an indicator of the probability of taxon presence for each environmental layer pixel. To do this, the boosted regression trees (BRT) method (Elith *et al*., 2008) is used to fit the relationship between the presence of a single taxon and the environmental conditions where the taxon has been detected. 
+Two steps are performed in this script: the creation of the taxon distribution model and the use of this model to obtain a predictive index. The prediction index obtained from each BRT model for each pixel of the environmental layers is an approximation of the probability of detection of the presence of the taxon.
 
 ===================         
 **How to use it ?**
@@ -74,9 +74,10 @@
 This tool takes in input the environmental data (for all the study areas) as well as the species occurrence data and the environmental characteristics where the species has been observed. See examples of inputs below. These files need to be in tabular format. You also need to select the column where your abiotic parameters are in your environment data file.
  
  .. class:: infomark 
-Your abiotic parameters must be present in your occurrence data file(s) and must be named the same as in your environment file.
+    Your abiotic parameters must be present in your occurrence data file(s) and must be named the same as in your environment file. This file can be obtained with the tool called GeoNearestNeighbor.
+    The GeoNearestNeighbor tool allows you to merge two data tables according to their latitude and longitude coordinates, finding the closest points.
  
-This tool gives in output a file containing the predictions of the probability of the presence of each taxon for each pixel (latitude, longitude) environmental, a visualization of these pixels for each taxon and graphs showing the percentage of model explanation for each environmental parameter. 
+This tool outputs a file containing the predicted probability of presence of each taxon for each environmental pixel (latitude, longitude), a visualization of these pixels for each taxon and graphs showing the percentage of model explanation for each environmental parameter.
 
 **Example of environmental data input :** 
 -----------------------------------------
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GeoNN.R	Thu Mar 21 14:04:25 2024 +0000
@@ -0,0 +1,77 @@
+#Date : 09/02/2024
+#Author : Seguineau Pauline
+
+#Load libraries
+library(tidyr)
+library(dplyr)
+library(sf)
+
+# Load arguments: env file, env long col, env lat col, occ file, occ lat col,
+# occ long col. Column arguments are column positions in the matching table.
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 6) {
+  # All six values are read below; fail early instead of indexing with NA.
+  stop("This tool needs six arguments", call. = FALSE)
+}
+enviro <- args[1]
+envlong <- as.numeric(args[2])
+envlat <- as.numeric(args[3])
+occu <- args[4]
+occulat <- as.numeric(args[5])
+occulong <- as.numeric(args[6])
+ 
+# Read the two tab-separated tables (first row holds the column names).
+env <- read.table(file = enviro, sep = "\t", header = TRUE)
+occ <- read.table(file = occu, sep = "\t", header = TRUE)
+
+# Names of the coordinate columns, ordered longitude then latitude.
+cols_env <- c(names(env[envlong]), names(env[envlat]))
+cols_occ <- c(names(occ[occulong]), names(occ[occulat]))
+
+# Turn both tables into sf point objects sharing one long/lat WGS84 CRS.
+wgs84 <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
+env_sf <- st_as_sf(env, coords = cols_env, crs = wgs84)
+occ_sf <- st_as_sf(occ, coords = cols_occ, crs = wgs84)
+
+# For each occurrence point, index of the closest feature in env_sf.
+nearest_indices <- st_nearest_feature(occ_sf, env_sf)
+# Matching rows of env, one per occurrence and in occurrence order.
+nearest_points <- env[nearest_indices, ]
+
+# Distance between each occurrence and its nearest environmental point.
+#
+# Only the matched pairs are measured (by_element = TRUE): building the full
+# env x occ distance matrix is never needed and its size grows with the
+# product of both table sizes. Row i of env_sf[nearest_indices, ] is the
+# environmental point paired with row i of occ_sf.
+# as.numeric() drops the units class so a plain numeric vector is kept.
+nearest_distances <- as.numeric(
+  st_distance(env_sf[nearest_indices, ], occ_sf, by_element = TRUE)
+)
+
+# Assemble occurrences and their environmental parameters in one table.
+# Env coordinate columns are dropped (occ keeps its own); drop = FALSE keeps a
+# data frame, and the column name, when a single parameter column is left.
+keep <- !names(nearest_points) %in% cols_env
+nearest_points <- nearest_points[, keep, drop = FALSE]
+new_occ <- cbind(occ, nearest_points)
+# Save the merged table as tab-separated text without row names.
+write.table(new_occ, file = "occurrence_env.tsv", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
+
+# Build an information table with, for each occurrence, its own geometry, the
+# geometry of the matched environmental point and the distance between them.
+occ_geom <- occ_sf$geometry
+env_geom <- env_sf$geometry[nearest_indices]
+distance_info <- data.frame(
+  occ_geometry = occ_geom,
+  env_geometry = env_geom,
+  distance = nearest_distances
+)
+# Set the three output headers explicitly.
+colnames(distance_info)[1:3] <- c("occ_geometry", "env_geometry", "Distances (meters)")
+
+# Save the information table as tab-separated text without row names.
+write.table(distance_info, file = "infos_file.tsv", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
+
+
+
--- a/recup_liste_taxon.R	Wed Jan 24 15:52:56 2024 +0000
+++ b/recup_liste_taxon.R	Thu Mar 21 14:04:25 2024 +0000
@@ -82,7 +82,7 @@
 #extraction of the have_model object
 write.table(have_model,file = "have_model.tsv", sep="\t", quote = F, row.names = F)
 
-#getting list of taxa for next if not using worms
+#getting list of taxa for next step if not using worms
 list_taxon = have_model3$Taxa
 write.table(list_taxon, file= "list_taxa.txt", quote = F, row.names = F, col.names = F)
 
--- a/test-data/ceamarc_env.tsv	Wed Jan 24 15:52:56 2024 +0000
+++ b/test-data/ceamarc_env.tsv	Thu Mar 21 14:04:25 2024 +0000
@@ -1,16 +1,16 @@
-long	lat	Carbo	Grav	Maxbearing	Maxmagnit	Meancurmag	Meansal	Meantheta	Mud	Prof	Rugosity	Sand	Seaice_prod	Sili	Slope	Standcurmag	Standsal	Standtheta	long_round	lat_round
-1	139.22	-65.57	0.88	28.59	3.67	0.03	0.03	34.62	-0.13	22.72	-441	-9999	55.76	0.24	3.27	0.28	0.01	0.01	0.18	139,22	-65,57
-2	139.22	-65.57	0.88	28.61	3.64	0.02	0.03	34.62	-0.13	22.48	-439	-9999	55.74	0.24	3.29	0.27	0.01	0.01	0.18	139,22	-65,57
-3	139.23	-65.57	0.92	28.62	3.59	0.02	0.03	34.62	-0.14	22.25	-438	-9999	56.28	0.25	3.32	0.22	0.01	0.01	0.19	139,23	-65,57
-4	139.24	-65.57	0.92	28.63	3.51	0.01	0.03	34.62	-0.14	21.95	-436	-9999	56.57	0.26	3.3	0.08	0.01	0.01	0.19	139,24	-65,57
-5	139.24	-65.57	0.92	28.64	3.35	0.01	0.03	34.62	-0.14	21.7	-437	-9999	56.58	0.26	3.28	0.05	0.01	0.01	0.19	139,24	-65,57
-6	139.25	-65.57	0.93	28.65	3	0.0096293305978179	0.03	34.62	-0.15	21.44	-436	-9999	56.63	0.26	3.26	0.29	0.01	0.01	0.19	139,25	-65,57
-7	139.26	-65.57	0.93	28.63	2.49	0.00871255807578564	0.03	34.62	-0.15	21.11	-432	-9999	56.67	0.26	3.23	0.43	0.01	0.01	0.19	139,26	-65,57
-8	139.26	-65.57	0.93	28.64	2.01	0.01	0.03	34.62	-0.16	20.83	-429	-9999	56.71	0.26	3.21	0.37	0.01	0.01	0.19	139,26	-65,57
-9	139.27	-65.57	0.94	28.65	1.71	0.01	0.03	34.62	-0.16	20.55	-427	-9999	56.75	0.26	3.19	0.32	0.01	0.01	0.2	139,27	-65,57
-10	139.28	-65.57	0.94	28.66	1.54	0.01	0.03	34.62	-0.16	20.21	-424	-9999	56.8	0.26	3.17	0.28	0.01	0.01	0.2	139,28	-65,57
-11	139.28	-65.57	0.94	28.67	1.44	0.02	0.03	34.62	-0.17	19.74	-422	-9999	56.84	0.26	3.14	0.26	0.01	0.01	0.2	139,28	-65,57
-12	139.29	-65.57	0.94	28.68	1.74	0.01	0.03	34.62	-0.17	20.86	-421	-9999	56.87	0.26	3.13	0.22	0.01	0.01	0.2	139,29	-65,57
-13	139.3	-65.57	0.95	28.7	3.46	0.01	0.03	34.62	-0.17	21.3	-420	-9999	56.91	0.26	3.11	0.32	0.01	0.01	0.2	139,30	-65,57
-14	139.3	-65.57	0.95	28.71	3.91	0.03	0.03	34.62	-0.18	21.01	-414	-9999	57.18	0.26	3.09	0.59	0.01	0.01	0.21	139,30	-65,57
-15	139.31	-65.57	0.96	28.72	4.03	0.05	0.03	34.62	-0.18	20.76	-406	-9999	57.54	0.26	3.07	0.53	0.01	0.01	0.21	139,31	-65,57
+long	lat	Carbo	Grav	Maxbearing	Maxmagnit	Meancurmag	Meansal	Meantheta	Mud	Prof	Rugosity	Sand	Seaice_prod	Sili	Slope	Standcurmag	Standsal	Standtheta
+139.22	-65.57	0.88	28.59	3.67	0.03	0.03	34.62	-0.13	22.72	-441	-9999	55.76	0.24	3.27	0.28	0.01	0.01	0.18
+139.22	-65.57	0.88	28.61	3.64	0.02	0.03	34.62	-0.13	22.48	-439	-9999	55.74	0.24	3.29	0.27	0.01	0.01	0.18
+139.23	-65.57	0.92	28.62	3.59	0.02	0.03	34.62	-0.14	22.25	-438	-9999	56.28	0.25	3.32	0.22	0.01	0.01	0.19
+139.24	-65.57	0.92	28.63	3.51	0.01	0.03	34.62	-0.14	21.95	-436	-9999	56.57	0.26	3.3	0.08	0.01	0.01	0.19
+139.24	-65.57	0.92	28.64	3.35	0.01	0.03	34.62	-0.14	21.7	-437	-9999	56.58	0.26	3.28	0.05	0.01	0.01	0.19
+139.25	-65.57	0.93	28.65	3	0.0096293305978179	0.03	34.62	-0.15	21.44	-436	-9999	56.63	0.26	3.26	0.29	0.01	0.01	0.19
+139.26	-65.57	0.93	28.63	2.49	0.00871255807578564	0.03	34.62	-0.15	21.11	-432	-9999	56.67	0.26	3.23	0.43	0.01	0.01	0.19
+139.26	-65.57	0.93	28.64	2.01	0.01	0.03	34.62	-0.16	20.83	-429	-9999	56.71	0.26	3.21	0.37	0.01	0.01	0.19
+139.27	-65.57	0.94	28.65	1.71	0.01	0.03	34.62	-0.16	20.55	-427	-9999	56.75	0.26	3.19	0.32	0.01	0.01	0.2
+139.28	-65.57	0.94	28.66	1.54	0.01	0.03	34.62	-0.16	20.21	-424	-9999	56.8	0.26	3.17	0.28	0.01	0.01	0.2
+139.28	-65.57	0.94	28.67	1.44	0.02	0.03	34.62	-0.17	19.74	-422	-9999	56.84	0.26	3.14	0.26	0.01	0.01	0.2
+139.29	-65.57	0.94	28.68	1.74	0.01	0.03	34.62	-0.17	20.86	-421	-9999	56.87	0.26	3.13	0.22	0.01	0.01	0.2
+139.3	-65.57	0.95	28.7	3.46	0.01	0.03	34.62	-0.17	21.3	-420	-9999	56.91	0.26	3.11	0.32	0.01	0.01	0.2
+139.3	-65.57	0.95	28.71	3.91	0.03	0.03	34.62	-0.18	21.01	-414	-9999	57.18	0.26	3.09	0.59	0.01	0.01	0.21
+139.31	-65.57	0.96	28.72	4.03	0.05	0.03	34.62	-0.18	20.76	-406	-9999	57.54	0.26	3.07	0.53	0.01	0.01	0.21
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fish_wide.tsv	Thu Mar 21 14:04:25 2024 +0000
@@ -0,0 +1,20 @@
+lat	long	rajidae_bathyraja_sp.	myctophidae_electrona_antarctica	myctophidae_protomyctophum_bolini
+-67.22	139.96	4	1	0
+-65.46	139.31	0	4	0
+-65.82	142.96	0	0	1
+-65.77	142.92	0	1	1
+-65.64	140.45	0	2	0
+-65.74	142.86	0	1	0
+-65.85	144.04	0	2	0
+-65.44	139.32	0	0	0
+-65.71	140.6	0	0	0
+-66.75	143.95	0	0	0
+-66.41	140.51	0	0	0
+-66.34	140.03	0	0	0
+-66.17	139.35	0	0	0
+-66.33	143.36	0	0	0
+-66.39	140.43	0	0	0
+-66.75	144.96	0	0	0
+-66.34	143.04	0	0	0
+-67.05	145.15	0	0	0
+