diff wormsmeasurements.R @ 0:23b963a1284e draft

planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
author ecology
date Wed, 14 May 2025 15:08:00 +0000
parents
children 6f75ab89587a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wormsmeasurements.R	Wed May 14 15:08:00 2025 +0000
@@ -0,0 +1,89 @@
+##05/05/2025
+##Jean Le Cras
+### Enrich dataset with data from WoRMS
+
+#load libraries
+library(tidyverse)
+library(worrms)
+library(fastDummies)
+
+### parameters
+# Expected command-line arguments, in order:
+#   1. path to the tab-separated occurrence file (with a header row)
+#   2. comma-separated list of measurement types to extract
+#   3. name of the column holding the scientific names
+#   4. "true" to pass include_inherited = TRUE to wm_attr_data()
+#   5. "true" to one-hot encode the extracted trait columns
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 5) {
+  # All five values are read below; fail now rather than after the lookups
+  stop(
+    "This tool needs 5 arguments: occurrence file, measurement types, ",
+    "scientific name column, include_inherited, pivot_wider",
+    call. = FALSE
+  )
+}
+
+scientificName_name <- args[3]
+# Trim so that "a, b" and "a,b" select the same measurement types
+measurement_types <- str_trim(unlist(str_split(args[2], ",")))
+# identical() always yields a plain TRUE/FALSE, never NA
+include_inherited <- identical(args[4], "true")
+pivot_wider <- identical(args[5], "true")
+
+occurrence <- read.csv(args[1], header = TRUE, sep = "\t")
+if (!scientificName_name %in% names(occurrence)) {
+  stop(
+    "Column '", scientificName_name, "' not found in the occurrence file",
+    call. = FALSE
+  )
+}
+# Sort on the user-selected name column instead of a hard-coded
+# `scientificName`, which may not exist in the input
+occurrence <- occurrence %>% arrange(.data[[scientificName_name]])
+
+
+### functions
+# Pick the requested trait values out of a WoRMS attribute table.
+#
+# traits_data: data frame read through its `measurementType` and
+#   `measurementValue` columns (the result of wm_attr_data() in this
+#   script); NULL or any non-data-frame value is treated as "no data".
+# Returns a vector named after the global `measurement_types`, holding NA
+#   for every type without a non-missing value. When a type occurs on
+#   several rows, the value of the last row is kept.
+extract_traits_values <- function(traits_data) {
+  result <- setNames(rep(NA, length(measurement_types)), measurement_types)
+
+  # Check the class first: nrow() of a non-data-frame is NULL, which would
+  # turn the size comparison into a zero-length condition
+  if (!is.data.frame(traits_data) || nrow(traits_data) == 0) {
+    return(result)
+  }
+
+  types <- traits_data[["measurementType"]]
+  values <- traits_data[["measurementValue"]]
+  keep <- types %in% measurement_types & !is.na(values)
+  if (!any(keep)) {
+    return(result)
+  }
+
+  # One vectorised assignment by name; for duplicated names the later
+  # element overwrites the earlier one, like the row-by-row loop it replaces.
+  # as.character() keeps a factor column from being used as integer positions.
+  result[as.character(types[keep])] <- values[keep]
+  result
+}
+
+# Look up the requested WoRMS traits for one scientific name, memoised in
+# the global list `cache`.
+#
+# scientific_name: a single taxon name.
+# Returns the named vector built by extract_traits_values(), or NULL when
+#   the input is not a single non-empty name, the name cannot be resolved
+#   to an id by wm_name2id(), or wm_attr_data() fails for that id.
+# Errors raised by either web-service call are deliberately swallowed so
+# that one unresolved name does not abort the whole run.
+get_life_history_traits <- function(scientific_name) {
+  name <- as.character(scientific_name)
+  # Missing or blank names cannot be resolved; skip the web service
+  if (length(name) != 1 || is.na(name) || !nzchar(name)) {
+    return(NULL)
+  }
+
+  if (name %in% names(cache)) {
+    return(cache[[name]])
+  }
+
+  # Store the outcome (failures included) and hand it back.
+  # `cache[[name]] <<- NULL` would delete the entry instead of recording
+  # it, so the value is wrapped in a list and assigned with single brackets.
+  remember <- function(value) {
+    cache[name] <<- list(value)
+    value
+  }
+
+  worms_id <- tryCatch(
+    wm_name2id(name = name),
+    error = function(e) NA
+  )
+
+  # Test the length before is.na(): a zero-length operand to `||` is an error
+  if (length(worms_id) != 1 || is.na(worms_id)) {
+    return(remember(NULL))
+  }
+
+  data_attr <- tryCatch(
+    wm_attr_data(worms_id, include_inherited = include_inherited),
+    error = function(e) NULL
+  )
+
+  if (is.null(data_attr)) {
+    return(remember(NULL))
+  }
+
+  remember(extract_traits_values(data_attr))
+}
+
+# Lookup cache shared with get_life_history_traits(), keyed by scientific name
+cache <- list()
+
+# Fetch the traits for every row; the result is a list column holding one
+# named vector (or NULL) per occurrence
+trait_data <- occurrence %>%
+  mutate(
+    life_history_traits = map(
+      .data[[scientificName_name]],
+      get_life_history_traits
+    )
+  )
+
+# Spread the list column into one column per trait name
+trait_data <- trait_data %>%
+  unnest_wider(life_history_traits)
+
+if (pivot_wider) {
+  # When no lookup succeeded, unnest_wider() produces no trait columns.
+  # Encode only the columns that exist and skip the step if there are none,
+  # so dummy_cols() is never asked for columns absent from the data.
+  trait_columns <- intersect(measurement_types, names(trait_data))
+  if (length(trait_columns) > 0) {
+    trait_data <- dummy_cols(
+      trait_data,
+      select_columns = trait_columns,
+      remove_selected_columns = TRUE,
+      ignore_na = TRUE
+    )
+  }
+}
+
+write.table(trait_data, "enriched_data.tabular", sep = "\t", row.names = FALSE)
\ No newline at end of file