Mercurial > repos > ecology > wormsmeasurements
diff wormsmeasurements.R @ 0:23b963a1284e draft
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
author | ecology |
---|---|
date | Wed, 14 May 2025 15:08:00 +0000 |
parents | |
children | 6f75ab89587a |
line wrap: on
line diff
## 05/05/2025
## Jean Le Cras
### Enrich an occurrence dataset with measurements (traits) from WoRMS
#
# Positional command-line arguments:
#   1. path to the tab-separated occurrence file (with a header row)
#   2. comma-separated list of measurement types to retrieve
#   3. name of the column that holds the scientific names
#   4. "true" to pass include_inherited = TRUE to wm_attr_data()
#   5. "true" to one-hot encode the retrieved measurement columns
#
# Output: "enriched_data.tabular" (tab-separated) in the working directory.

# load libraries
library(tidyverse)
library(worrms)
library(fastDummies)

### parameters
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
  stop("This tool needs at least one argument")
}
# All five positional arguments are read below; fail early with a clear
# message instead of propagating NA values into later steps.
if (length(args) < 5) {
  stop(
    "This tool needs 5 arguments: occurrence file, measurement types, ",
    "scientific name column, include_inherited, pivot_wider",
    call. = FALSE
  )
}

measurement_types <- unlist(str_split(args[2], ","))
scientificName_name <- args[3]
# identical() always yields a single TRUE/FALSE (never NA).
include_inherited <- identical(args[4], "true")
# Named so that it does not shadow tidyr::pivot_wider().
do_pivot_wider <- identical(args[5], "true")

occurrence <- read.csv(args[1], header = TRUE, sep = "\t")
if (!scientificName_name %in% names(occurrence)) {
  stop(
    "Column '", scientificName_name, "' not found in the occurrence file",
    call. = FALSE
  )
}
# Sort on the user-selected name column rather than a hard-coded
# `scientificName`, which may not exist in the input.
occurrence <- occurrence %>%
  arrange(.data[[scientificName_name]])


### helpers

# Build a vector named after `measurement_types` (a script-level variable)
# holding the measurementValue found for each type in `traits_data`.
#
# traits_data: data frame with `measurementType` and `measurementValue`
#   columns (as returned by wm_attr_data()), or NULL.
# Returns: a vector with one element per requested measurement type; NA where
#   no non-missing value was found. If a type occurs several times, the last
#   occurrence wins.
extract_traits_values <- function(traits_data) {
  result <- setNames(rep(NA, length(measurement_types)), measurement_types)

  if (is.null(traits_data) || nrow(traits_data) == 0) {
    return(result)
  }

  traits_filtered <- traits_data %>%
    filter(
      measurementType %in% measurement_types,
      !is.na(measurementValue)
    )

  # Assignment by name: for a repeated name the last value is kept, which
  # matches a row-by-row loop. A zero-row filter result leaves `result` as is.
  result[traits_filtered$measurementType] <- traits_filtered$measurementValue
  result
}

# Query WoRMS for one scientific name, without any caching.
# Returns the named vector from extract_traits_values(), or NULL when the name
# cannot be resolved to an id or the attribute request fails.
fetch_life_history_traits <- function(scientific_name) {
  worms_id <- tryCatch(
    wm_name2id(name = scientific_name),
    error = function(e) NA
  )

  # Check the length first so that `||` only ever sees a single value.
  if (length(worms_id) != 1 || is.na(worms_id)) {
    return(NULL)
  }

  data_attr <- tryCatch(
    wm_attr_data(worms_id, include_inherited = include_inherited),
    error = function(e) NULL
  )

  if (is.null(data_attr)) {
    return(NULL)
  }

  extract_traits_values(data_attr)
}

# Results per scientific name. An environment is used because it can store
# NULL (a failed lookup); assigning NULL into a list element would remove the
# entry, so failures would never be remembered.
cache <- new.env(parent = emptyenv())

# Return the traits for `scientific_name`, querying WoRMS at most once per
# distinct name (successful or not).
#
# scientific_name: a single scientific name.
# Returns: a named vector of measurement values, or NULL if nothing could be
#   retrieved. Missing or empty names return NULL without a request.
get_life_history_traits <- function(scientific_name) {
  key <- as.character(scientific_name)
  if (length(key) != 1 || is.na(key) || !nzchar(key)) {
    return(NULL)
  }

  if (exists(key, envir = cache, inherits = FALSE)) {
    return(get(key, envir = cache, inherits = FALSE))
  }

  traits <- fetch_life_history_traits(scientific_name)
  assign(key, traits, envir = cache)
  traits
}


### main

# One list element per row, then one column per measurement type.
trait_data <- occurrence %>%
  mutate(
    life_history_traits = map(
      .data[[scientificName_name]],
      get_life_history_traits
    )
  ) %>%
  unnest_wider(life_history_traits)

if (do_pivot_wider) {
  trait_data <- dummy_cols(
    trait_data,
    select_columns = measurement_types,
    remove_selected_columns = TRUE,
    ignore_na = TRUE
  )
}

write.table(trait_data, "enriched_data.tabular", sep = "\t", row.names = FALSE)