changeset 0:23b963a1284e draft

planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
author ecology
date Wed, 14 May 2025 15:08:00 +0000
parents
children 6f75ab89587a
files test-data/enriched_data.tabular test-data/enriched_data_inherited.tabular test-data/enriched_data_inherited_ohe.tabular test-data/sample.tabular wormsmeasurements.R wormsmeasurements.xml
diffstat 6 files changed, 251 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/enriched_data.tabular	Wed May 14 15:08:00 2025 +0000
@@ -0,0 +1,11 @@
+"decimalLatitude"	"decimalLongitude"	"scientificName"	"Development"	" Fecundity"
+-49.355	70.215	"Abatus cordatus"	NA	NA
+-66.963303	163.223297	"Ctenocidaris spinosa"	NA	NA
+-42.45	-74.75833333	"Loxechinus albus"	NA	NA
+-37.606167	176.705167	"Ogmocidaris benhami"	NA	NA
+-36.201698	175.834198	"Peronella hinemoae"	NA	NA
+-37.494667	176.672501	"Phormosoma bursarium"	NA	NA
+-43.469	173.572	"Pseudechinus huttoni"	NA	NA
+-47.7	179.45	"Pseudechinus novaezealandiae"	NA	NA
+-74.72	164.2183333	"Sterechinus neumayeri"	NA	NA
+-70.51166667	-8.801	"Sterechinus sp"	NA	NA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/enriched_data_inherited.tabular	Wed May 14 15:08:00 2025 +0000
@@ -0,0 +1,11 @@
+"decimalLatitude"	"decimalLongitude"	"scientificName"	"Development"	" Fecundity"
+-49.355	70.215	"Abatus cordatus"	"planktotrophic"	NA
+-66.963303	163.223297	"Ctenocidaris spinosa"	"direct development"	NA
+-42.45	-74.75833333	"Loxechinus albus"	"planktotrophic"	NA
+-37.606167	176.705167	"Ogmocidaris benhami"	"planktotrophic"	NA
+-36.201698	175.834198	"Peronella hinemoae"	"planktotrophic"	NA
+-37.494667	176.672501	"Phormosoma bursarium"	"planktotrophic"	NA
+-43.469	173.572	"Pseudechinus huttoni"	"planktotrophic"	NA
+-47.7	179.45	"Pseudechinus novaezealandiae"	"planktotrophic"	NA
+-74.72	164.2183333	"Sterechinus neumayeri"	"planktotrophic"	NA
+-70.51166667	-8.801	"Sterechinus sp"	NA	NA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/enriched_data_inherited_ohe.tabular	Wed May 14 15:08:00 2025 +0000
@@ -0,0 +1,11 @@
+"decimalLatitude"	"decimalLongitude"	"scientificName"	"Development_direct development"	"Development_planktotrophic"	" Fecundity_"
+-49.355	70.215	"Abatus cordatus"	0	1	0
+-66.963303	163.223297	"Ctenocidaris spinosa"	1	0	0
+-42.45	-74.75833333	"Loxechinus albus"	0	1	0
+-37.606167	176.705167	"Ogmocidaris benhami"	0	1	0
+-36.201698	175.834198	"Peronella hinemoae"	0	1	0
+-37.494667	176.672501	"Phormosoma bursarium"	0	1	0
+-43.469	173.572	"Pseudechinus huttoni"	0	1	0
+-47.7	179.45	"Pseudechinus novaezealandiae"	0	1	0
+-74.72	164.2183333	"Sterechinus neumayeri"	0	1	0
+-70.51166667	-8.801	"Sterechinus sp"	NA	NA	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample.tabular	Wed May 14 15:08:00 2025 +0000
@@ -0,0 +1,11 @@
+"decimalLatitude"	"decimalLongitude"	"scientificName"
+"1838"	-66.963303	163.223297	"Ctenocidaris spinosa"
+"7923"	-70.51166667	-8.801	"Sterechinus sp"
+"4410"	-37.494667	176.672501	"Phormosoma bursarium"
+"10574"	-37.606167	176.705167	"Ogmocidaris benhami"
+"2050"	-74.72	164.2183333	"Sterechinus neumayeri"
+"2531"	-47.7	179.45	"Pseudechinus novaezealandiae"
+"4145"	-49.355	70.215	"Abatus cordatus"
+"9337"	-36.201698	175.834198	"Peronella hinemoae"
+"7481"	-43.469	173.572	"Pseudechinus huttoni"
+"9986"	-42.45	-74.75833333	"Loxechinus albus"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wormsmeasurements.R	Wed May 14 15:08:00 2025 +0000
@@ -0,0 +1,89 @@
+##05/05/2025
+##Jean Le Cras
+### Enrich dataset with data from WoRMS
+
+#load libraries
+library(tidyverse)
+library(worrms)
+library(fastDummies)
+
+### parameters
+# Positional arguments, in order:
+#   1. path of the tab-separated occurrence table
+#   2. comma-separated list of measurement types
+#   3. name of the column holding the scientific names
+#   4. "true" to include attributes inherited from the parent taxon
+#   5. "true" to one-hot encode the measurement columns
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 5) {
+    # All five values are read below; a shorter call would yield NA flags and
+    # fail later with a far less helpful message.
+    stop("This tool needs at least five arguments")
+}
+
+# Read the column name first so the sort honours it instead of assuming the
+# column is literally called "scientificName".
+scientificName_name <- args[3]
+occurrence <- read.csv(args[1], header = TRUE, sep = "\t")
+if (!scientificName_name %in% names(occurrence)) {
+    stop("Column '", scientificName_name, "' not found in the input dataset")
+}
+occurrence <- occurrence %>% arrange(.data[[scientificName_name]])
+
+# NOTE(review): the list is split on "," without trimming, so an input such as
+# "Development, Fecundity" produces " Fecundity" (leading space) as both the
+# lookup key and the output column name. Left unchanged because the expected
+# test outputs rely on it - confirm whether trimming is wanted.
+measurement_types <- unlist(str_split(args[2], ","))
+
+# The flags are compared against the literal string "true"; anything else
+# (including a missing value) is treated as FALSE.
+include_inherited <- identical(args[4], "true")
+pivot_wider <- identical(args[5], "true")
+
+
+### 
+# Collect one value per requested measurement type from an attribute table.
+#
+# traits_data: table of attributes for one taxon, with `measurementType` and
+#   `measurementValue` columns; may be NULL or have no rows.
+# Returns a vector named after the global `measurement_types`. Entries stay NA
+# when no non-missing value is available; when a type appears several times
+# the last occurrence wins.
+extract_traits_values <- function(traits_data) {
+  values <- rep(NA, length(measurement_types))
+  names(values) <- measurement_types
+
+  has_rows <- !is.null(traits_data) && nrow(traits_data) > 0
+  if (!has_rows) {
+    return(values)
+  }
+
+  # Keep only the requested types that actually carry a value.
+  kept <- filter(
+    traits_data,
+    measurementType %in% measurement_types,
+    !is.na(measurementValue)
+  )
+
+  if (nrow(kept) > 0) {
+    # Named assignment is applied in row order, so later rows overwrite
+    # earlier ones of the same type.
+    values[kept$measurementType] <- kept$measurementValue
+  }
+  return(values)
+}
+
+# Fetch the requested WoRMS measurement values for one scientific name.
+#
+# scientific_name: a single taxon name taken from the occurrence table.
+# Returns the named vector built by extract_traits_values(), or NULL when the
+# name cannot be resolved to an identifier or the attribute request fails.
+# Every outcome, including failures, is memoised in the global `cache` list
+# so that a name repeated in the table triggers only one round of requests.
+get_life_history_traits <- function(scientific_name) {
+  if (scientific_name %in% names(cache)) {
+    return(cache[[scientific_name]])
+  }
+
+  worms_id <- tryCatch(
+    wm_name2id(name = scientific_name),
+    error = function(e) NA
+  )
+
+  # Check the length first: is.na() on a zero-length value is logical(0),
+  # which is not a usable condition on its own.
+  if (length(worms_id) == 0 || is.na(worms_id[[1]])) {
+    # `cache[[name]] <<- NULL` would store nothing (assigning NULL removes the
+    # element), so the failure would be looked up again for every duplicate.
+    # Assigning list(NULL) keeps the name with a NULL value.
+    cache[scientific_name] <<- list(NULL)
+    return(NULL)
+  }
+
+  data_attr <- tryCatch(
+    wm_attr_data(worms_id, include_inherited = include_inherited),
+    error = function(e) NULL
+  )
+
+  if (is.null(data_attr)) {
+    cache[scientific_name] <<- list(NULL)
+    return(NULL)
+  }
+
+  traits <- extract_traits_values(data_attr)
+  cache[scientific_name] <<- list(traits)
+  return(traits)
+}
+
+# Memo of lookups already performed, keyed by scientific name; it is filled
+# by get_life_history_traits() through `<<-`.
+cache <- list()
+
+# Query each row's scientific name, then spread the returned named vectors
+# into one column per measurement type. (The leftover interactive view() call
+# was removed: it has no purpose in a batch script.)
+trait_data <- occurrence %>%
+  mutate(
+    life_history_traits = map(
+      .data[[scientificName_name]],
+      get_life_history_traits
+    )
+  ) %>%
+  unnest_wider(life_history_traits)
+
+if (pivot_wider) {
+  # One-hot encode the measurement columns and drop the original ones; rows
+  # with a missing value keep NA in the generated columns.
+  trait_data <- dummy_cols(
+    trait_data,
+    select_columns = measurement_types,
+    remove_selected_columns = TRUE,
+    ignore_na = TRUE
+  )
+}
+
+# The file name is fixed: the Galaxy wrapper collects it with from_work_dir.
+write.table(trait_data, "enriched_data.tabular", sep = "\t", row.names = FALSE)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wormsmeasurements.xml	Wed May 14 15:08:00 2025 +0000
@@ -0,0 +1,118 @@
+<tool id="WormsMeasurements" name="Enrich dataset with WoRMS" version="0.1.1" profile="23.2">
+    <description>Enrich dataset with measurement type data from WoRMS</description>
+
+    <!-- Conda packages needed by wormsmeasurements.R, which loads tidyverse,
+         worrms (WoRMS client) and fastDummies (dummy_cols one-hot encoder).
+         NOTE(review): conda package names are normally lowercase
+         ("r-fastdummies"); confirm that this requirement resolves.
+         NOTE(review): r-dplyr is presumably already pulled in by r-tidyverse;
+         verify before removing it. -->
+    <requirements>
+        <requirement type="package" version="4.3.3">r-base</requirement>
+        <requirement type="package" version="0.4.3">r-worrms</requirement>
+        <requirement type="package" version="1.1.4">r-dplyr</requirement>
+        <requirement type="package" version="2.0.0">r-tidyverse</requirement>
+        <requirement type="package" version="1.7.5">r-fastDummies</requirement>
+    </requirements>
+
+    <!-- Positional arguments as read by wormsmeasurements.R:
+         1 = input table, 2 = measurement types, 3 = scientific names column,
+         4 = include inherited attributes, 5 = one-hot encoding.
+         The sixth argument ($output) is not read by the script: it writes
+         enriched_data.tabular in the working directory and the output is
+         collected through from_work_dir. -->
+    <command detect_errors="exit_code"><![CDATA[
+Rscript '$__tool_directory__/wormsmeasurements.R'
+    '$data'
+    '$measurement_types'
+    '$scientic_name'
+    '$include_inherited'
+    '$pivot_wider'
+    '$output'
+    ]]></command>
+
+    <inputs>
+        <!-- "format" only applies to data and data_collection params, so it is not set on the text params. -->
+        <param name="data" type="data" format="tabular" label="occurrence data"/>
+        <param name="measurement_types" type="text" label="list of measurement types"/>
+        <param name="scientic_name" type="text" label="scientific names column name" value="scientificName"/>
+        <param name="include_inherited" type="boolean" label="include attributes inherited from parent taxon" checked="false"/>
+        <param name="pivot_wider" type="boolean" label="one hot encoding on the measurement types" checked="false"/>
+    </inputs>
+
+    <outputs>
+        <!-- The R script always writes enriched_data.tabular in the working directory. -->
+        <data name="output" from_work_dir="enriched_data.tabular" format="tabular" label="dataset enriched with measurements"/>
+    </outputs>
+
+    <!-- Three runs on the same sample: inherited attributes, no inherited
+         attributes, and inherited attributes with one-hot encoding.
+         NOTE(review): the script queries WoRMS through the worrms package, so
+         these tests presumably need network access and the expected files
+         reflect the WoRMS content at the time they were generated; confirm.
+         NOTE(review): the space after the comma in measurement_types is kept
+         by the script, which is why the expected files have a " Fecundity"
+         column header. -->
+    <tests>
+        <test>
+            <param name="data" value="sample.tabular"/>
+            <param name="measurement_types" value="Development, Fecundity"/>
+            <param name="scientic_name" value="scientificName"/>
+            <param name="include_inherited" value="true"/>
+            <param name="pivot_wider" value="false"/>
+            <output name="output" file="enriched_data_inherited.tabular"/>
+        </test>
+        <test>
+            <param name="data" value="sample.tabular"/>
+            <param name="measurement_types" value="Development, Fecundity"/>
+            <param name="scientic_name" value="scientificName"/>
+            <param name="include_inherited" value="false"/>
+            <param name="pivot_wider" value="false"/>
+            <output name="output" file="enriched_data.tabular"/>
+        </test>
+        <test>
+            <param name="data" value="sample.tabular"/>
+            <param name="measurement_types" value="Development, Fecundity"/>
+            <param name="include_inherited" value="true"/>
+            <param name="scientic_name" value="scientificName"/>
+            <param name="pivot_wider" value="true"/>
+            <output name="output" file="enriched_data_inherited_ohe.tabular"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+==================    
+**What it does ?**
+==================
+
+This tool queries WoRMS (World Register of Marine Species) to get data about a specific taxon by accessing the entry returned for its scientific name. It looks for the measurementType(s) requested by the user and selects the associated measurement value to add it to the dataset.
+
+===================         
+**How to use it ?**
+===================
+
+## Parameters:
+
+- **data**: a dataset containing a variable of scientific names.
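+- **list of measurement types**: a list of measurement types present in WoRMS (ex: Development,Fecundity,Size ...) separated by ','. Names are used exactly as typed (surrounding spaces are not removed), so a space after a comma becomes part of the name.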
+- **scientific names column name**: the name of column in the dataset containing scientific names.
+- **include attributes inherited from parent taxon**: useful when the data you are looking for is incomplete.
+- **one hot encoding on the measurement types**: each possible value of a measurementType becomes a column encoded as 0/1.
+
+## Outputs:
+
+The input dataset with one additional column per requested measurement type, or, when one hot encoding is enabled, one 0/1 column per measurement value.
+
+**Example of input data :**
+"decimalLatitude"	"decimalLongitude"	"scientificName"
+-49.355	70.215	"Abatus cordatus"
+-66.963303	163.223297	"Ctenocidaris spinosa"
+-42.45	-74.75833333	"Loxechinus albus"
+-37.606167	176.705167	"Ogmocidaris benhami"
+-36.201698	175.834198	"Peronella hinemoae"
+-37.494667	176.672501	"Phormosoma bursarium"
+-43.469	173.572	"Pseudechinus huttoni"
+-47.7	179.45	"Pseudechinus novaezealandiae"
+-74.72	164.2183333	"Sterechinus neumayeri"
+-70.51166667	-8.801	"Sterechinus sp"
+
+**Example of output data :**
+"decimalLatitude"	"decimalLongitude"	"scientificName"	"Development"	" Fecundity"
+-49.355	70.215	"Abatus cordatus"	"planktotrophic"	NA
+-66.963303	163.223297	"Ctenocidaris spinosa"	"direct development"	NA
+-42.45	-74.75833333	"Loxechinus albus"	"planktotrophic"	NA
+-37.606167	176.705167	"Ogmocidaris benhami"	"planktotrophic"	NA
+-36.201698	175.834198	"Peronella hinemoae"	"planktotrophic"	NA
+-37.494667	176.672501	"Phormosoma bursarium"	"planktotrophic"	NA
+-43.469	173.572	"Pseudechinus huttoni"	"planktotrophic"	NA
+-47.7	179.45	"Pseudechinus novaezealandiae"	"planktotrophic"	NA
+-74.72	164.2183333	"Sterechinus neumayeri"	"planktotrophic"	NA
+-70.51166667	-8.801	"Sterechinus sp"	NA	NA
+
+]]></help>
+
+    <citations>
+        <citation type="doi">10.32614/CRAN.package.dplyr</citation>
+        <citation type="doi">10.32614/CRAN.package.tidyverse</citation>
+        <citation type="doi">10.32614/CRAN.package.worrms</citation>
+        <citation type="doi">10.32614/CRAN.package.fastDummies</citation>
+    </citations>
+</tool>