annotate wormsmeasurements.R @ 1:6f75ab89587a draft default tip

planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
author ecology
date Wed, 28 May 2025 10:13:42 +0000
parents 23b963a1284e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
1 ##05/05/2025
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
2 ##Jean Le Cras
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
3 ### Enrich dataset with data from WoRMS
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
4
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
5 #load libraries
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
6 library(tidyverse)
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
7 library(worrms)
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
8 library(fastDummies)
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
9
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
### parameters
# Positional command-line arguments read below:
#   1: path of the tab-separated occurrence file (first line is the header)
#   2: comma-separated list of measurement types to retrieve
#   3: name of the column holding the scientific names
#   4: "true" to pass include_inherited = TRUE to wm_attr_data()
#   5: "true" to one-hot encode the non-numeric measurement columns
#   6: "true" to drop the rows lacking any of the requested measurements
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
  stop("This tool needs at least one argument")
}
# args[1] to args[6] are all read below: fail early with a clear message
# instead of failing later on an NA flag
if (length(args) < 6) {
  stop("This tool needs 6 arguments but received ", length(args))
}

scientificName_name <- args[3]
# the occurrences are ordered by the scientific name column
occurrence <- read.csv(args[1], header = TRUE, sep = "\t") %>%
  arrange(.[[scientificName_name]])
measurement_types <- unlist(str_split(args[2], ","))
# any value other than the exact string "true" disables the option
include_inherited <- identical(args[4], "true")
pivot_wider <- identical(args[5], "true")
exclude_NA <- identical(args[6], "true")

# regex to only keep genus and specific epithet from scientific names:
# keeps the leading capital letter and everything up to (excluding) the next
# capital letter or opening parenthesis
regex_find <- "^([A-Z][^A-Z(]+)(.*)$"
regex_replace <- "\\1"
0
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
27
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
28
1
6f75ab89587a planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents: 0
diff changeset
29 # function to extract the measurement values from the attributes data tibble
0
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
#' Extract the requested measurement values from an attributes table
#'
#' @param traits_data A data frame (or tibble) with at least the columns
#'   `measurementType` and `measurementValue`, or `NULL`.
#' @param types Character vector of the measurement types to extract.
#'   Defaults to the script-level `measurement_types`.
#' @return A vector named after `types`. Each element is the value found for
#'   that type, or `NA` when the type is absent or only has missing values.
#'   When a type appears on several rows, the value of the last row is kept.
extract_traits_values <- function(traits_data, types = measurement_types) {
  result <- setNames(rep(NA, length(types)), types)

  if (is.null(traits_data) || nrow(traits_data) == 0) {
    return(result)
  }

  found_types <- traits_data$measurementType
  found_values <- traits_data$measurementValue

  # only keep the requested types that have a usable value
  keep <- found_types %in% types & !is.na(found_values)
  if (!any(keep)) {
    return(result)
  }
  found_types <- found_types[keep]
  found_values <- found_values[keep]

  # when a type is repeated, its last occurrence is the one that is kept
  is_last <- !duplicated(found_types, fromLast = TRUE)
  result[found_types[is_last]] <- found_values[is_last]
  result
}
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
50
1
6f75ab89587a planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents: 0
diff changeset
51 # function to call the WoRMS API and get the measurement values
0
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
#' Get the requested measurement values of a taxon from the WoRMS API
#'
#' The scientific name is first reduced with `regex_find` / `regex_replace`
#' and trimmed. Results, including failed lookups, are memoised in the
#' script-level list `cache`, keyed by the cleaned name, so each distinct name
#' triggers the API calls at most once.
#'
#' @param scientific_name A single scientific name.
#' @return The named vector built by `extract_traits_values()`, or `NULL`
#'   when the name is missing or empty, when no identifier is obtained for
#'   it, or when its attributes cannot be retrieved.
get_life_history_traits <- function(scientific_name) {
  clean_scientific_name <- trimws(gsub(regex_find, regex_replace, scientific_name))

  # a missing or empty name can neither be looked up nor used as a cache key
  if (length(clean_scientific_name) != 1 ||
    is.na(clean_scientific_name) ||
    !nzchar(clean_scientific_name)) {
    return(NULL)
  }

  if (clean_scientific_name %in% names(cache)) {
    return(cache[[clean_scientific_name]])
  }

  # API errors are deliberately turned into "not found" (best effort)
  worms_id <- tryCatch(
    wm_name2id(name = clean_scientific_name),
    error = function(e) NA
  )

  # the length is tested first so that is.na() only ever sees a scalar
  if (length(worms_id) != 1 || is.na(worms_id)) {
    # `cache[[name]] <<- NULL` would store nothing (it deletes the element);
    # assigning list(NULL) with `[` really records the failed lookup
    cache[clean_scientific_name] <<- list(NULL)
    return(NULL)
  }

  data_attr <- tryCatch(
    wm_attr_data(worms_id, include_inherited = include_inherited),
    error = function(e) NULL
  )

  if (is.null(data_attr)) {
    cache[clean_scientific_name] <<- list(NULL)
    return(NULL)
  }

  traits <- extract_traits_values(data_attr)
  cache[[clean_scientific_name]] <<- traits
  traits
}
23b963a1284e planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff changeset
83
1
6f75ab89587a planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents: 0
diff changeset
# a cache to limit API calls (filled by get_life_history_traits)
cache <- list()

# add a column containing the lists of values of the measurements requested
trait_data <- occurrence %>%
  mutate(
    life_history_traits = map(.data[[scientificName_name]], get_life_history_traits)
  )

# convert the column of lists to multiple columns of unique values
trait_data <- trait_data %>%
  unnest_wider(life_history_traits)

# make sure each measurement type has a column
for (col in setdiff(measurement_types, names(trait_data))) {
  trait_data[[col]] <- NA
}

# try to convert the measurement columns to numeric: a column is converted
# only when the conversion does not turn any existing value into NA
trait_data <- trait_data %>%
  mutate(across(all_of(measurement_types), function(values) {
    as_number <- suppressWarnings(as.numeric(values))
    if (all(is.na(values) == is.na(as_number))) {
      as_number
    } else {
      values
    }
  }))

# list of quantitative measurements: the columns that are numeric after the
# conversion attempt above
numeric_cols <- measurement_types[
  vapply(
    measurement_types,
    function(col) is.numeric(trait_data[[col]]),
    logical(1)
  )
]

# filter NA but only in the added columns
if (exclude_NA) {
  complete_rows <- complete.cases(trait_data[measurement_types])
  trait_data <- trait_data[complete_rows, , drop = FALSE]
}

# determine what are the qualitative columns to be one hot encoded
factor_cols <- setdiff(measurement_types, numeric_cols)

# one hot encode the qualitative columns
if (pivot_wider && length(factor_cols) > 0) {
  trait_data <- dummy_cols(
    trait_data,
    select_columns = factor_cols,
    remove_selected_columns = TRUE,
    ignore_na = TRUE
  )
}

# write the enriched dataset as tabular
write.table(trait_data, "enriched_data.tabular", sep = "\t", row.names = FALSE)