Mercurial > repos > ecology > wormsmeasurements
annotate wormsmeasurements.R @ 1:6f75ab89587a draft default tip
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
author | ecology |
---|---|
date | Wed, 28 May 2025 10:13:42 +0000 |
parents | 23b963a1284e |
children |
rev | line source |
---|---|
0
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
1 ##05/05/2025 |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
2 ##Jean Le Cras |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
3 ### Enrich dataset with data from WoRMS |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
4 |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
5 #load libraries |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
6 library(tidyverse) |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
7 library(worrms) |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
8 library(fastDummies) |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
9 |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
### parameters
# Positional command-line arguments:
#   1. path to the tab-separated occurrence table (with a header row)
#   2. comma-separated list of measurement types to retrieve
#   3. name of the column holding the scientific names
#   4. "true" to pass include_inherited = TRUE to the WoRMS attribute lookup
#   5. "true" to one-hot encode the qualitative measurement columns
#   6. "true" to drop rows lacking any of the requested measurements
# Arguments 4 to 6 are optional; anything other than "true" means FALSE.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
  stop("This tool needs at least one argument")
}
if (length(args) < 3) {
  stop(
    "This tool needs an input file, the measurement types and the name of ",
    "the scientific name column",
    call. = FALSE
  )
}

scientificName_name <- args[3]
occurrence <- read.csv(args[1], header = TRUE, sep = "\t")
# fail early with a clear message instead of a cryptic error further down
if (!(scientificName_name %in% names(occurrence))) {
  stop(
    "Column '", scientificName_name, "' not found in the input file",
    call. = FALSE
  )
}
# sort by scientific name
occurrence <- occurrence %>%
  arrange(.data[[scientificName_name]])
measurement_types <- unlist(str_split(args[2], ","))
# identical() yields FALSE (never NA) when a flag is absent
include_inherited <- identical(args[4], "true")
pivot_wider <- identical(args[5], "true")
exclude_NA <- identical(args[6], "true")

# regex to only keep genus and specific epithet from scientific names:
# keeps the text from the leading capital up to the next capital letter or
# opening parenthesis
regex_find <- "^([A-Z][^A-Z(]+)(.*)$"
regex_replace <- "\\1"
0
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
27 |
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
28 |
1
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
# function to extract the measurement values from the attributes data tibble
#
# traits_data: NULL, or a data frame with `measurementType` and
#   `measurementValue` columns
# types: measurement types to extract; defaults to the script-level
#   `measurement_types`
#
# Returns a vector named after `types`. Types without a non-missing value in
# `traits_data` stay NA. When a type occurs several times, the value of its
# last occurrence is kept.
extract_traits_values <- function(traits_data, types = measurement_types) {
  result <- setNames(rep(NA, length(types)), types)

  if (is.null(traits_data) || nrow(traits_data) == 0) {
    return(result)
  }

  required <- c("measurementType", "measurementValue")
  if (!all(required %in% names(traits_data))) {
    stop(
      "attributes data must contain the columns: ",
      paste(required, collapse = ", "),
      call. = FALSE
    )
  }

  # as.character() so that a factor column indexes by label, not by code
  found_types <- as.character(traits_data$measurementType)
  found_values <- traits_data$measurementValue
  wanted <- found_types %in% types & !is.na(found_values)

  if (!any(wanted)) {
    return(result)
  }

  # vectorised assignment; with repeated names the last value wins
  result[found_types[wanted]] <- found_values[wanted]
  result
}
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
50 |
1
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
# function to call the WoRMS API and get the measurement values
#
# scientific_name: a single scientific name; it is reduced with
#   regex_find / regex_replace and trimmed before the lookup
#
# Returns the named vector built by extract_traits_values(), or NULL when the
# name is missing/empty, cannot be resolved to a single id, or its attributes
# cannot be fetched.
#
# Every looked-up name, including failed ones, is stored in the script-level
# `cache` list so the API is queried at most once per cleaned name.
get_life_history_traits <- function(scientific_name) {
  clean_scientific_name <- trimws(gsub(regex_find, regex_replace, scientific_name))

  # nothing to look up for a missing or empty name
  if (is.na(clean_scientific_name) || !nzchar(clean_scientific_name)) {
    return(NULL)
  }

  if (clean_scientific_name %in% names(cache)) {
    return(cache[[clean_scientific_name]])
  }

  worms_id <- tryCatch(
    wm_name2id(name = clean_scientific_name),
    error = function(e) NA
  )

  # length is tested first so that is.na() always sees exactly one value
  if (length(worms_id) != 1 || is.na(worms_id)) {
    # `cache[[name]] <<- NULL` would store nothing; assigning list(NULL) keeps
    # the name in the cache so the failed lookup is not repeated
    cache[clean_scientific_name] <<- list(NULL)
    return(NULL)
  }

  data_attr <- tryCatch(
    wm_attr_data(worms_id, include_inherited = include_inherited),
    error = function(e) NULL
  )

  if (is.null(data_attr)) {
    cache[clean_scientific_name] <<- list(NULL)
    return(NULL)
  }

  traits <- extract_traits_values(data_attr)
  # list() wrapper keeps the entry even if `traits` were ever NULL
  cache[clean_scientific_name] <<- list(traits)
  traits
}
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
83 |
1
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
# a cache to limit API calls (read and written by get_life_history_traits)
cache <- list()

# add a list column containing the values of the requested measurements for
# each scientific name, then spread it into one column per measurement
trait_data <- unnest_wider(
  mutate(
    occurrence,
    life_history_traits = map(
      .data[[scientificName_name]],
      get_life_history_traits
    )
  ),
  life_history_traits
)
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
94 |
1
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
# make sure each measurement type has a column: types for which no value was
# returned are added as all-NA columns
for (absent_type in setdiff(measurement_types, names(trait_data))) {
  trait_data[[absent_type]] <- NA
}
23b963a1284e
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit 1f5e22a210b8a395f1c7b48f54e03e781a1b34c4
ecology
parents:
diff
changeset
|
101 |
1
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
# list of quantitative measurements
numeric_cols <- c()

# try to convert columns to numeric and remember them: a column counts as
# quantitative when the conversion introduces no new NA
for (type_name in unique(measurement_types)) {
  raw_values <- trait_data[[type_name]]
  as_numbers <- suppressWarnings(as.numeric(raw_values))
  if (all(is.na(raw_values) == is.na(as_numbers))) {
    numeric_cols <- c(numeric_cols, type_name)
    trait_data[[type_name]] <- as_numbers
  }
}
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
116 |
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
# filter NA but only in the added columns: keep the rows that have a value
# for every requested measurement
if (exclude_NA) {
  has_all_measurements <- complete.cases(trait_data[measurement_types])
  trait_data <- trait_data[has_all_measurements, ]
}
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
121 |
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
# determine which columns are qualitative (not converted to numeric) and
# therefore candidates for one-hot encoding
factor_cols <- unique(measurement_types[!(measurement_types %in% numeric_cols)])

# one-hot encode the qualitative columns
if (pivot_wider && length(factor_cols) > 0) {
  trait_data <- dummy_cols(
    trait_data,
    select_columns = factor_cols,
    remove_selected_columns = TRUE,
    ignore_na = TRUE
  )
}
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
129 |
6f75ab89587a
planemo upload for repository https://github.com/jeanlecras/tools-ecology/tree/master/tools/WormsMeasurements commit ced658540f05bb07e1e687af30a3fa4ea8e4803c
ecology
parents:
0
diff
changeset
|
# write the enriched dataset as a tab-separated file in the working directory
write.table(
  x = trait_data,
  file = "enriched_data.tabular",
  sep = "\t",
  row.names = FALSE
)