changeset 0:3073e68373a5 draft default tip
planemo upload for repository https://github.com/AquaINFRA/tools-ecology/tree/master commit d07f89c8f6d6ee44c05de6b1b8047b387e76cad6
author:    ecology
date:      Thu, 10 Oct 2024 09:56:27 +0000
parents:   (none)
children:  (none)
files:     aquainfra_ogc_api_processes.R aquainfra_ogc_api_processes.xml macros.xml test-data/points_att_polygon_test_input_1.txt test-data/points_att_polygon_test_input_2.txt test-data/points_att_polygon_test_input_3.txt
diffstat:  6 files changed, 483 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aquainfra_ogc_api_processes.R	Thu Oct 10 09:56:27 2024 +0000
@@ -0,0 +1,329 @@
+library("httr2")
+library("jsonlite")
+library("getopt")
+
+cat("start generic wrapper service \n")
+
+remove_null_values <- function(x) {
+  # Check if the input is a list
+  if (is.list(x)) {
+    # Remove NULL values and apply the function recursively to sublists
+    x <- lapply(x, remove_null_values)
+    x <- x[!sapply(x, is.null)]
+  }
+  return(x)
+}
+
+getParameters <- function() {
+  # Read the Galaxy-generated inputs.json and drop NULL entries
+  con <- file("inputs.json", "r")
+  lines <- readLines(con)
+  close(con)
+
+  json_string <- paste(lines, collapse = "\n")
+  json_data <- fromJSON(json_string)
+
+  cleaned_json_data <- remove_null_values(json_data)
+  return(cleaned_json_data$conditional_process)
+}
+
+parseResponseBody <- function(body) {
+  # The httr2 response body is a raw vector; decode it and parse as JSON
+  hex <- c(body)
+  intValues <- as.integer(hex)
+  rawVector <- as.raw(intValues)
+  readableOutput <- rawToChar(rawVector)
+  jsonObject <- jsonlite::fromJSON(readableOutput)
+  return(jsonObject)
+}
+
+getOutputs <- function(inputs, output, server) {
+  # Ask the server which outputs the selected process offers and request
+  # each of them by reference, honouring any *_outformat parameters
+  url <- paste0(server, "processes/", inputs$select_process)
+  request <- request(url)
+  response <- req_perform(request)
+  responseBody <- parseResponseBody(response$body)
+  outputs <- list()
+
+  for (x in seq_along(responseBody$outputs)) {
+    outputformatName <-
+      paste(names(responseBody$outputs[x]), "_outformat", sep = "")
+    output_item <- list()
+
+    for (p in names(inputs)) {
+      if (p == outputformatName) {
+        format <- list("mediaType" = inputs[[outputformatName]])
+        output_item$format <- format
+      }
+    }
+    output_item$transmissionMode <- "reference"
+    outputs[[x]] <- output_item
+  }
+
+  names(outputs) <- names(responseBody$outputs)
+  return(outputs)
+}
+
+executeProcess <- function(url, process, requestBodyData) {
+  # POST the job to /processes/{id}/execution and return the job ID.
+  # Note: only the "inputs" member is sent; outputs are left to the
+  # server's defaults.
+  url <- paste0(url, "processes/", process, "/execution")
+  requestBodyData$inputs$select_process <- NULL
+
+  body <- list()
+  body$inputs <- requestBodyData$inputs
+
+  response <- request(url) %>%
+    req_headers("Content-Type" = "application/json",
+                "Prefer" = "respond-async") %>%
+    req_body_json(body) %>%
+    req_perform()
+
+  cat("\n Process executed")
+  cat("\n status: ", response$status_code)
+  jobId <- parseResponseBody(response$body)$jobID
+
+  return(jobId)
+}
+
+checkJobStatus <- function(server, process, jobID) {
+  url <- paste0(server, "jobs/", jobID)
+  response <- request(url) %>%
+    req_perform()
+  jobStatus <- parseResponseBody(response$body)$status
+  return(jobStatus)
+}
+
+getStatusCode <- function(server, process, jobID) {
+  url <- paste0(server, "jobs/", jobID)
+  print(url)
+  response <- request(url) %>%
+    req_perform()
+  status_code <- response$status_code
+  return(status_code)
+}
+
+getResult <- function(server, process, jobID) {
+  response <-
+    request(paste0(server, "jobs/", jobID, "/results?f=json")) %>%
+    req_perform()
+  return(response)
+}
+
+# Recursive function to search for href entries in a nested list
+findHref <- function(obj) {
+  hrefs <- c() # Initialize an empty vector to store hrefs
+
+  if (is.list(obj)) {
+    # If the object is a list, loop through its elements
+    for (name in names(obj)) {
+      element <- obj[[name]]
+
+      if (is.list(element)) {
+        # Recursively search if the element is another list
+        hrefs <- c(hrefs, findHref(element))
+      } else if (name == "href") {
+        # If the element is named "href", capture its value
+        hrefs <- c(hrefs, element)
+      }
+    }
+  }
+  return(hrefs)
+}
+
+retrieveResults <- function(server, process, jobID, outputData) {
+  status_code <- getStatusCode(server, process, jobID)
+  print(status_code)
+
+  if (status_code == 200) {
+    status <- "running"
+
+    # Poll the job endpoint until the job either succeeds or fails
+    while (status == "running") {
+      jobStatus <- checkJobStatus(server, process, jobID)
+      print(jobStatus)
+
+      if (jobStatus == "successful") {
+        status <- jobStatus
+        result <- getResult(server, process, jobID)
+
+        if (result$status_code == 200) {
+          resultBody <- parseResponseBody(result$body)
+          print(resultBody)
+
+          # Call the recursive function to find all hrefs
+          hrefs <- findHref(resultBody)
+
+          if (length(hrefs) > 0) {
+            # Collapse the URLs with a newline
+            urls_with_newline <- paste(hrefs, collapse = "\n")
+            print(urls_with_newline)
+
+            # Write the URLs to the Galaxy output file
+            con <- file(outputData, "w")
+            writeLines(urls_with_newline, con = con)
+            close(con)
+          } else {
+            print("No hrefs found.")
+          }
+        }
+      } else if (jobStatus == "failed") {
+        status <- jobStatus
+      }
+      Sys.sleep(3)
+    }
+
+    cat("\n done \n")
+
+  } else if (status_code == 400) {
+    print("A query parameter has an invalid value.")
+  } else if (status_code == 404) {
+    print("The requested URI was not found.")
+  } else if (status_code == 500) {
+    print("The server encountered an internal error.")
+  } else {
+    print(paste("HTTP error:", status_code))
+  }
+}
+
+saveResult <- function(href, outputData) {
+  con <- file(outputData, "w")
+  writeLines(href, con = con)
+  close(con)
+}
+
+is_url <- function(x) {
+  grepl("^https?://", x)
+}
+
+server <- "https://aqua.igb-berlin.de/pygeoapi-dev/"
+
+print("--> Retrieve parameters")
+inputParameters <- getParameters()
+print("--> Parameters retrieved")
+
+args <- commandArgs(trailingOnly = TRUE)
+outputLocation <- args[2]
+
+print("--> Retrieve outputs")
+outputs <- getOutputs(inputParameters, outputLocation, server)
+print("--> Outputs retrieved")
+
+print("--> Parse inputs")
+convertedKeys <- c()
+for (key in names(inputParameters)) {
+  if (is.character(inputParameters[[key]]) &&
+      (endsWith(inputParameters[[key]], ".dat") ||
+       endsWith(inputParameters[[key]], ".txt"))) {
+    # Galaxy passes dataset parameters as paths to text files that in turn
+    # contain the actual input (usually a URL); read and convert them
+    con <- file(inputParameters[[key]], "r")
+    lines <- readLines(con)
+    close(con)
+    if (length(lines) == 1 && endsWith(lines, ".jp2") && startsWith(lines, "https")) {
+      # Single URL to a JPEG 2000 image: pass as a typed href
+      tmp <- list()
+      tmp$href <- lines
+      tmp$type <- "image/jp2"
+      inputParameters[[key]] <- tmp
+    } else if (length(lines) == 1 && endsWith(lines, ".zip") && startsWith(lines, "https")) {
+      # Single URL to a zipped shapefile: pass as a plain string
+      inputParameters[[key]] <- lines
+    } else if (length(lines) == 1 && (endsWith(lines, ".xlsx") || endsWith(lines, ".csv") || grepl("f=csv", lines)) && startsWith(lines, "https")) {
+      # Single URL to tabular data: pass as a plain string
+      inputParameters[[key]] <- lines
+    } else if (inputParameters$select_process == "plot-image" ||
+               inputParameters$select_process == "reproject-image") {
+      # GeoTIFF input: pass as a typed href
+      tmp <- list()
+      tmp$href <- lines
+      tmp$type <- "image/tiff; application=geotiff"
+      if (inputParameters$select_process == "reproject-image") {
+        tmp$type <- "image/tiff; subtype=geotiff"
+      }
+      inputParameters[[key]] <- tmp
+    } else {
+      # Otherwise treat the file content as inline JSON
+      json_string <- paste(lines, collapse = "\n")
+      json_data <- fromJSON(json_string)
+      inputParameters[[key]] <- json_data
+    }
+    convertedKeys <- append(convertedKeys, key)
+  } else if (grepl("_Array_", key)) {
+    # Keys like 'foo_Array_integer' carry comma-separated values plus the
+    # target type; split, convert, and restore the plain key name
+    keyParts <- strsplit(key, split = "_")[[1]]
+    type <- keyParts[length(keyParts)]
+    values <- inputParameters[[key]]
+    value_list <- strsplit(values, split = ",")
+    convertedValues <- c()
+
+    for (value in value_list) {
+      if (type == "integer") {
+        value <- as.integer(value)
+      } else if (type == "numeric") {
+        value <- as.numeric(value)
+      } else if (type == "character") {
+        value <- as.character(value)
+      }
+      convertedValues <- append(convertedValues, value)
+    }
+
+    convertedKey <- paste(keyParts[1:(which(keyParts == "Array")[1] - 1)],
+                          collapse = "_")
+
+    inputParameters[[key]] <- convertedValues
+    convertedKeys <- append(convertedKeys, convertedKey)
+  } else {
+    if (!is.null(inputParameters[[key]])) {
+      convertedKeys <- append(convertedKeys, key)
+    }
+  }
+}
+names(inputParameters) <- convertedKeys
+print("--> Inputs parsed")
+
+print("--> Prepare process execution")
+jsonData <- list("inputs" = inputParameters,
+                 "outputs" = outputs)
+
+print("--> Execute process")
+jobId <- executeProcess(server, inputParameters$select_process, jsonData)
+print("--> Process executed")
+
+print("--> Retrieve results")
+retrieveResults(server, inputParameters$select_process, jobId, outputLocation)
+print("--> Results retrieved")
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aquainfra_ogc_api_processes.xml	Thu Oct 10 09:56:27 2024 +0000
@@ -0,0 +1,100 @@
+<tool id="aquainfra" name="AquaINFRA OGC API Processes" version="0.1.0" profile="22.05">
+    <description/>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <creator>
+        <organization name="EOSC AquaINFRA" url="https://aquainfra.eu/"/>
+    </creator>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$__tool_directory__/aquainfra_ogc_api_processes.R'
+        --outputData '$output_data'
+    ]]></command>
+    <configfiles>
+        <inputs name="inputs" filename="inputs.json" data_style="paths"/>
+    </configfiles>
+    <inputs>
+        <conditional name="conditional_process">
+            <param name="select_process" type="select" label="Select process">
+                <option value="points-att-polygon">points-att-polygon: Group points by region</option>
+                <option value="peri-conv">peri-conv: Group data into periods based on date</option>
+                <option value="mean-by-group">mean-by-group: Return group average</option>
+                <option value="ts-selection-interpolation">ts-selection-interpolation: Select and Interpolate Time Series</option>
+                <option value="trend-analysis-mk">trend-analysis-mk: Mann-Kendall Trend Analysis on Time Series</option>
+                <option value="barplot-trend-results">barplot-trend-results: Visualisation of statistical analysis results</option>
+                <option value="map-trends-static">map-trends-static: Spatial visualisation of regions and data points</option>
+                <option value="map-shapefile-points">map-shapefile-points: Spatial visualisation of regions and data points</option>
+            </param>
+            <when value="points-att-polygon">
+                <param name="regions" label="Study region or study subregions" optional="false" help="URL (stored in a .txt file) to the study region, or several regions, to classify your input data into groups of interest. Currently it has to be provided as a shapefile. It can be in any coordinate system and will be transformed to WGS84 during this process." type="data" format="txt"/>
+                <param name="input_data" label="Table to be merged with study region" optional="false" help="URL (stored in a .txt file) to the input table containing the in-situ data points with coordinates. Can be provided as Excel file or CSV file (comma-separated text file). The coordinates have to be in WGS84 coordinate system." type="data" format="txt"/>
+                <param name="colname_long" label="Column name for longitude" optional="false" help="Name of the column that contains longitude values (in WGS84)." type="text"/>
+                <param name="colname_lat" label="Column name for latitude" optional="false" help="Name of the column that contains latitude values (in WGS84)." type="text"/>
+            </when>
+            <when value="peri-conv">
+                <param name="input_data" label="Table to be grouped by date, with date column" help="URL (stored in a .txt file) to the table with a column containing a date. It can have other columns, which will not be changed during this process." type="data" format="txt"/>
+                <param name="colname_date" label="Date column name" optional="false" help="Column name of the date column in the format defined above. Example: 'visit date'." type="text"/>
+                <param name="date_format" label="Date format" optional="true" help="The date format used to parse the date, i.e. to extract day, month and year from the date column, e.g. 'y-m-d' for dates like '1998-08-22' (this is the default) or 'y/m/d' for dates like '1998/08/22'." type="select">
+                    <option value="y-m-d">y-m-d</option>
+                    <option value="y/m/d">y/m/d</option>
+                </param>
+                <param name="group_to_periods" label="Periods to group the data into" optional="true" help="Define the periods that you want the data to be grouped into. Please follow the example: 'Dec-01:Mar-01,Mar-02:May-30,Jun-01:Aug-30,Sep-01:Nov-30' (first three letters of each month, then a hyphen, then the day (two digits), then a comma, then the next period)." type="text">
+                    <validator type="regex">^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3][0-9]:(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3][0-9](,(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3][0-9]:(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3][0-9])*$</validator>
+                </param>
+                <param name="group_labels" label="Period labels" optional="true" help="Define names for the periods defined above, separated by a comma. Example: 'winter,spring,summer,autumn'." type="text"/>
+                <param name="year_starts_at_dec1" label="Include December into next year" help="Indicate whether the years in your grouping start on the first of December ('true') or not ('false'). This has to be consistent with the period definitions; the two should not contradict each other." type="boolean" truevalue="true" falsevalue="false" checked="true"/>
+            </when>
+            <when value="mean-by-group">
+                <param name="input_data" label="Input table" optional="false" help="URL (stored in a .txt file) to the input table containing group identifier(s) and a value column. Groups are defined by one or more columns. Other columns present in the table will be removed in the process, retaining only those specified in cols_to_group_by and value_col. For example, use the result table from peri_conv." type="data" format="txt"/>
+                <param name="colnames_to_group_by" label="Column names identifying group" optional="false" help="One or more column names identifying the group. A combination of all specified columns will be used to define unique groups for calculating the average value. Example: 'longitude, latitude, Year_adj_generated, group_labels, HELCOM_ID'. Another example: 'Year_adj_generated, group_labels, HELCOM_ID'." type="text"/>
+                <param name="colname_value" label="Column name for column containing values" optional="false" help="The name of the column that contains the values for which the average will be calculated. Only one column name can be included. Examples: 'transparency_m', 'mean'." type="text"/>
+            </when>
+            <when value="ts-selection-interpolation">
+                <param name="input_data" label="Input table" optional="false" help="URL (stored in a .txt file) to the input table containing data for selection and interpolation. This table includes grouping variables (if applicable), the year (or other time identifier) and the value columns to be interpolated. For example, use the result from mean_by_group." type="data" format="txt"/>
+                <param name="colnames_relevant" label="Column names identifying group(s)" optional="false" help="Column name(s) describing relevant values in the dataset. These columns are treated as grouping identifiers, and a combination of all specified columns will be used to define unique groups. For each group, a separate time series is analyzed and interpolated individually." type="text"/>
+                <param name="missing_threshold_percentage" label="Threshold for missing values" optional="false" help="Threshold for the allowed percentage of missing data points (NAs). For example, a value of 40 means series with more than 40% missing data will be removed. Default = 30." value="30" min="0" max="100" type="float"/>
+                <param name="colname_year" label="Column name for time" optional="false" help="The name of the column containing the year (or other time identifier, such as quarter, month, or day). Example = 'year'." type="text"/>
+                <param name="colname_value" label="Column name for values" optional="false" help="The name of the column containing the values to be considered for interpolation." type="text"/>
+                <param name="min_data_point" label="Minimum number of data points required" optional="false" help="The minimum number of data points required in a time series for it to be included in the interpolation process. Default = 10." value="10" min="0" type="integer"/>
+            </when>
+            <when value="trend-analysis-mk">
+                <param name="input_data" label="Input table" optional="false" help="URL (stored in a .txt file) to the input table containing the time series data. This table includes grouping identifiers (if applicable), columns for time (e.g., year, month) and values to be analyzed for trends. For example, use the result from ts_selection_interpolation." type="data" format="txt"/>
+                <param name="colnames_relevant" label="Column names identifying group(s)" optional="false" help="Column name(s) identifying relevant groups in the dataset. These columns will be used to define unique groups for which separate trend analyses are performed." type="text"/>
+                <param name="colname_time" label="Column name for time" optional="false" help="The name of the column containing the time variable (e.g., year, month) to be used in the trend analysis. Example = 'year'." type="text"/>
+                <param name="colname_value" label="Column name for values" optional="false" help="The name of the column containing the values to be analyzed in the Mann-Kendall trend test." type="text"/>
+            </when>
+            <when value="barplot-trend-results">
+                <param name="input_data" label="Input table" optional="false" help="URL (stored in a .txt file) to the input table containing statistical analysis results. The table must include columns for test values, p-values, and group identifiers." type="data" format="txt"/>
+                <param name="colname_id" label="Column name for identifier" optional="false" help="The name of the column containing group or sample identifiers, e.g., polygon id. Example = 'polygon_id'." type="text"/>
+                <param name="colname_test_value" label="Column name for test value" optional="false" help="The name of the column containing the test values (e.g., statistical coefficients) to be plotted on the Y-axis, e.g., Tau for the Mann-Kendall test." type="text"/>
+                <param name="colname_p_value" label="Column name for p value" optional="false" help="The name of the column containing p values, used to determine bar transparency. Example = 'p_value'." type="text"/>
+                <param name="p_value_threshold" label="p value threshold for significance" optional="false" help="The threshold for distinguishing significant from insignificant values. It adjusts the transparency of bars in the plot. Example = 0.05." value="0.05" min="0.01" max="0.1" type="float"/>
+                <param name="colname_group" label="Column name for subgroups" optional="false" help="The name of the column that defines the subgroups or categories to be displayed on the X-axis, e.g., seasons for every polygon_id." type="text"/>
+            </when>
+            <when value="map-trends-static">
+                <param name="regions" label="Study region or study subregions" optional="false" help="URL (stored in a .txt file) to the study region, or several regions, used to classify the input data into groups of interest. Currently it has to be provided as a shapefile. It can be in any coordinate system and will be transformed to WGS84 during this process." type="data" format="txt"/>
+                <param name="input_data" label="Input table" optional="false" help="URL (stored in a .txt file) to the input table containing statistical analysis results. The table must include columns for test values, p-values, and identifiers linking to the study region." type="data" format="txt"/>
+                <param name="colname_id_trend" label="Column name of study region identifier" optional="false" help="The name of the column containing identifiers for study regions, which must correspond to the identifiers in the shapefile (shp). Example = 'id'." type="text"/>
+                <param name="colname_region_id" label="Column name of study region identifier" optional="false" help="The name of the column in the input data that contains identifiers for study regions, corresponding to the identifiers in the shapefile. Example = 'id'." type="text"/>
+                <param name="colname_group" label="Column name for subgroups" optional="false" help="The name of the column that defines the subgroups or categories to be displayed on the X-axis, e.g., seasons for every polygon_id." type="text"/>
+                <param name="p_value_threshold" label="p value threshold for significance" optional="false" help="The threshold for distinguishing significant from insignificant values. It adjusts the transparency of bars in the plot. Example = 0.05." value="0.05" min="0.01" max="0.1" type="float"/>
+                <param name="colname_p_value" label="Column name for p value" optional="false" help="The name of the column containing p values, used to determine bar transparency. Example = 'p_value'." type="text"/>
+            </when>
+            <when value="map-shapefile-points">
+                <param name="regions" label="Study region or study subregions" optional="false" help="URL (stored in a .txt file) to the study region, or several regions, to classify the input data into groups of interest. Currently it has to be provided as a shapefile. It can be in any coordinate system and will be transformed to WGS84 during this process." type="data" format="txt"/>
+                <param name="input_data" label="Data table to be plotted on top of study regions" optional="false" help="URL (stored in a .txt file) to the input table containing the in-situ data points with coordinates." type="data" format="txt"/>
+                <param name="colname_long" label="Column name for longitude" optional="false" help="The name of the column containing longitude values for the data points." type="text"/>
+                <param name="colname_lat" label="Column name for latitude" optional="false" help="The name of the column containing latitude values for the data points." type="text"/>
+                <param name="colname_value_name" label="Column name of data point identifier" optional="false" help="The name of the column containing identifiers (e.g., site name) or values (e.g., depth) to color the points according to their corresponding values." type="text"/>
+                <param name="colname_region_id" label="Column name of region identifier" optional="false" help="The name of the column containing identifiers (e.g., basin name) to distinguish the polygons on the map if multiple regions are present." type="text"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_data" format="txt" label="${conditional_process.select_process}"/>
+    </outputs>
+    <expand macro="tests"/>
+    <expand macro="help"/>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Oct 10 09:56:27 2024 +0000
@@ -0,0 +1,51 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="4.3.1">r-base</requirement>
+            <requirement type="package" version="1.20.4">r-getopt</requirement>
+            <requirement type="package" version="0.2.3">r-httr2</requirement>
+            <requirement type="package" version="1.8.7">r-jsonlite</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">@Manual{httr2, title = {httr2: Perform HTTP Requests and Process the Responses}, author = {Hadley Wickham}, year = {2023}, note = {R package version 1.0.0, https://github.com/r-lib/httr2}, url = {https://httr2.r-lib.org},}</citation>
+            <citation type="doi">10.48550/arXiv.1403.2805</citation>
+        </citations>
+    </xml>
+    <xml name="help">
+        <help>
+            Use the dropdown menu at the top to select one of the OGC API Processes hosted at https://aqua.igb-berlin.de/pygeoapi-dev/processes, then complete the corresponding form to run the service.
+        </help>
+    </xml>
+    <xml name="tests">
+        <tests>
+            <test>
+                <param name="select_process" value="points-att-polygon"/>
+                <param name="regions" value="points_att_polygon_test_input_1.txt"/>
+                <param name="input_data" value="points_att_polygon_test_input_2.txt"/>
+                <param name="colname_long" value="longitude"/>
+                <param name="colname_lat" value="latitude"/>
+                <output name="output_data">
+                    <assert_contents>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </output>
+            </test>
+            <test>
+                <param name="select_process" value="map-shapefile-points"/>
+                <param name="regions" value="points_att_polygon_test_input_1.txt"/>
+                <param name="input_data" value="points_att_polygon_test_input_3.txt"/>
+                <param name="colname_long" value="longitude"/>
+                <param name="colname_lat" value="latitude"/>
+                <param name="colname_value_name" value="transparency_m"/>
+                <param name="colname_region_id" value="HELCOM_ID"/>
+                <output name="output_data">
+                    <assert_contents>
+                        <has_n_lines n="1"/>
+                    </assert_contents>
+                </output>
+            </test>
+        </tests>
+    </xml>
+</macros>
\ No newline at end of file
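The requirements macro pins the exact R package versions the wrapper imports. A small snippet run inside the resolved tool environment can confirm those pins are met; this is an illustrative check, not part of the tool itself:

# Versions taken from the requirements macro above.
stopifnot(
  getRversion() >= "4.3.1",
  packageVersion("getopt")   >= "1.20.4",
  packageVersion("httr2")    >= "0.2.3",
  packageVersion("jsonlite") >= "1.8.7"
)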
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/points_att_polygon_test_input_1.txt	Thu Oct 10 09:56:27 2024 +0000
@@ -0,0 +1,1 @@
+https://maps.helcom.fi/arcgis/rest/directories/arcgisoutput/MADS/tools_GPServer/_ags_HELCOM_subbasin_with_coastal_WFD_waterbodies_or_wa.zip
\ No newline at end of file