Mercurial > repos > ecology > aquainfra_ogc_api_processes

<tool id="aquainfra_ogc_api_processes" name="AquaINFRA OGC API Processes" version="0.8.0" profile="22.05">
    <description/>
    <macros>
        <import>macros.xml</import>
    </macros>
    <creator>
        <organization name="EOSC AquaINFRA" url="https://aquainfra.eu/"/>
    </creator>
    <expand macro="requirements"/>
    <!-- Runs the R script shipped in the tool directory. The only command-line
         argument is the output_data dataset; a non-zero exit code marks the job
         as failed (detect_errors="exit_code"). -->
    <command detect_errors="exit_code"><![CDATA[
    Rscript '$__tool_directory__/aquainfra_ogc_api_processes.R'
        --outputData '$output_data'
    ]]></command>
    <!-- Writes the submitted parameter values to inputs.json, with datasets
         represented by their file paths (data_style="paths").
         NOTE(review): inputs.json is not passed on the command line, so the R
         script presumably opens it by name in the job working directory; confirm. -->
    <configfiles>
        <inputs name="inputs" filename="inputs.json" data_style="paths"/>
    </configfiles>
    <inputs>
        <conditional name="conditional_process">
            <param name="select_process" type="select" label="Select process">
                <option value="points-att-polygon">points-att-polygon: Group points by region</option>
                <option value="peri-conv">peri-conv: Group data to groups based on date</option>
                <option value="mean-by-group">mean-by-group: Return group average</option>
                <option value="ts-selection-interpolation">ts-selection-interpolation: Select and Interpolate Time Series</option>
                <option value="trend-analysis-mk">trend-analysis-mk: Man-Kendall Trend Analysis on Time Series</option>
                <option value="barplot-trend-results">barplot-trend-results: Visualisation of statistical analysis results</option>
                <option value="map-trends-static">map-trends-static: Spatial visualisation of regions and data points</option>
                <option value="map-shapefile-points">map-shapefile-points: Spatial visualisation of regions and data points</option>
                <option value="owt-classification">owt-classification: OWT Classification</option>
                <option value="tordera-gloria">tordera-gloria: SWAT+, Soil and Water Assessment Tool</option>
                <option value="tordera-gloria-connection">tordera-gloria-connection: SWAT+ output to MITgcm input connection tool</option>
                <option value="riverload">riverload: Compute River Load</option>
                <option value="mitgcm-resultplots">mitgcm-resultplots: Catalunya MITgcm Plotting Tool</option>
                <option value="retrieve-biodiversity-data">retrieve-biodiversity-data: Retrieve biodiversity data from the web</option>
                <option value="match-data">match-data: Combine and match biodiversity data from separate sources</option>
                <option value="check-names">check-names: Check species names in biodiversity data from separate sources</option>
                <option value="pred-extract">pred-extract: pred extract for extraction of environmnetal data</option>
                <option value="multidetect-and-clean">multidetect-and-clean: Run multidetect and clean the data</option>
                <option value="heat1">heat1: Reproducing HEAT HOLAS results: Generate the Assessment Units</option>
                <option value="heat2">heat2: Reproducing HEAT HOLAS results: Combine the samples</option>
                <option value="heat3">heat3: Reproducing HEAT HOLAS results: Compute Annual Indicators</option>
                <option value="heat4">heat4: Reproducing HEAT HOLAS results: Compute Assessment Indicators</option>
                <option value="heat5">heat5: Reproducing HEAT HOLAS results: Compute Assessment</option>
            </param>
            <!-- points-att-polygon: inputs for assigning data points to study regions. -->
            <when value="points-att-polygon">
                <param name="input_data" label="Table to be merged with study region" optional="false" help="URL to the input table containing the in-situ data points with coordinates. Can be provided as Excel file or CSV file (comma-separated text file). The coordinates have to be in WGS84 coordinate system. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="regions" label="Study region or study subregions" optional="false" help="URL to the study region, or several regions, to classify your input data into groups of interest. Currently it has to be provided as a shapefile. It can be in any coordinate system and will be transformed to WGS84 during this process. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="colname_long" label="Column name for longitude" optional="true" help="Only needed for data other than geojson. Name of the column that contains longitude values (in WGS84)." type="text"/>
                <param name="colname_lat" label="Column name for latitude" optional="true" help="Only needed for data other than geojson. Name of the column that contains latitude values (in WGS84)." type="text"/>
            </when>
            <!-- peri-conv: inputs for grouping rows of a table into named periods by date. -->
            <when value="peri-conv">
                <param name="input_data" label="Table to be grouped by date, with date column" optional="false" help="URL to the table with a column containing a date. It can have other columns which will not be changed during this process. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="colname_date" label="Date column name" optional="false" help="Column name of the date column, whose values use the format selected under 'Date format' below. Example: 'visit_date'." type="text"/>
                <param name="date_format" label="Date format" optional="true" help="The date format used to parse the date, i.e. to extract day, month and year from the date column, e.g. 'y-m-d' for dates like '1998-08-22' (this is the default) or 'y/m/d' for dates like '1998/08/22'." type="select">
                    <option value="y-m-d">y-m-d</option>
                    <option value="y/m/d">y/m/d</option>
                </param>
                <param name="group_to_periods" label="Periods to group the data into" optional="true" value="Dec-01:Mar-01,Mar-02:May-30,Jun-01:Aug-30,Sep-01:Nov-30" help="Define the periods that you want the data to be grouped into. Please follow the example (which is also the default): 'Dec-01:Mar-01,Mar-02:May-30,Jun-01:Aug-30,Sep-01:Nov-30' (first three letters of each month, then a minus/hyphen, then the day (two digits), then comma, then the next period)." type="text">
                    <!-- Accepts one or more comma-separated 'Mon-DD:Mon-DD' ranges and nothing else. -->
                    <validator type="regex">^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3][0-9]:(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3][0-9](,(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3][0-9]:(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3][0-9])*$</validator>
                </param>
                <param name="period_labels" label="Period labels" optional="true" value="winter,spring,summer,autumn" help="Define names for the periods defined above, separated by a comma. Example (which is also the default): 'winter,spring,summer,autumn'." type="text"/>
                <param name="year_starts_at_dec1" label="Include December into next year" optional="true" checked="true" help="Tell whether the years in your grouping start at first of December ('true') or not ('false'). This has to be reflected in the period definitions, these should not contradict each other. Defaults to 'true'." type="boolean" truevalue="True" falsevalue="False"/>
            </when>
            <!-- mean-by-group: inputs for averaging one value column per group. -->
            <when value="mean-by-group">
                <param name="input_data" label="Input table" optional="false" help="URL to the input table containing group identifier(s) and a value column. Groups are defined by one or more columns. Other columns present in the table will be removed in the process, retaining only those specified in colnames_to_group_by and colname_value. For example, use the result table from peri-conv. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="colnames_to_group_by" label="Column names identifying group" optional="false" help="One or more column names identifying the group. A combination of all specified columns will be used to define unique groups for calculating the average value. Example: 'longitude, latitude, Year_adj_generated, group_labels, HELCOM_ID'. Another example: 'Year_adj_generated, group_labels, HELCOM_ID'." type="text"/>
                <param name="colname_value" label="Column name for column containing values" optional="false" help="The name of the column that contains the values for which the average will be calculated. Only one column name can be included. Examples: 'transparency_m', 'mean'" type="text"/>
            </when>
            <!-- ts-selection-interpolation: inputs for filtering and interpolating per-group time series. -->
            <when value="ts-selection-interpolation">
                <param name="input_data" label="Input table" optional="false" help="URL to the input table containing data for selection and interpolation. This table includes grouping variables (if applicable), the year (or other time identifier) and the value columns to be interpolated. For example, use the result from mean-by-group. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="colnames_relevant" label="Column names identifying group(s)" optional="false" help="Column name(s) describing relevant values in the dataset. These columns are treated as grouping identifiers, and a combination of all specified columns will be used to define unique groups. For each group, a separate time series is analyzed and interpolated individually." type="text"/>
                <param name="missing_threshold_percentage" label="Threshold for missing values" optional="false" value="80" help="Threshold for the allowed percentage of missing data points (NAs). For example, a value of 80 means series with more than 80% missing data will be removed. Example = '80'." type="float" min="0" max="100"/>
                <param name="colname_year" label="Column name for time" optional="false" help="The name of the column containing the year (or other time identifier, such as quarter, month, or day). Example = 'year'" type="text"/>
                <param name="colname_value" label="Column name for values" optional="false" help="The name of the column containing the values to be considered for interpolation." type="text"/>
                <param name="min_data_point" label="Minimum number of data points required" optional="false" value="10" help="The minimum number of data points required in a time series for it to be included in the interpolation process. Example = '10'." type="integer" min="0"/>
            </when>
            <!-- trend-analysis-mk: inputs for the Mann-Kendall trend test on grouped time series. -->
            <when value="trend-analysis-mk">
                <param name="input_data" label="Input table" optional="false" help="URL to the input table containing the time series data. This table includes grouping identifiers (if applicable), columns for time (e.g., year, month) and values to be analyzed for trends. For example, use the result from ts-selection-interpolation. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="colnames_relevant" label="Column names identifying group(s)" optional="false" help="Column name(s) identifying relevant groups in the dataset. These columns will be used to define unique groups for which separate trend analyses are performed." type="text"/>
                <param name="colname_time" label="Column name for time" optional="false" help="The name of the column containing the time variable (e.g., year, month) to be used in the trend analysis. Example = 'year'." type="text"/>
                <param name="colname_value" label="Column name for values" optional="false" help="The name of the column containing the values to be analyzed in the Mann-Kendall trend test." type="text"/>
            </when>
            <!-- barplot-trend-results: inputs for the bar plot of statistical test results. -->
            <when value="barplot-trend-results">
                <param name="input_data"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Input table"
                       help="URL to the input table containing statistical analysis results. The table must include columns for test values, p-values, and group identifiers. (URL must be stored in a .txt file)"/>
                <param name="colname_id"
                       type="text"
                       optional="false"
                       label="Column name for identifier"
                       help="The name of the column containing group or sample identifiers, e.g., polygon id. Example = 'polygon_id'"/>
                <param name="colname_test_value"
                       type="text"
                       optional="false"
                       label="Column name for test value"
                       help="The name of the column containing the test values (e.g., statistical coefficients) to be plotted on the Y-axis, e.g., Tau for Mann-Kendall test."/>
                <param name="colname_p_value"
                       type="text"
                       optional="false"
                       label="Column name for p value"
                       help="The name of the column containing p values, used to determine bar transparency. Example = 'p_value'"/>
                <param name="p_value_threshold"
                       type="float"
                       min="0.01"
                       max="0.1"
                       value="0.05"
                       optional="false"
                       label="p value threshold for significance"
                       help="The threshold for distinguishing significant from insignificant values. It adjusts the transparency of bars in the plot. Example = '0.05'"/>
                <param name="colname_group"
                       type="text"
                       optional="false"
                       label="Column name for subgroups"
                       help="The name of the column that defines the subgroups or categories to be displayed on the X-axis, e.g., seasons for every polygon_id."/>
            </when>
            <!-- map-trends-static: inputs for mapping trend results onto study regions.
                 NOTE(review): colname_id_trend and colname_region_id carry the same label,
                 so the two fields cannot be told apart in the form; confirm which one refers
                 to the results table and which to the shapefile, then disambiguate the labels. -->
            <when value="map-trends-static">
                <param name="regions"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Study region or study subregions"
                       help="URL to the study region, or several regions, used to classify the input data into groups of interest. Currently it has to be provided as a shapefile. It can be in any coordinate system and will be transformed to WGS84 during this process. (URL must be stored in a .txt file)"/>
                <param name="input_data"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Input table"
                       help="URL to the input table containing statistical analysis results. The table must include columns for test values, p-values, and identifiers linking to study region. (URL must be stored in a .txt file)"/>
                <param name="colname_id_trend"
                       type="text"
                       optional="false"
                       label="Column name of study region identifier"
                       help="The name of the column containing identifiers for study regions, which must correspond to the identifiers in the shapefile (shp). Example = 'id'"/>
                <param name="colname_region_id"
                       type="text"
                       optional="false"
                       label="Column name of study region identifier"
                       help="The name of the column in the input data that contains identifiers for study regions, corresponding to the identifiers in the shapefile. Example = 'id'"/>
                <param name="colname_group"
                       type="text"
                       optional="false"
                       label="Column name for subgroups"
                       help="The name of the column that defines the subgroups or categories to be displayed on the X-axis, e.g., seasons for every polygon_id."/>
                <param name="p_value_threshold"
                       type="float"
                       min="0.01"
                       max="0.1"
                       value="0.05"
                       optional="false"
                       label="p value threshold for significance"
                       help="The threshold for distinguishing significant from insignificant values. It adjusts the transparency of bars in the plot. Default = 0.05"/>
                <param name="colname_p_value"
                       type="text"
                       optional="false"
                       label="Column name for p value"
                       help="The name of the column containing p values, used to determine bar transparency. Example = 'p_value'"/>
            </when>
            <!-- map-shapefile-points: inputs for plotting data points on top of study regions. -->
            <when value="map-shapefile-points">
                <param name="regions"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Study region or study subregions"
                       help="URL to the study region, or several regions, to classify the input data into groups of interest. Currently it has to be provided as a shapefile. It can be in any coordinate system and will be transformed to WGS84 during this process. (URL must be stored in a .txt file)"/>
                <param name="input_data"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Data table to be plotted on top of study regions"
                       help="URL to the input table containing the in-situ data points with coordinates. (URL must be stored in a .txt file)"/>
                <param name="colname_long"
                       type="text"
                       optional="false"
                       label="Column name for longitude"
                       help="The name of the column containing longitude values for the data points."/>
                <param name="colname_lat"
                       type="text"
                       optional="false"
                       label="Column name for latitude"
                       help="The name of the column containing latitude values for the data points."/>
                <param name="colname_value_name"
                       type="text"
                       optional="false"
                       label="Column name of data point identifier"
                       help="The name of the column containing identifier (e.g., site name) or values (e.g., depth) to color the points according to their corresponding values."/>
                <param name="colname_region_id"
                       type="text"
                       optional="false"
                       label="Column name of region identifier"
                       help="The name of the column containing identifiers (e.g., basin name) to distinguish the polygons on the map if multiple regions are present."/>
            </when>
            <!-- owt-classification: inputs for the optical water type (OWT) classification. -->
            <when value="owt-classification">
                <param name="input_data_url" label="Input data" optional="false" help="URL to your input file. Find example data on https://github.com/bishun945/pyOWT/tree/main/projects/AquaINFRA. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="input_option" label="Type of input" optional="false" help="csv: for text data input (first line wavelength, following lines remote-sensing reflectance). sat: for satellite product input containing reflectance (e.g., Sentinel-3 OLCI Level-2)" type="select">
                    <option value="csv">csv</option>
                    <option value="sat">sat</option>
                </param>
                <!-- Option values are sent as the sensor name, so they must be spelled exactly
                     as the backend expects them. -->
                <param name="sensor" label="Sensor name" optional="false" help="Spectral band configuration of satellite mission (includes adaptation to sensor spectral response functions)." type="select">
                    <option value="HYPER">HYPER</option>
                    <option value="AERONET_OC_1">AERONET_OC_1</option>
                    <option value="AERONET_OC_2">AERONET_OC_2</option>
                    <option value="CMEMS_BAL_HROC">CMEMS_BAL_HROC</option>
                    <option value="CMEMS_BAL_NRT">CMEMS_BAL_NRT</option>
                    <option value="CMEMS_MED_MYINT">CMEMS_MED_MYINT</option>
                    <option value="CZCS">CZCS</option>
                    <option value="GOCI">GOCI</option>
                    <option value="HawkEye">HawkEye</option>
                    <option value="MERIS">MERIS</option>
                    <option value="MODIS_Aqua">MODIS_Aqua</option>
                    <option value="MODIS_Terra">MODIS_Terra</option>
                    <option value="MSI_S2A">MSI_S2A</option>
                    <option value="MSI_S2B">MSI_S2B</option>
                    <option value="OCTS">OCTS</option>
                    <option value="OLCI_S3A">OLCI_S3A</option>
                    <option value="OLCI_S3B">OLCI_S3B</option>
                    <option value="OLI">OLI</option>
                    <option value="SeaWiFS">SeaWiFS</option>
                    <option value="VIIRS_JPSS1">VIIRS_JPSS1</option>
                    <option value="VIIRS_JPSS2">VIIRS_JPSS2</option>
                    <option value="VIIRS_SNPP">VIIRS_SNPP</option>
                </param>
                <param name="output_option" label="Output option" optional="false" help="1: for standard output with five variables. 2: for extensive output including memberships of all water types." type="select">
                    <option value="1">1</option>
                    <option value="2">2</option>
                </param>
            </when>
            <!-- tordera-gloria: inputs for the SWAT+ run; every parameter is optional and the
                 help texts list the defaults that apply when a field is left empty.
                 NOTE(review): the help of 'unit' describes ranges (1:10) and lists, but
                 type="integer" only accepts a single number; confirm the intended type with
                 the backend process before changing it. -->
            <when value="tordera-gloria">
                <param name="TextInOut_URL" label="URL to project folder" optional="true" help="To model a watershed other than the Tordera Basin, users can add a URL to their own zipped project folder. Important: Do not include the SWAT executable in that folder — AquaINFRA has integrated the executable version rev60.5.7_64rel_linux. Defaults to: https://raw.githubusercontent.com/AmandaBatlle/AquaINFRA_CaseUse_MedInlandModel/refs/heads/main/example_inputs/project.zip" type="data" format="txt"/>
                <param name="par_cal" label="Parameter inputs" optional="true" help="JSON file including the parameter value change as defined by SWATrunR documentation (https://chrisschuerz.github.io/SWATrunR/articles/SWATrunR.html#parameter-inputs). Defaults to: https://raw.githubusercontent.com/AmandaBatlle/AquaINFRA_CaseUse_MedInlandModel/refs/heads/main/example_inputs/par_cal.json" type="data" format="txt"/>
                <param name="unit" label="Output Spatial unit" optional="true" help="Specify the spatial unit for which to generate output. This corresponds to the ID of a river reach, HRU (Hydrologic Response Unit), or LSU (Landscape Unit) as defined in the SWAT+ watershed delineation. The value can be entered as a single number (e.g., 1), a range (e.g., 1:10), or a list of values (e.g., (1, 3, 25, 40)). The default unit for the La Tordera SWAT+ model is 1." type="integer"/>
                <param name="file" label="SWAT Output File" optional="true" help="A SWAT+ output file name consists of three elements separated by underscores (_): spatial unit, output category, and time step. For example, 'channel_sd_day'. The spatial unit defines the scale of the output and can be basin, hru, lsunit, channel, aquifer, or reservoir. The output category specifies the type of data such as wb (water balance), nb (nutrient balance), ls (losses), pw (plant and weather), or sd (streamflow and sediment in channels). The time step indicates the temporal resolution and can be day (daily), mon (monthly), yr (yearly), or aa (annual average). Combining these elements forms the output file name. For more details, refer to the SWAT+ OUTPUT FILES section in the SWAT+ documentation (https://swatplus.gitbook.io/io-docs). Defaults to: 'channel_sd_day'" type="text"/>
                <param name="variable" label="SWAT Variable" optional="true" help="Variable to simulate. Example: 'flo_out'. For more details and extended capabilities, refer to the SWAT+ OUTPUT FILES section in the SWAT+ documentation (https://swatplus.gitbook.io/io-docs). For one variable, enter the variable name. For multiple variables, enter them separated by comma. Defaults to: 'flo_out,water_temp'" type="text"/>
                <param name="start_date" label="Start Date" optional="true" help="Start date of the simulation in the format yyyymmdd. Defaults to: 20160101. Earliest possible date for the default input project: 20000101." type="integer"/>
                <param name="start_date_print" label="Start Date for printing" optional="true" help="Start date for printing in the format yyyymmdd. Defaults to: 20190601. Earliest possible date for the default input project: 20020101. It is recommended to allow for at least two warm-up years between the model simulation start and the model output printing to ensure relevant results." type="integer"/>
                <param name="end_date" label="End Date" optional="true" help="End date of the simulation in the format yyyymmdd. Defaults to: 20201231. Latest possible date for the default input project: 20221231." type="integer"/>
            </when>
            <!-- tordera-gloria-connection: takes a single required input, the SWAT+ output database. -->
            <when value="tordera-gloria-connection">
                <param name="swat_output_file"
                       type="data"
                       format="txt"
                       optional="false"
                       label="SWAT+ Variable Outputs Database"
                       help="SQLite database (thread_1.sqlite) storing the simulated output values based on the defined time range and time step (daily, monthly, yearly, or annual average)."/>
            </when>
            <!-- riverload: two required inputs, TOC measurements and daily discharge. -->
            <when value="riverload">
                <param name="input_toc_urls"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Input Total Organic Carbon (TOC)"
                       help="URL to in situ Total Organic Carbon (TOC) data (date, value). If the data is divided into parts, please provide several URLs separated by a comma. All URLs need to start with http(s). (URL must be stored in a .txt file)"/>
                <param name="input_discharge_urls"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Input discharge"
                       help="URL to the daily discharge data (date, value). If the data is divided into parts, please provide several URLs separated by a comma. All URLs need to start with http(s). (URL must be stored in a .txt file)"/>
            </when>
            <!-- mitgcm-resultplots: inputs for plotting one variable of a NetCDF result file at a
                 given time and depth; the two colour-limit parameters are optional. -->
            <when value="mitgcm-resultplots">
                <param name="netcdf_input_file" label="NetCDF input file" optional="false" help="The NetCDF input file that should be plotted. It must comply with some specific properties with respect to variables. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="variable" label="Variable to be plotted" optional="false" value="T" help="The variable that should be plotted. You can choose between 'T', 'S', 'U', 'V', 'Eta' and 'W'." type="select">
                    <option value="T">T</option>
                    <option value="S">S</option>
                    <option value="U">U</option>
                    <option value="V">V</option>
                    <option value="Eta">Eta</option>
                    <option value="W">W</option>
                </param>
                <param name="plot_time" label="Hour to be plotted" optional="false" value="" help="The time of day (hourly resolution) that should be plotted. Please specify it in this specific format: '20220703-12:00:00'. We assume that the hour is in the simulation period." type="text">
                    <!-- The default value is empty, so this validator is what forces the user to enter a time. -->
                    <validator type="empty_field" message="You must specify the hour to be plotted."/>
                </param>
                <param name="plot_depth" label="Depth to be plotted" optional="false" value="0.0" help="The depth at which the variable should be plotted. The plot will be at the closest vertical level to this depth. Please specify a negative value. We assume the depth is in the range of our depths." type="float"/>
                <param name="min_var_for_color" label="Minimum value to be considered for color limit" optional="true" help="This is a parameter controlling the color limits to display. Example (for salinity): 35. Leave empty to let the color limits be chosen automatically." type="float"/>
                <param name="max_var_for_color" label="Maximum value to be considered for color limit" optional="true" help="This is a parameter controlling the color limits to display. Example (for salinity): 38. Leave empty to let the color limits be chosen automatically." type="float"/>
            </when>
            <!-- retrieve-biodiversity-data: species can be given as a typed list or as a table URL,
                 and the area of interest in one of four alternative forms; only the database
                 list and the synonym switch are mandatory. -->
            <when value="retrieve-biodiversity-data">
                <param name="species_names_list" label="Species names to download (list of species)" optional="true" help="List of species. If you have a URL to the dataset, use the option below." type="text"/>
                <param name="species_names_url" label="Data with species names to download" optional="true" help="URL to a table containing the occurrences from any source. This can be the outcome of the match-data process. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="colname_species" label="Species column name" optional="true" help="If the data provided in the data path is a dataframe, indicate here the column name with species names." type="text"/>
                <param name="databases_array" label="Database names" optional="false" value="" help="List of databases to consider. Only 'gbif, inat, vertnet' supported." type="text">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="gbif_limit" label="Max number (GBIF)" optional="true" help="The maximum number of data points that are being retrieved from GBIF." type="integer"/>
                <param name="inaturalist_limit" label="Max number (iNaturalist)" optional="true" help="The maximum number of data points that are being retrieved from iNaturalist." type="integer"/>
                <param name="vertnet_limit" label="Max number (VertNet)" optional="true" help="The maximum number of data points that are being retrieved from VertNet." type="integer"/>
                <param name="study_area_bbox_object" label="Area of Interest as a bounding box" optional="true" help="A bounding box: {'bbox': [south, west, north, east]} (see https://docs.ogc.org/is/18-062r2/18-062r2.html#bounding-box-value)" type="text"/>
                <param name="study_area_shapefile" label="Area of Interest as shapefile" optional="true" help="URL to the zipped input shapefile containing the study areas inside which you would like to retrieve occurrences. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="study_area_geojson_url" label="Area of Interest as GeoJSON" optional="true" help="URL to the input GeoJSON file containing the study areas inside which you would like to retrieve occurrences. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="study_area_geojson_object" label="Area of Interest as a GeoJSON object" optional="true" help="Directly post the GeoJSON containing the study areas inside which you would like to retrieve occurrences." type="text"/>
                <param name="percentage_correctness" label="Species name correctness in percent" optional="true" help="A number indicating the percentage of species name correctness that should be allowed to be replaced. High values ensure a perfect match from the standard database." type="float"/>
                <param name="synonym_check" label="Consider synonyms from standard databases like FishBase" optional="false" checked="false" help="YES, if synonym species names should be returned from FishBase during data checks before data download." type="boolean" truevalue="True" falsevalue="False"/>
            </when>
            <!-- match-data: one dataset input plus five required column-name lists. Each list
                 defaults to an empty string and carries an empty_field validator, so the user
                 has to fill in every one of them. -->
            <when value="match-data">
                <param name="input_datasets"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Occurrence data (GBIF, iNaturalist, VertNet, or from any other source)"
                       help="URL to CSV tables containing the occurrences from GBIF, iNaturalist and VertNet. This can be the outcome of the retrieve-biodiversity-data process. (URL must be stored in a .txt file)"/>
                <param name="colnames_species_names_array"
                       type="text"
                       value=""
                       optional="false"
                       label="Column names including species names"
                       help="List of column names that indicate the Species Name in each of the provided datasets. Example: 'speciesname, scientificName'">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="colnames_countries_array"
                       type="text"
                       value=""
                       optional="false"
                       label="Column names including country"
                       help="List of column names that indicate the Country in each of the provided datasets. Example: 'JDS4_sampling_ID'">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="colnames_lat_array"
                       type="text"
                       value=""
                       optional="false"
                       label="Column names including Latitude"
                       help="List of column names that indicate the Latitude in each of the provided datasets. Example: 'lat, latitude'">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="colnames_lon_array"
                       type="text"
                       value=""
                       optional="false"
                       label="Column names including Longitude"
                       help="List of column names that indicate the Longitude in each of the provided datasets. Example: 'lon, long, longitude'">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="colnames_date_array"
                       type="text"
                       value=""
                       optional="false"
                       label="Column names including Dates"
                       help="List of column names that indicate the dates in each of the provided datasets. Example: 'Date, sampling_date'">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
            </when>
            <when value="check-names">
                <!-- Inputs of the check-names process. Species names can be given inline (species_names_list)
                     or as a .txt dataset holding the URL of a table (species_names_url); both are optional here.
                     NOTE(review): presumably at least one of the two must be supplied; confirm against the process description. -->
                <param name="species_names_list" label="Species names (List of species)" optional="true" help="List of species names to check from the standard database (FishBase)." type="text"/>
                <param name="species_names_url" label="Species names to check" optional="true" help="URL to a table containing the occurrences from any source. This can be the outcome of the match-data process. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <!-- Only meaningful together with species_names_url, as the help text explains. -->
                <param name="colname_species" label="Column name including species names" optional="true" help="Column name that indicates the species in the provided dataset. Only required if you provide tabular occurrence data. Example: 'species'" type="text"/>
                <param name="percent_correctness" label="Species name correctness in percent" optional="false" value="0.0" help="A number indicating the percentage of species name correctness that should be allowed to be replaced. High values ensure a perfect match from the standard database." type="float"/>
                <!-- Boolean switches are serialised as the strings True / False. -->
                <param name="bool_merge" label="Merge checked species names on the dataset" optional="false" checked="false" help="If a dataframe with species names is provided and this is YES, then all columns are returned. For only one species, the merge will be set automatically to false." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="bool_synonym" label="Synonym checks" optional="false" checked="false" help="Return synonym checks from FishBase." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="bool_ecosystem_type" label="Output ecosystem types" optional="false" checked="false" help="Return ecosystem checks for species from FishBase including marine, freshwater, and brackish." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="bool_rm_duplicates" label="Remove duplicates" optional="false" checked="false" help="Remove duplicates from the dataset." type="boolean" truevalue="True" falsevalue="False"/>
            </when>
            <when value="pred-extract">
                <!-- Inputs of the pred-extract process: an occurrence table, a raster reference, an optional study area and extraction switches. -->
                <param name="input_data" label="Occurrence data" optional="false" help="URL to a table containing the occurrences from any source. This can be the outcome of the check-names process. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="input_raster_url_or_name" label="Input raster" optional="false" value="" help="URL of a cloud-optimized raster to be used (will not be downloaded but accessed remotely), or the name of one of the static rasters on the server. Currently only worldclim is available, for testing purposes." type="text">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <!-- The study area can be given in three alternative forms (shapefile URL, GeoJSON URL, inline GeoJSON); all three are optional. -->
                <param name="study_area_extent" label="Area of Interest as a Shapefile" optional="true" help="URL to the input shapefile containing the study areas inside which you would like to delineate the bounding box or extent for study area. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="study_area_geojson_url" label="Area of Interest as a GeoJSON" optional="true" help="URL to the input GeoJSON file containing the study areas inside which you would like to delineate the bounding box or extent for study area. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="study_area_geojson_object" label="Area of Interest as GeoJSON object" optional="true" help="Directly post the GeoJSON containing the study areas inside which you would like to delineate the bounding box or extent for study area." type="text"/>
                <!-- Mandatory column names; blank values are rejected by the empty_field validators. -->
                <param name="colname_lat" label="Column name including Latitude" optional="false" value="" help="Column name lat. Example: 'decimalLatitude'" type="text">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="colname_lon" label="Column name including Longitude" optional="false" value="" help="Column name lon. Example: 'decimalLongitude'" type="text">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="colname_species" label="Column name including Species" optional="false" value="" help="Column name species. Example: 'speciescheck'" type="text">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <!-- The parameter name keeps its original spelling because it is the key sent to the remote process.
                     min="0" rejects negative record counts, which have no meaning for a minimum number of points. -->
                <param name="mininmum_sprecords" label="Minimum points to consider from the species dataset" optional="false" value="0" min="0" help="The minimum number of points in a dataset to allow species retained in the dataset. Example: 10 means a species with fewer than 10 records will be ignored." type="integer"/>
                <param name="bool_merge" label="Merge extracted data" optional="false" checked="false" help="This allows to return the full dataset merged with the extracted data." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="bool_list" label="Return a list of dataframe for species if they are more than 1" optional="false" checked="false" help="For multiple species, if set to TRUE, then a list of species dataframes are returned. If false, then a dataframe is returned." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="bool_coords" label="Attach the original coordinates" optional="false" checked="false" help="If YES, the original records used for data extraction will be attached on the dataset. This makes it easy to track the slight changes in coordinates during data extraction." type="boolean" truevalue="True" falsevalue="False"/>
                <!-- Help text reworded to agree with the label (YES removes the NAs). -->
                <param name="bool_remove_nas" label="Remove NAs after data extraction" optional="false" checked="false" help="If YES, the NAs created when the coordinates did not return records are removed from the dataset." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="bool_remove_duplicates" label="Remove duplicates" optional="false" checked="false" help="If YES, duplicates records will be removed." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="minimum_sprecordsallow" label="Minimum species records to consider before using data in species distribution modeling" optional="false" checked="false" help="In species distribution models, species with fewer records needs to be removed but normal data extraction this parameter should be set to FALSE to retain all species." type="boolean" truevalue="True" falsevalue="False"/>
            </when>
            <when value="multidetect-and-clean">
                <!-- Inputs of the multidetect-and-clean process: the table to check, the outlier detection methods,
                     and optional bootstrapping, PCA and data-extraction settings.
                     Parameter names (including misspelled ones such as exceute_pca) are kept verbatim because they are the keys sent to the remote process. -->
                <param name="input_data" label="Input table" optional="false" help="URL to the input table containing the data to be cleaned from outliers: Data sets for multiple or single species from pred_extract and other sources. (URL must be stored in a .txt file)" type="data" format="txt"/>
                <param name="colname_variable" label="Variable of interest" optional="false" value="" help="Column name identifying the variable of interest where outliers will be checked from for univariate outlier detection methods such as Z-score, mixed interquantile range, reverse jackknifing" type="text">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="select_columns" label="Specify columns to be checked" optional="true" help="In a multivariate dataset, if only particular columns need to be checked then they should be indicated here. Otherwise all columns will be considered in outlier detection." type="text"/>
                <param name="multiple_species" label="Distinguish between multiple species" optional="false" checked="false" help="If NO, then only a single species dataset is expected. " type="boolean" truevalue="True" falsevalue="False"/>
                <param name="output_type" label="Output type" optional="false" value="" help="Set whether you want to return outliers or clean dataset. Example: outlier." type="text">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="group_colname" label="Column name including group names" optional="true" help="For multiple groups in a dataframe, provide the column name containing the groups to be checked. For example, a column name with species name in a dataset." type="text"/>
                <param name="colname_exclude" label="Column names to exclude" optional="true" help="Exclude mainly numerical variables that are not necessary in the analysis, for example x and y columns or latitude/longitude, row numbers or serial IDs. Categorical variables are removed automatically in the data preparation." type="text"/>
                <param name="methods" label="Outlier detection methods" optional="false" value="" help="The name of methods for Outlier detection to be used, as a comma-separated string. Example = 'mixediqr, logboxplot, iqr, distboxplot, jknife, semiqr, hampel, iforest, lof, mahal'." type="text">
                    <validator type="empty_field" message="You must provide a value."/>
                </param>
                <param name="silence_true_errors" label="Silence methods that genuinely fail during outlier detection" optional="false" checked="false" help="If YES, silence errors for methods that genuinely fail during the outlier detection process and continue without breaking other methods." type="boolean" truevalue="True" falsevalue="False"/>
                <!-- Bootstrapping settings. -->
                <param name="boot_run" label="Bootstrapping execution" optional="false" checked="false" help="If set to YES, then bootstrapping will be done for small samples." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="boot_maxrecords" label="Maximum records to initiate bootstrapping" optional="true" help="The user can adjust the maximum number of records to be bootstrapped." type="integer"/>
                <param name="number_of_boots" label="Number of bootstraps" optional="true" help="The number of bootstraps to generate during bootstrapping." type="integer"/>
                <param name="setseed" label="Set seed" optional="false" value="0" help="During bootstrapping, random samples are generated that requires to set a seed for reproducibility." type="integer"/>
                <param name="boot_threshold" label="Threshold value to flag a record from bootstrap samples" optional="true" help="As a record is flagged in multiple bootstrap samples, a threshold is required to extract an outlier. For instance, 0.6, meaning if a record is flagged 6 of the 10 bootstraps, will be flagged as an outlier." type="float"/>
                <!-- Principal component analysis settings. -->
                <param name="exceute_pca" label="Execute Principal Component Analysis" optional="false" checked="false" help="If true, then PCA will be initiated." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="number_of_pca" label="Number of principal components to retain" optional="true" help="The user can indicate the maximum number of principal components to retain in the outlier detection." type="integer"/>
                <param name="pca_silence" label="Hide messages during PCA analysis" optional="true" help="Messages during PCA analysis are returned if set to NO. YES means to silence the messages like the variance explained from the total PCs selected." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="pcavariable" label="Select the principal component among the PCs retained to be used as variable of interest" optional="true" help="PC1 is the variable selected for outlier detection. Other PCs, e.g. PC2, can also be set. PC1 is advisable." type="text"/>
                <param name="sdm_data" label="Change the outlier detection routine based on data type" optional="false" checked="false" help="If it is univariate data, then set to NO. All data that requires multivariate analysis such as using kmeans, isolation forest, set to YES." type="boolean" truevalue="True" falsevalue="False"/>
                <!-- NOTE(review): the label says "Hide" while the help says YES displays the removed NAs; the two contradict each other. Confirm the intended meaning with the process description. -->
                <param name="inform_na_outlier" label="Hide messages for removing NAs" optional="true" help="If set to YES, NAs removed will be displayed for each group variable." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="missingness" label="Percentage missing values in a group" optional="true" help="Allowed missing values in a column to allow a user decide whether to remove the individual columns or rows from the data sets. For instance, 0.1: If a column has more than 10 % missing values, then it will be removed from the dataset rather than the rows." type="float"/>
                <!-- Data extraction settings. -->
                <param name="classify_or_autoremove" label="Either use outlier classification or autoremoval with threshold or LOESS method" optional="false" checked="false" help="The parameter allows to switch from outlification that labels all records as perfect outlier to fair outliers to allow further scrutiny. Otherwise, the outliers will be dropped based on a threshold set naively or using LOESS method." type="boolean" truevalue="True" falsevalue="False"/>
                <param name="bool_loess" label="Data extraction parameter: LOESS" optional="true" help="If set to true then the local regression method for data extraction will be used" type="boolean" truevalue="True" falsevalue="False"/>
                <param name="threshold_clean" label="Data extraction parameter: Threshold setting" optional="true" help="If Data extraction parameter: LOESS is NO, then a threshold value can be provided. The threshold will significantly determine which is flagged as an absolute outlier." type="float"/>
                <param name="outlierweights_mode" label="Data extraction parameter: Outlier weighting" optional="true" help="Outlier weighting and selection methods. abs uses record proportional to identify an absolute outlier. Example: abs" type="text"/>
                <!-- Declared as text: the documented example value ('med') is a keyword, which an integer field would reject. -->
                <param name="classifymode" label="Cuts data into classes" optional="true" help="Categorize data based on the correlation coefficient manner based on Akoglu 2018. Example: med" type="text"/>
                <param name="eif_bool" label="Empirical Influence Function" optional="true" help="Computes the Empirical Influence Function for classified outliers." type="boolean" truevalue="True" falsevalue="False"/>
            </when>
            <when value="heat1">
                <!-- heat1 takes a single input: the assessment period, chosen from a fixed list. -->
                <param name="assessment_period"
                       type="select"
                       optional="false"
                       label="Assessment Period"
                       help="The HOLAS assessment period which is used for the HEAT analysis. It can be HOLAS-2 (2011-2016) or HOLAS-3 (2016-2021) or custom for whole monitoring period (1877-9999). Note that assessment period defines the indicators used and integrations rules, where as the sample data needs to be defined separately. Allowed values: HOLAS-2, HOLAS-3, Other.">
                    <option value="HOLAS-2">HOLAS-2</option>
                    <option value="HOLAS-3">HOLAS-3</option>
                    <option value="Other">Other</option>
                </param>
            </when>
            <when value="heat2">
                <!-- heat2 inputs: assessment period, three optional sample sources given as free text, and the gridded assessment units.
                     NOTE(review): the period values are lowercase here (holas-2, holas-3) while the other heat branches use HOLAS-2 / HOLAS-3
                     and the help text lists the uppercase spelling; confirm which spelling the heat2 process accepts. -->
                <param name="assessment_period"
                       type="select"
                       optional="false"
                       label="Assessment Period"
                       help="The HOLAS assessment period which is used for the HEAT analysis. It can be HOLAS-2 (2011-2016) or HOLAS-3 (2016-2021) or custom for whole monitoring period (1877-9999). Note that assessment period defines the indicators used and integrations rules, where as the sample data needs to be defined separately. Allowed values: HOLAS-2, HOLAS-3, Other.">
                    <option value="holas-2">holas-2</option>
                    <option value="holas-3">holas-3</option>
                    <option value="Other">Other</option>
                </param>
                <!-- Sample inputs are optional text fields; per the help text, 'default' selects default data and an empty field means none. -->
                <param name="bottle_data"
                       type="text"
                       optional="true"
                       label="Input samples (bottle)"
                       help="In-situ bottle sample data (nutrients, chl-a)(StationSamplesBOT.csv). ICES Oceanographic data type 'BOT'. Write 'default' for default data. Leave out for none at all."/>
                <param name="ctd_data"
                       type="text"
                       optional="true"
                       label="Input samples (CTD)"
                       help=" CTD depth profiles, relevant for oxygen debt indicator (StationSamplesCTD.csv). ICES Oceanographic data type 'CTD'. Write 'default' for default data. Leave out for none at all."/>
                <param name="pump_data"
                       type="text"
                       optional="true"
                       label="Input samples (pump)"
                       help="Ferrybox data (chl-a) (StationSamplesPMP.csv). ICES Oceanographic data type 'PMP'. Write 'default' for default data. Leave out for none at all."/>
                <!-- NOTE(review): this is a required dataset input, yet the help suggests writing 'default'; presumably that word would have to be the content of the .txt file. Confirm. -->
                <param name="units_gridded"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Gridded assessment units"
                       help="Grid used for confidence assessment (10k, 30k, 60k). These are generated by heat1. For default spatial units for each assessment period, write 'default'. (URL must be stored in a .txt file)"/>
            </when>
            <when value="heat3">
                <!-- heat3 inputs: assessment period, the combined station samples, a weighting switch for chlorophyll a, and the cleaned assessment units. -->
                <param name="assessment_period"
                       type="select"
                       optional="false"
                       label="Assessment Period"
                       help="The HOLAS assessment period which is used for the HEAT analysis. It can be HOLAS-2 (2011-2016) or HOLAS-3 (2016-2021) or custom for whole monitoring period (1877-9999). Note that assessment period defines the indicators used and integrations rules, where as the sample data needs to be defined separately. Allowed values: HOLAS-2, HOLAS-3, Other.">
                    <option value="HOLAS-2">HOLAS-2</option>
                    <option value="HOLAS-3">HOLAS-3</option>
                    <option value="Other">Other</option>
                </param>
                <!-- Both dataset inputs are .txt files that hold a URL, as their help texts state. -->
                <param name="station_samples"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Station samples (combined)"
                       help="Merged sample data of BOT, PMP and CTD data for indicator calculation (StationSamples.csv). (URL must be stored in a .txt file)"/>
                <!-- Unchecked by default; serialised as the strings True / False. -->
                <param name="combined_Chlorophylla_IsWeighted"
                       type="boolean"
                       truevalue="True"
                       falsevalue="False"
                       checked="false"
                       optional="false"
                       label="Chlorophyll A Weighted?"
                       help="If true, script calculates combined chlorophyll a indicator results per assessment unit as a weighted average from BOT, PMP and EO. If false, then simple average."/>
                <param name="units_cleaned"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Non-gridded assessment units"
                       help="Non-gridded Assessment units which were cleaned, slightly adapted (e.g. some stations added manually), and extended (e.g. with the units's area and a UnitID). (URL must be stored in a .txt file)"/>
            </when>
            <when value="heat4">
                <!-- heat4 inputs: assessment period plus the annual indicators table, given as a .txt file that holds its URL. -->
                <param name="assessment_period"
                       type="select"
                       optional="false"
                       label="Assessment Period"
                       help="The HOLAS assessment period which is used for the HEAT analysis. It can be HOLAS-2 (2011-2016) or HOLAS-3 (2016-2021) or custom for whole monitoring period (1877-9999). Note that assessment period defines the indicators used and integrations rules, where as the sample data needs to be defined separately. Allowed values: HOLAS-2, HOLAS-3, Other.">
                    <option value="HOLAS-2">HOLAS-2</option>
                    <option value="HOLAS-3">HOLAS-3</option>
                    <option value="Other">Other</option>
                </param>
                <param name="annual_indicators"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Annual Indicators Table"
                       help="CSV file of calculated HEAT EQRS per indicator per year per assessment unit (AnnualIndicators.csv). (URL must be stored in a .txt file)"/>
            </when>
            <when value="heat5">
                <!-- heat5 inputs: assessment period plus the assessment indicators table, given as a .txt file that holds its URL. -->
                <param name="assessment_period"
                       type="select"
                       optional="false"
                       label="Assessment Period"
                       help="The HOLAS assessment period which is used for the HEAT analysis. It can be HOLAS-2 (2011-2016) or HOLAS-3 (2016-2021) or custom for whole monitoring period (1877-9999). Note that assessment period defines the indicators used and integrations rules, where as the sample data needs to be defined separately. Allowed values: HOLAS-2, HOLAS-3, Other.">
                    <option value="HOLAS-2">HOLAS-2</option>
                    <option value="HOLAS-3">HOLAS-3</option>
                    <option value="Other">Other</option>
                </param>
                <param name="assessment_indicators"
                       type="data"
                       format="txt"
                       optional="false"
                       label="Assessment Indicators Table"
                       help="CSV file of calculated EQRS per assessment period per assessment unit (AssessmentIndicators.csv). (URL must be stored in a .txt file)"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single text output; its path is what the command passes to the R script as the outputData option.
             The history label is taken from the select_process parameter. -->
        <data name="output_data"
              format="txt"
              label="$select_process"/>
    </outputs>
    <expand macro="tests"/>
    <help>Use the dropdown menu at the top to select the OGC API processes hosted on https://aquainfra.ogc.igb-berlin.de/pygeoapi/ and then complete the corresponding form to run the service.</help>
    <expand macro="citations"/>
</tool>
author	ecology
date	Thu, 02 Oct 2025 15:57:02 +0000
parents	f708bf8ccfa7
children