changeset 0:4de886e6300d draft

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/ocean commit a7e53c429cf93485aba692b928defe6ee01633d6
author ecology
date Tue, 22 Oct 2024 15:55:13 +0000
parents
children 4edd010161e9
files argo_getdata.py check.py copernicusmarine.xml divandfull.jl macro.xml test-data/argo_data.netcdf test-data/data_from_Eutrophication_Med_profiles_2022_unrestricted.nc
diffstat 7 files changed, 628 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/argo_getdata.py	Tue Oct 22 15:55:13 2024 +0000
@@ -0,0 +1,191 @@
+# author: Marie Jossé
+
+# Python script
+
+#############################
+#      Argo data access     #
+#############################
+
+# Packages : argopy
+
+
+# Load arguments
+import argparse
+import sys
+
+import argopy
+
+command_line_args = sys.argv[1:]
+
+
+parser = argparse.ArgumentParser(description="Retrieve argo Data")
+# Add arguments
+
+parser.add_argument("--user", type=str,
+                    help="User mode : standard, expert or research")
+parser.add_argument("--cardinal_1", type=float, help="Longitude min")
+parser.add_argument("--cardinal_2", type=float, help="Longitude max")
+parser.add_argument("--cardinal_3", type=float, help="Latitude min")
+parser.add_argument("--cardinal_4", type=float, help="Latitude max")
+parser.add_argument("--pressure_1", type=float, help="Pressure min")
+parser.add_argument("--pressure_2", type=float, help="Pressure max")
+parser.add_argument("--date_1", type=str, help="Starting date")
+parser.add_argument("--date_2", type=str, help="Ending date.")
+parser.add_argument("--wmo", type=str, help="WMO: argo's identifier")
+parser.add_argument("--profile", type=str, help="Number of profiles")
+parser.add_argument("--params", type=str, help="List of bgc parameters")
+parser.add_argument("--measured", type=str, help="List of bgc parameters")
+parser.add_argument("--output_argo", type=str, help="Output data from argo")
+
+args = parser.parse_args(command_line_args)
+
+
+# Parse the command line arguments
+
+print(args)
+# Import data
+
+user = args.user
+cardinal_1 = args.cardinal_1
+cardinal_2 = args.cardinal_2
+cardinal_3 = args.cardinal_3
+cardinal_4 = args.cardinal_4
+pressure_1 = args.pressure_1
+pressure_2 = args.pressure_2
+date_1 = args.date_1
+date_2 = args.date_2
+wmo = args.wmo
+if wmo is not None:
+    wmo = list(map(int, wmo.split(",")))
+profile = args.profile
+if profile is not None:
+    profile = list(map(int, profile.split(",")))
+params = args.params
+if params is not None:
+    params = params.split(",")
+    if len(params) == 83:
+        params = "all"
+measured = args.measured
+if measured is not None:
+    measured = measured.split(",")
+
+# Let’s import the argopy data fetcher:
+
+######################
+#       User mode    #
+######################
+# By default,
+# all argopy data fetchers are set to work with a standard user mode.
+# To change that
+
+argopy.set_options(mode=user)
+
+######################
+# Fetching Argo data #
+######################
+# Data selection #
+
+# To access Argo data with a DataFetcher,
+# you need to define how to select your data of interest.
+# argopy provides 3 different data selection methods:
+
+argo_data = argopy.DataFetcher()
+
+# 🗺 For a space/time domain #
+
+if (cardinal_1 is not None):
+    mode = "region"
+    argo_data = argo_data.region([cardinal_1, cardinal_2,
+                                  cardinal_3, cardinal_4,
+                                  pressure_1, pressure_2,
+                                  date_1, date_2])
+
+# ⚓ For one or more profiles #
+# Use the fetcher access point argopy.DataFetcher.profile()
+# to specify the float WMO platform number
+# and the profile cycle number(s) to retrieve profiles for.
+elif (wmo is not None and profile is not None):
+    argo_data = argo_data.profile(wmo, profile)
+    # can also be argo_data = argo_data.profile(6902755, [3, 12])
+    mode = "profile"
+
+# 🤖 For one or more floats #
+# If you know the Argo float unique identifier number called a WMO number
+# you can use the fetcher access point DataFetcher.float()
+# to specify one or more float WMO platform numbers to select.
+else:
+    argo_data = argo_data.float(wmo)
+    # can also be argo_data = argo_data.float([6902746, 6902755])
+    mode = "float"
+
+# Data sources #
+# Let’s start with standard import:
+# argopy.reset_options()
+# Specify data source erddap, gdac or argovis
+
+# if (ftp != "") :
+    # argopy.set_options(src = "gdac", ftp = ftp)
+# else :
+    # argopy.set_options(src = "erddap")
+
+# With remote, online data sources,
+# it may happen that the data server is experiencing downtime.
+print(argopy.status())
+
+# Dataset #
+# Argo data are distributed as a single dataset.
+# It is referenced at https://doi.org/10.17882/42182.
+# But they are several Argo missions with specific files and parameters
+# that need special handling by argopy, namely:
+#   - the core Argo Mission: from floats that measure temperature,
+#     salinity, pressure down to 2000m,
+#   - the Deep Argo Mission: from floats that measure temperature,
+#     salinity, pressure down to 6000m,
+#   - and the BGC-Argo Mission: from floats that measure temperature,
+#     salinity, pressure and oxygen, pH, nitrate, chlorophyll,
+#     backscatter, irradiance down to 2000m.
+# You can choose between phy or bgc
+if (params is None):
+    argopy.set_options(dataset="phy")
+else:
+    argopy.set_options(dataset="bgc")
+    if (measured != ['None'] and measured is not None):
+        argo_data = argopy.DataFetcher(params=params, measured=measured)
+        if (mode == "region"):
+            argo_data = argo_data.region([cardinal_1, cardinal_2,
+                                          cardinal_3, cardinal_4,
+                                          pressure_1, pressure_2,
+                                          date_1, date_2])
+        elif (mode == "profile"):
+            argo_data = argo_data.profile(wmo, profile)
+        else:
+            argo_data = argo_data.float(wmo)
+    else:
+        argo_data = argopy.DataFetcher(params=params, measured=None)
+        if (mode == "region"):
+            argo_data = argo_data.region([cardinal_1, cardinal_2,
+                                          cardinal_3, cardinal_4,
+                                          pressure_1, pressure_2,
+                                          date_1, date_2])
+        elif (mode == "profile"):
+            argo_data = argo_data.profile(wmo, profile)
+        else:
+            argo_data = argo_data.float(wmo)
+
+# Data fetching #
+# To fetch (i.e. access, download, format) Argo data,
+# argopy provides the DataFetcher class.
+# Several DataFetcher arguments exist to help you select the dataset,
+# the data source and the user mode the most suited for your applications;
+# and also to improve performances.
+
+# You define the selection of data you want to fetch
+# with one of the DataFetcher methods: region, float or profile.
+# 2 lines to download Argo data: import and fetch !
+
+argo_data = argo_data.load().data
+argo_data.to_netcdf("argo_data.nc")
+
+# argo_metadata = argo_data.to_index()
+
+print(argo_data)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check.py	Tue Oct 22 15:55:13 2024 +0000
@@ -0,0 +1,88 @@
+import os
+import subprocess
+import sys
+
+
+def validate_command(command):
+    # Example validation: Ensure the command does not contain
+    # potentially dangerous substrings
+    forbidden_substrings = ["rm",
+                            "rm -rf",
+                            "sudo",
+                            "dd if=",
+                            "curl",
+                            "wget",
+                            ";",
+                            "&&"]
+    for substring in forbidden_substrings:
+        if substring in command:
+            message = f"Error: Command has forbidden substring '{substring}'"
+            return False, message
+
+    # Check if the command starts with 'copernicusmarine'
+    if not command.startswith("copernicusmarine"):
+        return False, "Error: Command must start with 'copernicusmarine'"
+
+    # Remove 'copernicusmarine' from the start
+    command = command[len("copernicusmarine"):].strip()
+
+    # Check for specific commands and their arguments
+    if command.startswith("subset"):
+        # Check for required arguments for 'subset' command
+        if not ("--dataset-id" in command or "--dataset-url" in command):
+            message = (
+                "Error: 'subset' command must have '--dataset-id' or "
+                "'--dataset-url'"
+            )
+            return False, message
+    elif command.startswith("get"):
+        # Check for required arguments for 'get' command
+        if not ("--dataset-id" in command or "--dataset-url" in command):
+            message = (
+                "Error: 'get' command must have '--dataset-id' or "
+                "'--dataset-url'"
+            )
+            return False, message
+    elif command.startswith("login") or command.startswith("describe"):
+        message = "This tool only accepts 'subset' and 'get' commands."
+        return False, message
+    else:
+        return False, "Error: Command must be 'subset' or 'get'"
+
+    return True, None
+
+
+def main():
+    # Check if a filename argument is provided
+    if len(sys.argv) != 2:
+        print("Usage: python check.py <config_file>")
+        sys.exit(1)
+
+    # Get the filename from the command line argument
+    config_file = sys.argv[1]
+
+    # Check if the file exists
+    if not os.path.isfile(config_file):
+        print(f"Error: File '{config_file}' does not exist.")
+        sys.exit(1)
+
+    # Read the content of the file
+    with open(config_file, "r") as file:
+        command = file.read().strip()
+
+    # Validate the command
+    is_valid, error_message = validate_command(command)
+    if not is_valid:
+        print(error_message)
+        sys.exit(1)
+
+    # Execute the command
+    try:
+        subprocess.run(command, shell=True, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error: Command failed with exit code {e.returncode}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/copernicusmarine.xml	Tue Oct 22 15:55:13 2024 +0000
@@ -0,0 +1,114 @@
+<tool id="copernicusmarine" name="Copernicus Marine Data Store" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01" license="MIT">
+    <description>retrieve marine data</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.3.3</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">copernicusmarine</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+    #import re
+        #set $cmems_username = $__user__.extra_preferences.get('cmems_username', "")
+        #set $cmems_password = $__user__.extra_preferences.get('cmems_password', "")
+
+        #if $cmems_username == "" or $cmems_password == ""
+            #set $cmems_username = os.getenv('CMEMS_USERNAME', '')
+            #set $cmems_password = os.getenv('CMEMS_PASSWORD', '')
+
+            echo "Using global ENVs" &&
+
+            #if re.match(r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d).{8,}$", $cmems_password):
+                echo "Password meets the required criteria" &&
+            #else:
+                echo "Password does not meet the required criteria" &&
+            #end if
+
+        #end if
+
+        #if $cmems_username != ""
+            echo 'Error. Set your CMEMS credentials via: User -> Preferences -> Manage Information'  &&
+        #end if
+        
+        #set $filename="config_file_with_my_command"
+        echo -e '$input_text' > $filename --force-download --username $cmems_username --password $cmems_password &&
+
+        python '$__tool_directory__/check.py' $filename
+    ]]></command>
+    <configfiles>
+        <configfile name="cmems_credentials">
+            #set $cmems_username = $__user__.extra_preferences.get('cmems_username', "")
+            #set $cmems_password = $__user__.extra_preferences.get('cmems_password', "")
+            cmems_username: $cmems_username
+            cmems_password: $cmems_password
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="input_text" label="Paste API Request" type="text" area="true">
+            <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits">
+                    <add value="_" />
+                    <add value="-" />
+                    <add value=" " />
+                    <add value=":" />
+                    <add value="." />
+                    <add value=" " />
+                </valid>
+            </sanitizer>
+            <validator type="regex">[0-9a-zA-Z_:\-\. ]+</validator>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output_netcdf" label="Copernicus marine data" from_work_dir="./*.nc" format="netcdf"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_text" value="copernicusmarine subset --dataset-id cmems_mod_med_phy_my_4.2km-climatology_P1M-m --variable bottomT_avg --variable bottomT_std --variable mlotst_avg --variable mlotst_std --variable so_avg --variable so_std --variable thetao_avg --variable thetao_std --variable uo_avg --variable uo_std --variable vo_avg --variable vo_std --variable zos_avg --variable zos_std --start-datetime 1993-12-01T00:00:00 --end-datetime 1993-12-01T00:00:00 --minimum-longitude 10.283266521135577 --maximum-longitude 12.139348881644054 --minimum-latitude 38.461936025366256 --maximum-latitude 39.82957565942514 --minimum-depth 1.0182366371154785 --maximum-depth 1.0182366371154785"/>
+            <output name="output_netcdf">
+                <assert_contents>
+                    <has_text text="_NCProperties"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+============================
+Copernicus Marine Data Store
+============================
+
+**Context**
+This tool is a wrapper to retrieve data from the Copernicus Marine Environment Monitoring Service (CMEMS).
+
+- It allows to retrieve data from the Copernicus Marine Service.
+- Any user willing to use this tool needs to `create a new account <https://data.marine.copernicus.eu/login>`_.
+- Set your Copernicus CMEMS API Key via: User > Preferences > Manage Information
+- Enter your username and password for Copernicus CMEMS
+- Compose your request directly on Copernicus Marine Data Store 
+    - Choose the data that interests you, then click on the download button
+    - Then on the top right click again on the big download button
+    - Log in
+    - Click on "Automate"
+    - You should have a pop-up window called "Automate download"
+    - Copy the ">_Command-Line Interface" proposed there
+- Back on Galaxy paste it in the input field "Paste API Request".
+
+For more information on the Command-Line Interface (CLI) see `Copernicus Marine Toolbox CLI - Subset <https://help.marine.copernicus.eu/en/articles/7972861-copernicus-marine-toolbox-cli-subset>`_
+
+**Input**
+Command line from the Copernicus marine services copy paste as a text.
+
+**Output**
+A netCDF file containing the data chosen by the user from the Copernicus Marine Data Store.
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex"> 
+        @misc{Copernicus, 
+        title={Copernicus Marine Data Store}, 
+        url={https://data.marine.copernicus.eu/products}, 
+        note={E.U. Copernicus Marine Service Information}, 
+        author={Copernicus, E.U.} 
+        }
+        </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/divandfull.jl	Tue Oct 22 15:55:13 2024 +0000
@@ -0,0 +1,211 @@
+#Julia script
+
+###############################
+##    DIVAndrun analysis     ##
+###############################
+import Pkg; 
+using Pkg
+Pkg.status()
+
+### Import packages
+using DIVAnd
+using Dates
+using Printf
+# Getting the arguments from the command line
+args = ARGS
+
+# Import data
+if length(args) < 4
+    error("This tool needs at least 4 arguments")
+else
+    netcdf_data = args[1]
+    longmin = parse(Float64, args[2])
+    longmax = parse(Float64, args[3])
+    latmin = parse(Float64, args[4])
+    latmax = parse(Float64, args[5])
+    startdate = args[6] # yyyy,mm,dd
+    enddate = args[7]
+    varname = args[8]
+    selmin = parse(Float64, args[9])
+    selmax = parse(Float64, args[10])
+    bathname = args[11]
+end
+
+## This script will create a climatology:
+# 1. ODV data reading.
+# 2. Extraction of bathymetry and creation of mask
+# 3. Data download from other sources and duplicate removal.
+# 4. Quality control.
+# 5. Parameter optimisation.
+# 6. Spatio-temporal interpolation with DIVAnd.
+
+
+### Configuration
+# Define the horizontal, vertical (depth levels) and temporal resolutions.
+# Select the variable of interest
+
+dx, dy = 0.125, 0.125
+lonr = longmin:dx:longmax
+latr = latmin:dy:latmax
+
+# Convert string in date
+startdate = Date(startdate, "yyyy-mm-dd")
+
+# extract year month day
+startyear = year(startdate)
+startmonth = month(startdate)
+startday = day(startdate)
+
+# Convert string in date
+enddate = Date(enddate, "yyyy-mm-dd")
+
+# extract year month day
+endyear = year(enddate)
+endmonth = month(enddate)
+endday = day(enddate)
+
+timerange = [Date(startyear, startmonth, startday),Date(endyear, endmonth, endday)];
+
+depthr = [0.,5., 10., 15., 20., 25., 30., 40., 50., 66, 
+    75, 85, 100, 112, 125, 135, 150, 175, 200, 225, 250, 
+    275, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 
+    800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250, 
+    1300, 1350, 1400, 1450, 1500, 1600, 1750, 1850, 2000];
+depthr = [0.,10.,20.];
+
+varname = varname
+yearlist = [1900:2023];
+monthlist = [[1,2,3],[4,5,6],[7,8,9],[10,11,12]];
+
+# We create here the variable TS (for "time selector"), which allows us to work with the observations corresponding to each period of interest.
+
+TS = DIVAnd.TimeSelectorYearListMonthList(yearlist,monthlist);
+@show TS;
+
+figdir = "outputs/"
+if ~(isdir(figdir))
+    mkdir(figdir)
+else
+    @info("Figure directory already exists")
+end
+### 1. Read your ODV file
+# Adapt the datadir and datafile values.
+# The example is based on a sub-setting of the Mediterranean Sea aggregated dataset.
+# The dataset has been extracted around the Adriatic Sea and exported to a netCDF using Ocean Data 
+datadir = "../data"
+
+datafile = netcdf_data
+
+# Then you can read the full file:
+@time obsval,obslon,obslat,obsdepth,obstime,obsid = NCODV.load(Float64, datafile, 
+    "Water body $(varname)");
+
+# Check the extremal values of the observations
+checkobs((obslon,obslat,obsdepth,obstime),obsval,obsid)
+
+### 2. Extract the bathymetry
+
+# It is used to delimit the domain where the interpolation is performed.
+## 2.1 Choice of bathymetry
+
+# Modify bathname according to the resolution required.
+
+@time bx,by,b = load_bath(bathname,true,lonr,latr);
+
+## 2.2 Create mask
+# False for sea
+# True for land
+
+mask = falses(size(b,1),size(b,2),length(depthr))
+for k = 1:length(depthr)
+    for j = 1:size(b,2)
+        for i = 1:size(b,1)
+            mask[i,j,k] = b[i,j] >= depthr[k]
+        end
+    end
+end
+@show size(mask)
+
+### 3. Quality control
+# We check the salinity value.
+# Adapt the criteria to your region and variable.
+
+sel = (obsval .<= selmax) .& (obsval .>= selmin);
+
+obsval = obsval[sel]
+obslon = obslon[sel]
+obslat = obslat[sel]
+obsdepth = obsdepth[sel]
+obstime = obstime[sel]
+obsid = obsid[sel];
+
+### 4. Analysis parameters
+# Correlation lengths and noise-to-signal ratio
+
+# We will use the function diva3D for the calculations.
+# With this function, the correlation length has to be defined in meters, not in degrees.
+
+sz = (length(lonr),length(latr),length(depthr));
+lenx = fill(100_000.,sz)   # 100 km
+leny = fill(100_000.,sz)   # 100 km
+lenz = fill(25.,sz);      # 25 m 
+len = (lenx, leny, lenz);
+epsilon2 = 0.1;
+
+### Output file name
+outputdir = "outputs_netcdf/"
+if !isdir(outputdir)
+    mkpath(outputdir)
+end
+filename = joinpath(outputdir, "Water_body_$(replace(varname," "=>"_")).nc")
+
+### 7. Analysis
+# Remove the result file before running the analysis, otherwise you'll get the message
+if isfile(filename)
+    rm(filename) # delete the previous analysis
+    @info "Removing file $filename"
+end
+
+## 7.1 Plotting function
+# Define a plotting function that will be applied for each time index and depth level.
+# All the figures will be saved in a selected directory.
+     
+function plotres(timeindex,sel,fit,erri)
+    tmp = copy(fit)
+    nx,ny,nz = size(tmp)
+    for i in 1:nz
+        figure("Additional-Data")
+        ax = subplot(1,1,1)
+        ax.tick_params("both",labelsize=6)
+        ylim(39.0, 46.0);
+        xlim(11.5, 20.0);
+        title("Depth: (timeindex)", fontsize=6)
+        pcolor(lonr.-dx/2.,latr.-dy/2, permutedims(tmp[:,:,i], [2,1]);
+               vmin = 33, vmax = 40)
+        colorbar(extend="both", orientation="vertical", shrink=0.8).ax.tick_params(labelsize=8)
+
+        contourf(bx,by,permutedims(b,[2,1]), levels = [-1e5,0],colors = [[.5,.5,.5]])
+        aspectratio = 1/cos(mean(latr) * pi/180)
+        gca().set_aspect(aspectratio)
+        
+        figname = varname * @sprintf("_%02d",i) * @sprintf("_%03d.png",timeindex)
+        plt.savefig(joinpath(figdir, figname), dpi=600, bbox_inches="tight");
+        plt.close_figs()
+    end
+end
+
+## 7.2 Create the gridded fields using diva3d
+# Here only the noise-to-signal ratio is estimated.
+# Set fitcorrlen to true to also optimise the correlation length.
+@time dbinfo = DIVAnd.diva3d((lonr,latr,depthr,TS),
+    (obslon,obslat,obsdepth,obstime), obsval,
+    len, epsilon2,
+    filename,varname,
+    bathname=bathname,
+    fitcorrlen = false,
+    niter_e = 2,
+    surfextend = true
+    );
+
+# Save the observation metadata in the NetCDF file.
+DIVAnd.saveobs(filename,(obslon,obslat,obsdepth,obstime),obsid);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macro.xml	Tue Oct 22 15:55:13 2024 +0000
@@ -0,0 +1,24 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.1.15</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="argo_requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">argopy</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="argo_input_user">
+        <inputs>
+            <param name="user" type="select" label="Which kind of user are you?">
+                <option value="standard">🏊 standard mode simplifies the dataset, remove most of its jargon and return a priori good data</option>
+                <option value="research">🚣 research mode simplifies the dataset to its heart, preserving only data of the highest quality for research studies, including studies sensitive to small pressure and salinity bias </option>
+                <option value="expert">🏄 expert mode return all the Argo data, without any postprocessing</option>
+           </param>
+       </inputs>
+    </xml>
+    <xml name="argo_bibref">
+       <citations>
+            <citation type="doi">doi:10.21105/joss.02425</citation>
+        </citations>
+    </xml>
+</macros>
Binary file test-data/argo_data.netcdf has changed
Binary file test-data/data_from_Eutrophication_Med_profiles_2022_unrestricted.nc has changed