Mercurial > repos > ecology > copernicusmarine
changeset 0:4de886e6300d draft
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/ocean commit a7e53c429cf93485aba692b928defe6ee01633d6
author | ecology |
---|---|
date | Tue, 22 Oct 2024 15:55:13 +0000 |
parents | |
children | 4edd010161e9 |
files | argo_getdata.py check.py copernicusmarine.xml divandfull.jl macro.xml test-data/argo_data.netcdf test-data/data_from_Eutrophication_Med_profiles_2022_unrestricted.nc |
diffstat | 7 files changed, 628 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# argo_getdata.py
# author: Marie Jossé

# Python script

#############################
#    Argo data access       #
#############################

# Packages : argopy

"""Fetch Argo data with argopy and save it as a NetCDF file.

The data of interest is selected in one of three ways, tried in this order:

* a space/time region, when ``--cardinal_1`` is given;
* one or more profiles, when both ``--wmo`` and ``--profile`` are given;
* one or more floats, when only ``--wmo`` is given.

The ``phy`` dataset is used unless ``--params`` is given, in which case the
``bgc`` dataset is requested with those parameters.
"""

# Load arguments
import argparse
import sys

import argopy

# Number of comma separated values in --params that is replaced by the
# string "all".
# NOTE(review): presumably the number of BGC parameters offered by the calling
# tool wrapper -- confirm against the wrapper before changing it.
ALL_BGC_PARAMS_COUNT = 83

# The result is always written to this file in the working directory.
# NOTE(review): --output_argo is accepted on the command line but is not used
# to name the output; confirm with the tool wrapper whether it should be.
OUTPUT_FILE = "argo_data.nc"


def build_parser():
    """Return the argument parser describing every option of the script."""
    parser = argparse.ArgumentParser(description="Retrieve argo Data")
    parser.add_argument("--user", type=str,
                        help="User mode : standard, expert or research")
    parser.add_argument("--cardinal_1", type=float, help="Longitude min")
    parser.add_argument("--cardinal_2", type=float, help="Longitude max")
    parser.add_argument("--cardinal_3", type=float, help="Latitude min")
    parser.add_argument("--cardinal_4", type=float, help="Latitude max")
    parser.add_argument("--pressure_1", type=float, help="Pressure min")
    parser.add_argument("--pressure_2", type=float, help="Pressure max")
    parser.add_argument("--date_1", type=str, help="Starting date")
    parser.add_argument("--date_2", type=str, help="Ending date.")
    parser.add_argument("--wmo", type=str, help="WMO: argo's identifier")
    parser.add_argument("--profile", type=str, help="Number of profiles")
    parser.add_argument("--params", type=str, help="List of bgc parameters")
    parser.add_argument("--measured", type=str, help="List of bgc parameters")
    parser.add_argument("--output_argo", type=str,
                        help="Output data from argo")
    return parser


def split_list(value, cast=str):
    """Split a comma separated option into a list of ``cast`` values.

    ``None`` (option not given) is returned unchanged.
    """
    if value is None:
        return None
    return [cast(item) for item in value.split(",")]


def selection_mode(args, wmo, profile):
    """Return "region", "profile" or "float", or None if nothing is selected.

    A region takes precedence over WMO numbers; profiles are used only when
    both WMO numbers and profile numbers are available.
    """
    if args.cardinal_1 is not None:
        return "region"
    if wmo is None:
        return None
    return "profile" if profile is not None else "float"


def region_box(args):
    """Build the box handed to ``DataFetcher.region``.

    The box holds the longitude, latitude and pressure bounds.  The two dates
    are appended only when at least one of them was given, so that a request
    without dates does not carry two ``None`` entries.
    """
    box = [args.cardinal_1, args.cardinal_2,
           args.cardinal_3, args.cardinal_4,
           args.pressure_1, args.pressure_2]
    if args.date_1 is not None or args.date_2 is not None:
        box += [args.date_1, args.date_2]
    return box


def select_data(fetcher, mode, args, wmo, profile):
    """Apply the selection named by ``mode`` to ``fetcher`` and return it."""
    if mode == "region":
        return fetcher.region(region_box(args))
    if mode == "profile":
        # can also be fetcher.profile(6902755, [3, 12])
        return fetcher.profile(wmo, profile)
    # can also be fetcher.float([6902746, 6902755])
    return fetcher.float(wmo)


def main(argv=None):
    """Parse the options, fetch the selected Argo data and save it."""
    parser = build_parser()
    args = parser.parse_args(sys.argv[1:] if argv is None else argv)
    print(args)

    wmo = split_list(args.wmo, int)
    profile = split_list(args.profile, int)
    params = split_list(args.params)
    if params is not None and len(params) == ALL_BGC_PARAMS_COUNT:
        params = "all"
    measured = split_list(args.measured)
    # The literal text "None" means that no parameter is required.
    if measured == ["None"]:
        measured = None

    mode = selection_mode(args, wmo, profile)
    if mode is None:
        # Fail with a clear message instead of handing None to argopy.
        parser.error("select data with --cardinal_1..4 (region) "
                     "or with --wmo (float or profile)")

    # By default, all argopy data fetchers work with a standard user mode.
    argopy.set_options(mode=args.user)

    # With remote, online data sources,
    # it may happen that the data server is experiencing down time.
    print(argopy.status())

    # Argo data are distributed as a single dataset
    # (https://doi.org/10.17882/42182); here either the "phy" or the "bgc"
    # dataset is requested.  The option is set before the fetcher is created
    # and the fetcher is built exactly once.
    if params is None:
        argopy.set_options(dataset="phy")
        fetcher = argopy.DataFetcher()
    else:
        argopy.set_options(dataset="bgc")
        fetcher = argopy.DataFetcher(params=params, measured=measured)

    # 2 steps to download Argo data: select and load.
    argo_data = select_data(fetcher, mode, args, wmo, profile).load().data
    argo_data.to_netcdf(OUTPUT_FILE)

    print(argo_data)


if __name__ == "__main__":
    main()
# check.py
"""Validate and run a ``copernicusmarine`` command stored in a text file.

Usage: ``python check.py <config_file>``.  The file must contain a single
``copernicusmarine subset`` or ``copernicusmarine get`` command.
"""
import os
import shlex
import subprocess
import sys

# Program names rejected when they appear as a whole word of the command.
# They are matched per word so that values which merely contain them (for
# example "thermo" or "norman", which contain "rm") are still accepted.
_FORBIDDEN_WORDS = ("rm", "sudo", "curl", "wget")

# Fragments rejected wherever they appear in the raw command text.
_FORBIDDEN_SUBSTRINGS = ("rm -rf", "dd if=", ";", "&&")

# Options that identify the dataset; one of them is mandatory.
_DATASET_OPTIONS = ("--dataset-id", "--dataset-url")


def validate_command(command):
    """Check that ``command`` is an acceptable copernicusmarine call.

    Args:
        command: the full command line as one string.

    Returns:
        A ``(is_valid, message)`` tuple: ``(True, None)`` when the command is
        accepted, otherwise ``(False, <error message>)``.
    """
    try:
        tokens = shlex.split(command)
    except ValueError as exc:
        # Raised by shlex for e.g. an unterminated quote.
        return False, f"Error: Command could not be parsed ({exc})"

    for word in _FORBIDDEN_WORDS:
        if word in tokens:
            return False, f"Error: Command has forbidden substring '{word}'"
    for substring in _FORBIDDEN_SUBSTRINGS:
        if substring in command:
            message = f"Error: Command has forbidden substring '{substring}'"
            return False, message

    # The program must be exactly 'copernicusmarine', not just a prefix.
    if not tokens or tokens[0] != "copernicusmarine":
        return False, "Error: Command must start with 'copernicusmarine'"

    subcommand = tokens[1] if len(tokens) > 1 else ""
    if subcommand in ("login", "describe"):
        return False, "This tool only accepts 'subset' and 'get' commands."
    if subcommand not in ("subset", "get"):
        return False, "Error: Command must be 'subset' or 'get'"

    # Accept both "--dataset-id VALUE" and "--dataset-id=VALUE".
    has_dataset = any(
        token.split("=", 1)[0] in _DATASET_OPTIONS for token in tokens[2:]
    )
    if not has_dataset:
        message = (
            f"Error: '{subcommand}' command must have '--dataset-id' or "
            "'--dataset-url'"
        )
        return False, message

    return True, None


def main():
    """Read the command from the file named on the command line and run it.

    Exits with status 1 on a usage error, a missing file, a rejected command
    or a failing command.
    """
    # Check if a filename argument is provided
    if len(sys.argv) != 2:
        print("Usage: python check.py <config_file>")
        sys.exit(1)

    config_file = sys.argv[1]

    if not os.path.isfile(config_file):
        print(f"Error: File '{config_file}' does not exist.")
        sys.exit(1)

    with open(config_file, "r") as file:
        command = file.read().strip()

    is_valid, error_message = validate_command(command)
    if not is_valid:
        print(error_message)
        sys.exit(1)

    # Run the already tokenised command without a shell, so that no part of
    # the text can be interpreted as shell syntax.
    try:
        subprocess.run(shlex.split(command), check=True)
    except FileNotFoundError:
        print("Error: 'copernicusmarine' executable was not found")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"Error: Command failed with exit code {e.returncode}")
        sys.exit(1)


if __name__ == "__main__":
    main()
<!-- copernicusmarine.xml: Galaxy wrapper running a pasted "copernicusmarine"
     command line through check.py. -->
<tool id="copernicusmarine" name="Copernicus Marine Data Store" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01" license="MIT">
    <description>retrieve marine data</description>
    <macros>
        <token name="@TOOL_VERSION@">1.3.3</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">copernicusmarine</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        #import os
        #import re
        ## Credentials come from the user preferences first ...
        #set $cmems_username = $__user__.extra_preferences.get('cmems_username', "")
        #set $cmems_password = $__user__.extra_preferences.get('cmems_password', "")

        ## ... and fall back to the CMEMS_USERNAME / CMEMS_PASSWORD
        ## environment variables when either preference is empty.
        #if $cmems_username == "" or $cmems_password == ""
            #set $cmems_username = os.getenv('CMEMS_USERNAME', '')
            #set $cmems_password = os.getenv('CMEMS_PASSWORD', '')

            echo "Using global ENVs" &&

            #if re.match(r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d).{8,}$", $cmems_password):
                echo "Password meets the required criteria" &&
            #else:
                echo "Password does not meet the required criteria" &&
            #end if

        #end if

        ## Stop here when no credentials could be found at all.
        #if $cmems_username == "" or $cmems_password == ""
            echo 'Error. Set your CMEMS credentials via: User -> Preferences -> Manage Information' >&2 &&
            exit 1 &&
        #end if

        ## The words following the redirection are still arguments of echo,
        ## so the file receives the pasted request followed by the
        ## download and credential options.
        ## NOTE(review): the password is part of the job command line;
        ## consider passing it through the environment instead.
        #set $filename="config_file_with_my_command"
        echo -e '$input_text' > $filename --force-download --username $cmems_username --password $cmems_password &&

        python '$__tool_directory__/check.py' $filename
    ]]></command>
    <!-- NOTE(review): this configfile is not referenced by the command above. -->
    <configfiles>
        <configfile name="cmems_credentials">
            #set $cmems_username = $__user__.extra_preferences.get('cmems_username', "")
            #set $cmems_password = $__user__.extra_preferences.get('cmems_password', "")
            cmems_username: $cmems_username
            cmems_password: $cmems_password
        </configfile>
    </configfiles>
    <inputs>
        <param name="input_text" label="Paste API Request" type="text" area="true">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="_" />
                    <add value="-" />
                    <add value=" " />
                    <add value=":" />
                    <add value="." />
                    <add value=" " />
                </valid>
            </sanitizer>
            <validator type="regex">[0-9a-zA-Z_:\-\. ]+</validator>
        </param>
    </inputs>
    <outputs>
        <data name="output_netcdf" label="Copernicus marine data" from_work_dir="./*.nc" format="netcdf"/>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <param name="input_text" value="copernicusmarine subset --dataset-id cmems_mod_med_phy_my_4.2km-climatology_P1M-m --variable bottomT_avg --variable bottomT_std --variable mlotst_avg --variable mlotst_std --variable so_avg --variable so_std --variable thetao_avg --variable thetao_std --variable uo_avg --variable uo_std --variable vo_avg --variable vo_std --variable zos_avg --variable zos_std --start-datetime 1993-12-01T00:00:00 --end-datetime 1993-12-01T00:00:00 --minimum-longitude 10.283266521135577 --maximum-longitude 12.139348881644054 --minimum-latitude 38.461936025366256 --maximum-latitude 39.82957565942514 --minimum-depth 1.0182366371154785 --maximum-depth 1.0182366371154785"/>
            <output name="output_netcdf">
                <assert_contents>
                    <has_text text="_NCProperties"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
============================
Copernicus Marine Data Store
============================

**Context**

This tool is a wrapper to retrieve data from the Copernicus Marine Environment Monitoring Service (CMEMS).

- It allows you to retrieve data from the Copernicus Marine Service.
- Any user willing to use this tool needs to `create a new account <https://data.marine.copernicus.eu/login>`_.
- Set your Copernicus CMEMS API Key via: User > Preferences > Manage Information
- Enter your username and password for Copernicus CMEMS
- Compose your request directly on Copernicus Marine Data Store

  - Choose the data you are interested in there and click on the download button
  - Then on the top right click again on the big download button
  - Log in
  - Click on "Automate"
  - You should have a pop-up window called "Automate download"
  - Copy the ">_Command-Line Interface" proposed there

- Back on Galaxy paste it in the input field "Paste API Request".

For more information on the Command-Line Interface (CLI) go to `Copernicus Marine Toolbox CLI - Subset <https://help.marine.copernicus.eu/en/articles/7972861-copernicus-marine-toolbox-cli-subset>`_

**Input**

The command line copied from the Copernicus Marine Service, pasted as text.

**Output**

A netcdf file containing the data chosen by the user from the Copernicus Marine Data Store.

    ]]></help>
    <citations>
        <citation type="bibtex">
            @misc{Copernicus,
            title={Copernicus Marine Data Store},
            url={https://data.marine.copernicus.eu/products},
            note={E.U. Copernicus Marine Service Information},
            author={Copernicus, E.U.}
            }
        </citation>
    </citations>
</tool>
# divandfull.jl
#Julia script

###############################
##    DIVAndrun analysis     ##
###############################
import Pkg;
using Pkg
Pkg.status()

### Import packages
using DIVAnd
using Dates
using Printf

# Getting the arguments from the command line
args = ARGS

# Eleven positional arguments are read below, so fail early with a clear
# message when some of them are missing.
if length(args) < 11
    error("This tool needs 11 arguments")
end

# Import data
netcdf_data = args[1]               # observation file (netCDF)
longmin = parse(Float64, args[2])
longmax = parse(Float64, args[3])
latmin = parse(Float64, args[4])
latmax = parse(Float64, args[5])
startdate = args[6]                 # yyyy-mm-dd
enddate = args[7]                   # yyyy-mm-dd
varname = args[8]
selmin = parse(Float64, args[9])    # lowest accepted observation value
selmax = parse(Float64, args[10])   # highest accepted observation value
bathname = args[11]                 # bathymetry file

## This script will create a climatology:
# 1. Observation data reading.
# 2. Extraction of bathymetry and creation of mask
# 3. Quality control.
# 4. Analysis parameters.
# 5. Output file.
# 6. Spatio-temporal interpolation with DIVAnd.


### Configuration
# Define the horizontal, vertical (depth levels) and temporal resolutions.
# Select the variable of interest

dx, dy = 0.125, 0.125
lonr = longmin:dx:longmax
latr = latmin:dy:latmax

# Convert the date strings into dates
startdate = Date(startdate, "yyyy-mm-dd")
enddate = Date(enddate, "yyyy-mm-dd")

# NOTE(review): timerange is not used below; the time selection relies on
# yearlist and monthlist only. Confirm whether the requested dates should
# restrict yearlist.
timerange = [startdate, enddate];

# Depth levels of the analysis. A finer list that could be used instead:
# [0.,5., 10., 15., 20., 25., 30., 40., 50., 66,
#  75, 85, 100, 112, 125, 135, 150, 175, 200, 225, 250,
#  275, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750,
#  800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200, 1250,
#  1300, 1350, 1400, 1450, 1500, 1600, 1750, 1850, 2000]
depthr = [0.,10.,20.];

yearlist = [1900:2023];
monthlist = [[1,2,3],[4,5,6],[7,8,9],[10,11,12]];

# We create here the variable TS (for "time selector"), which allows us to
# work with the observations corresponding to each period of interest.

TS = DIVAnd.TimeSelectorYearListMonthList(yearlist,monthlist);
@show TS;

figdir = "outputs/"
if !isdir(figdir)
    mkdir(figdir)
else
    @info("Figure directory already exists")
end

### 1. Read the observation file
datafile = netcdf_data

# Then you can read the full file:
@time obsval,obslon,obslat,obsdepth,obstime,obsid = NCODV.load(Float64, datafile,
    "Water body $(varname)");

# Check the extremal values of the observations
checkobs((obslon,obslat,obsdepth,obstime),obsval,obsid)

### 2. Extract the bathymetry

# It is used to delimit the domain where the interpolation is performed.
## 2.1 Choice of bathymetry

# Modify bathname according to the resolution required.

@time bx,by,b = load_bath(bathname,true,lonr,latr);

## 2.2 Create mask
# A cell of the mask is true when the bathymetry value at that position is
# greater than or equal to the depth level, and false otherwise.

mask = falses(size(b,1),size(b,2),length(depthr))
for k = 1:length(depthr)
    for j = 1:size(b,2)
        for i = 1:size(b,1)
            mask[i,j,k] = b[i,j] >= depthr[k]
        end
    end
end
@show size(mask)

### 3. Quality control
# Keep only the observations whose value lies between selmin and selmax.
# Adapt the criteria to your region and variable.

sel = (obsval .<= selmax) .& (obsval .>= selmin);

obsval = obsval[sel]
obslon = obslon[sel]
obslat = obslat[sel]
obsdepth = obsdepth[sel]
obstime = obstime[sel]
obsid = obsid[sel];

### 4. Analysis parameters
# Correlation lengths and noise-to-signal ratio

# We will use the function diva3d for the calculations.
# With this function, the correlation length has to be defined in meters, not in degrees.

sz = (length(lonr),length(latr),length(depthr));
lenx = fill(100_000.,sz)   # 100 km
leny = fill(100_000.,sz)   # 100 km
lenz = fill(25.,sz);       # 25 m
len = (lenx, leny, lenz);
epsilon2 = 0.1;

### 5. Output file name
outputdir = "outputs_netcdf/"
if !isdir(outputdir)
    mkpath(outputdir)
end
filename = joinpath(outputdir, "Water_body_$(replace(varname," "=>"_")).nc")

### 6. Analysis
# Remove the result file of a previous run before running the analysis.
if isfile(filename)
    rm(filename) # delete the previous analysis
    @info "Removing file $filename"
end

## 6.1 Plotting function
# Define a plotting function that can be applied for each time index and depth level.
# All the figures are saved in figdir.
# NOTE(review): plotres is not passed to diva3d below, so it is never called.
# It relies on plotting functions (figure, subplot, pcolor, plt, ...) and on
# mean, which are not provided by the packages loaded above. The axis limits
# and the colour range are hard-coded.

function plotres(timeindex,sel,fit,erri)
    tmp = copy(fit)
    nx,ny,nz = size(tmp)
    for i in 1:nz
        figure("Additional-Data")
        ax = subplot(1,1,1)
        ax.tick_params("both",labelsize=6)
        ylim(39.0, 46.0);
        xlim(11.5, 20.0);
        title("Depth: $(depthr[i]) \n Time index: $(timeindex)", fontsize=6)
        pcolor(lonr.-dx/2.,latr.-dy/2, permutedims(tmp[:,:,i], [2,1]);
               vmin = 33, vmax = 40)
        colorbar(extend="both", orientation="vertical", shrink=0.8).ax.tick_params(labelsize=8)

        contourf(bx,by,permutedims(b,[2,1]), levels = [-1e5,0],colors = [[.5,.5,.5]])
        aspectratio = 1/cos(mean(latr) * pi/180)
        gca().set_aspect(aspectratio)

        figname = varname * @sprintf("_%02d",i) * @sprintf("_%03d.png",timeindex)
        plt.savefig(joinpath(figdir, figname), dpi=600, bbox_inches="tight");
        plt.close_figs()
    end
end

## 6.2 Create the gridded fields using diva3d
# The correlation length is kept fixed (fitcorrlen = false).
# Set fitcorrlen to true to also optimise the correlation length.
@time dbinfo = DIVAnd.diva3d((lonr,latr,depthr,TS),
    (obslon,obslat,obsdepth,obstime), obsval,
    len, epsilon2,
    filename,varname,
    bathname=bathname,
    fitcorrlen = false,
    niter_e = 2,
    surfextend = true
    );

# Save the observation metadata in the NetCDF file.
DIVAnd.saveobs(filename,(obslon,obslat,obsdepth,obstime),obsid);
<!-- macro.xml: shared Galaxy macros (argopy requirement, user-mode input
     and citation). -->
<macros>
    <!-- Version of the argopy package and suffix of the wrapper version. -->
    <token name="@TOOL_VERSION@">0.1.15</token>
    <token name="@VERSION_SUFFIX@">0</token>

    <!-- Requirements: argopy, plus whatever the expanding tool yields. -->
    <xml name="argo_requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">argopy</requirement>
            <yield/>
        </requirements>
    </xml>

    <!-- Select input offering the three user modes. -->
    <xml name="argo_input_user">
        <inputs>
            <param name="user" type="select" label="Which kind of user are you ?">
                <option value="standard">🏊 standard mode simplifies the dataset, remove most of its jargon and return a priori good data</option>
                <option value="research">🚣 research mode simplifies the dataset to its heart, preserving only data of the highest quality for research studies, including studies sensitive to small pressure and salinity bias </option>
                <option value="expert">🏄 expert mode return all the Argo data, without any postprocessing</option>
            </param>
        </inputs>
    </xml>

    <!-- Citation shared by the tools. -->
    <xml name="argo_bibref">
        <citations>
            <citation type="doi">doi:10.21105/joss.02425</citation>
        </citations>
    </xml>
</macros>