# HG changeset patch
# User ecology
# Date 1642698439 0
# Node ID bf595d613af4628bc5f059ee58c11cd6c698284b
# Parent 123a9a629bef9bfbaea7ec31a3a0d0241bbdacad
"planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit 2166974df82f97557b082a9e55135098e61640c4"
diff -r 123a9a629bef -r bf595d613af4 README.md
--- a/README.md Sun Jun 06 08:51:41 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-# Xarray tools for netCDF
-## netCDF metadata information
-
-The first tool `xarray_metadata_info` uses xarray to provide users with general information about variable names, dimensions
-and attributes.
-The variables that can be extracted and the available dimensions are printed in a tabular file.
-
-The tool also prints a general information file, which is the result of the xarray method info().
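The removed README describes what `xarray_metadata_info` does. A minimal sketch of the same behaviour with plain xarray (the file names are assumptions; the calls mirror `XarrayTool.info()` and `XarrayTool.summary()` in `xarray_tool.py`):

```python
import xarray as xr

ds = xr.open_dataset('input.nc')               # hypothetical input file
with open('info_file.txt', 'w') as f:
    ds.info(f)                                 # the "general information" file
for name, da in ds.data_vars.items():          # the tabular variable summary
    print(name, len(da.shape), dict(da.sizes))
```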
diff -r 123a9a629bef -r bf595d613af4 macros.xml
--- a/macros.xml Sun Jun 06 08:51:41 2021 +0000
+++ b/macros.xml Thu Jan 20 17:07:19 2022 +0000
@@ -1,185 +1,28 @@
-
- 0.18.2
- 0
-
-
- topic_0610
- topic_3050
-
-
-
-
-
- @article{hoyer2017xarray,
- title = {xarray: {N-D} labeled arrays and datasets in {Python}},
- author = {Hoyer, S. and J. Hamman},
- journal = {Journal of Open Research Software},
- volume = {5},
- number = {1},
- year = {2017},
- publisher = {Ubiquity Press},
- doi = {10.5334/jors.148},
- url = {http://doi.org/10.5334/jors.148}
- }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ 0.20.2
+ 0
+ 20.05
+
+
+ topic_0610
+ topic_3050
+
+
+
+
+
+ @article{hoyer2017xarray,
+ title = {xarray: {N-D} labeled arrays and datasets in {Python}},
+ author = {Hoyer, S. and J. Hamman},
+ journal = {Journal of Open Research Software},
+ volume = {5},
+ number = {1},
+ year = {2017},
+ publisher = {Ubiquity Press},
+ doi = {10.5334/jors.148},
+ url = {http://doi.org/10.5334/jors.148}
+ }
+
+
+
+
diff -r 123a9a629bef -r bf595d613af4 macros_mapplot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros_mapplot.xml Thu Jan 20 17:07:19 2022 +0000
@@ -0,0 +1,213 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 123a9a629bef -r bf595d613af4 macros_netcdf2netcdf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros_netcdf2netcdf.xml Thu Jan 20 17:07:19 2022 +0000
@@ -0,0 +1,143 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 123a9a629bef -r bf595d613af4 macros_tests_netcdf2netcdf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros_tests_netcdf2netcdf.xml Thu Jan 20 17:07:19 2022 +0000
@@ -0,0 +1,263 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 123a9a629bef -r bf595d613af4 test-data/all.netcdf
Binary file test-data/all.netcdf has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/chl_alltimes.nc
Binary file test-data/chl_alltimes.nc has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/chl_nh4.netcdf
Binary file test-data/chl_nh4.netcdf has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/chl_phy_where.netcdf
Binary file test-data/chl_phy_where.netcdf has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/chl_where_drop.netcdf
Binary file test-data/chl_where_drop.netcdf has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/data_from_20040615.nc
Binary file test-data/data_from_20040615.nc has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/data_to_20040615.nc
Binary file test-data/data_to_20040615.nc has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0.png
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0.png has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0_title.png
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0_title.png has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time1.png
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time1.png has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time50.png
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time50.png has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/info_file.txt
--- a/test-data/info_file.txt Sun Jun 06 08:51:41 2021 +0000
+++ b/test-data/info_file.txt Thu Jan 20 17:07:19 2022 +0000
@@ -1,9 +1,9 @@
xarray.Dataset {
dimensions:
+ time = 145 ;
depth = 1 ;
latitude = 97 ;
longitude = 103 ;
- time = 145 ;
variables:
float32 phy(time, depth, latitude, longitude) ;
diff -r 123a9a629bef -r bf595d613af4 test-data/select_by_values.netcdf
Binary file test-data/select_by_values.netcdf has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/small.netcdf
Binary file test-data/small.netcdf has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/small_all_variables.netcdf
Binary file test-data/small_all_variables.netcdf has changed
diff -r 123a9a629bef -r bf595d613af4 test-data/version.tabular
--- a/test-data/version.tabular Sun Jun 06 08:51:41 2021 +0000
+++ b/test-data/version.tabular Thu Jan 20 17:07:19 2022 +0000
@@ -1,1 +1,1 @@
-Galaxy xarray version 0.18.2
+Galaxy xarray version 0.20.2
diff -r 123a9a629bef -r bf595d613af4 test-data/where_condition.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/where_condition.txt Thu Jan 20 17:07:19 2022 +0000
@@ -0,0 +1,1 @@
+((chl > 1) | (chl < 45)) & (nh4 > 1)
diff -r 123a9a629bef -r bf595d613af4 test-data/where_condition_simple.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/where_condition_simple.txt Thu Jan 20 17:07:19 2022 +0000
@@ -0,0 +1,1 @@
+nh4 > 5.15
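For context, a sketch of how `netCDF2netCDF.eval_where()` in `xarray_netcdf2netcdf.py` consumes these new where-condition files (it assumes the variables named in the condition, here `nh4` and `chl`, exist in the dataset):

```python
# Each variable name is rewritten to an attribute access on the open dataset,
# the remaining tokens are checked to be numeric, and the string is eval()'d.
condition = 'nh4 > 5.15'                     # content of where_condition_simple.txt
rewritten = condition.replace('nh4', ' self.ds.nh4 ')
# mask = eval(rewritten)                     # inside the class, against self.ds
# self.ds.where(mask)                        # or .where(mask, drop=True) with --drop
```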
diff -r 123a9a629bef -r bf595d613af4 xarray_mapplot.py
--- a/xarray_mapplot.py Sun Jun 06 08:51:41 2021 +0000
+++ b/xarray_mapplot.py Thu Jan 20 17:07:19 2022 +0000
@@ -1,457 +1,411 @@
-#!/usr/bin/env python3
-#
-#
-# usage: xarray_mapplot.py [-h] [--proj PROJ]
-# [--cmap CMAP]
-# [--output OUTPUT]
-# [--time TIMES]
-# [--nrow NROW]
-# [--ncol NCOL]
-# [--title title]
-# [--latitude LATITUDE]
-# [--longitude LONGITUDE]
-# [--land ALPHA-LAND]
-# [--ocean ALPHA-OCEAN]
-# [--coastline ALPHA-COASTLINE]
-# [--borders ALPHA-BORDERS]
-# [--xlim "x1,x2"]
-# [--ylim "y1,y2"]
-# [--range "valmin,valmax"]
-# [--threshold VAL]
-# [--label label-colorbar]
-# [--shift]
-# [-v]
-# input varname
-#
-# positional arguments:
-# input input filename with geographical coordinates (netCDF
-# format)
-# varname Specify which variable to plot (case sensitive)
-#
-# optional arguments:
-# -h, --help show this help message and exit
-# --proj PROJ Specify the projection on which we draw
-# --cmap CMAP Specify which colormap to use for plotting
-# --output OUTPUT output filename to store resulting image (png format)
-# --time TIMES time index from the file for multiple plots ("0 1 2 3")
-# --title plot or subplot title
-# --latitude variable name for latitude
-# --longitude variable name for longitude
-# --land add land on plot with alpha value [0-1]
-# --ocean add oceans on plot with alpha value [0-1]
-# --coastline add coastline with alpha value [0-1]
-# --borders add country borders with alpha value [0-1]
-# --xlim limited geographical area longitudes "x1,x2"
-# --ylim limited geographical area latitudes "y1,y2"
-# --range "valmin,valmax" for plotting
-# --threshold do not plot values below threshold
-# --label set a label for colormap
-# --shift shift longitudes if specified
-# -v, --verbose switch on verbose mode
-#
-
-import argparse
-import ast
-import warnings
-from pathlib import Path
-
-import cartopy.crs as ccrs
-import cartopy.feature as feature
-
-from cmcrameri import cm
-
-import matplotlib as mpl
-mpl.use('Agg')
-from matplotlib import pyplot # noqa: I202,E402
-
-import xarray as xr # noqa: E402
-
-
-class MapPlotXr ():
- def __init__(self, input, proj, varname, cmap, output, verbose=False,
- time=[], title="", latitude="latitude",
- longitude="longitude", land=0, ocean=0,
- coastline=0, borders=0, xlim=[], ylim=[],
- threshold="", label="", shift=False,
- range_values=[]):
- self.input = input
- print("PROJ", proj)
- if proj != "" and proj is not None:
- self.proj = proj.replace('X', ':')
- else:
- self.proj = proj
- self.varname = varname
- self.get_cmap(cmap)
- self.time = time
- self.latitude = latitude
- self.longitude = longitude
- self.land = land
- self.ocean = ocean
- self.coastline = coastline
- self.borders = borders
- self.xlim = xlim
- self.ylim = ylim
- self.range = range_values
- self.threshold = threshold
- self.shift = shift
- self.xylim_supported = False
- self.colorbar = True
- self.title = title
- if output is None:
- self.output = Path(input).stem + '.png'
- else:
- self.output = output
- self.verbose = verbose
- self.dset = xr.open_dataset(self.input, use_cftime=True)
-
- self.label = {}
- if label != "" and label is not None:
- self.label['label'] = label
- if verbose:
- print("input: ", self.input)
- print("proj: ", self.proj)
- print("varname: ", self.varname)
- print("time: ", self.time)
- print("minval, maxval: ", self.range)
- print("title: ", self.title)
- print("output: ", self.output)
- print("label: ", self.label)
- print("shift: ", self.shift)
- print("ocean: ", self.ocean)
- print("land: ", self.land)
- print("coastline: ", self.coastline)
- print("borders: ", self.borders)
- print("latitude: ", self.latitude)
- print("longitude: ", self.longitude)
- print("xlim: ", self.xlim)
- print("ylim: ", self.ylim)
-
- def get_cmap(self, cmap):
- if cmap[0:3] == 'cm.':
- self.cmap = cm.__dict__[cmap[3:]]
- else:
- self.cmap = cmap
-
- def projection(self):
- if self.proj is None:
- return ccrs.PlateCarree()
-
- proj_dict = ast.literal_eval(self.proj)
-
- user_proj = proj_dict.pop("proj")
- if user_proj == 'PlateCarree':
- self.xylim_supported = True
- return ccrs.PlateCarree(**proj_dict)
- elif user_proj == 'AlbersEqualArea':
- return ccrs.AlbersEqualArea(**proj_dict)
- elif user_proj == 'AzimuthalEquidistant':
- return ccrs.AzimuthalEquidistant(**proj_dict)
- elif user_proj == 'EquidistantConic':
- return ccrs.EquidistantConic(**proj_dict)
- elif user_proj == 'LambertConformal':
- return ccrs.LambertConformal(**proj_dict)
- elif user_proj == 'LambertCylindrical':
- return ccrs.LambertCylindrical(**proj_dict)
- elif user_proj == 'Mercator':
- return ccrs.Mercator(**proj_dict)
- elif user_proj == 'Miller':
- return ccrs.Miller(**proj_dict)
- elif user_proj == 'Mollweide':
- return ccrs.Mollweide(**proj_dict)
- elif user_proj == 'Orthographic':
- return ccrs.Orthographic(**proj_dict)
- elif user_proj == 'Robinson':
- return ccrs.Robinson(**proj_dict)
- elif user_proj == 'Sinusoidal':
- return ccrs.Sinusoidal(**proj_dict)
- elif user_proj == 'Stereographic':
- return ccrs.Stereographic(**proj_dict)
- elif user_proj == 'TransverseMercator':
- return ccrs.TransverseMercator(**proj_dict)
- elif user_proj == 'UTM':
- return ccrs.UTM(**proj_dict)
- elif user_proj == 'InterruptedGoodeHomolosine':
- return ccrs.InterruptedGoodeHomolosine(**proj_dict)
- elif user_proj == 'RotatedPole':
- return ccrs.RotatedPole(**proj_dict)
- elif user_proj == 'OSGB':
- self.xylim_supported = False
- return ccrs.OSGB(**proj_dict)
- elif user_proj == 'EuroPP':
- self.xylim_supported = False
- return ccrs.EuroPP(**proj_dict)
- elif user_proj == 'Geostationary':
- return ccrs.Geostationary(**proj_dict)
- elif user_proj == 'NearsidePerspective':
- return ccrs.NearsidePerspective(**proj_dict)
- elif user_proj == 'EckertI':
- return ccrs.EckertI(**proj_dict)
- elif user_proj == 'EckertII':
- return ccrs.EckertII(**proj_dict)
- elif user_proj == 'EckertIII':
- return ccrs.EckertIII(**proj_dict)
- elif user_proj == 'EckertIV':
- return ccrs.EckertIV(**proj_dict)
- elif user_proj == 'EckertV':
- return ccrs.EckertV(**proj_dict)
- elif user_proj == 'EckertVI':
- return ccrs.EckertVI(**proj_dict)
- elif user_proj == 'EqualEarth':
- return ccrs.EqualEarth(**proj_dict)
- elif user_proj == 'Gnomonic':
- return ccrs.Gnomonic(**proj_dict)
- elif user_proj == 'LambertAzimuthalEqualArea':
- return ccrs.LambertAzimuthalEqualArea(**proj_dict)
- elif user_proj == 'NorthPolarStereo':
- return ccrs.NorthPolarStereo(**proj_dict)
- elif user_proj == 'OSNI':
- return ccrs.OSNI(**proj_dict)
- elif user_proj == 'SouthPolarStereo':
- return ccrs.SouthPolarStereo(**proj_dict)
-
- def plot(self, ts=None):
- if self.shift:
- if self.longitude == 'longitude':
- self.dset = self.dset.assign_coords(
- longitude=(((
- self.dset[self.longitude]
- + 180) % 360) - 180))
- elif self.longitude == 'lon':
- self.dset = self.dset.assign_coords(
- lon=(((self.dset[self.longitude]
- + 180) % 360) - 180))
-
- pyplot.figure(1, figsize=[20, 10])
-
- # Set the projection to use for plotting
- ax = pyplot.subplot(1, 1, 1, projection=self.projection())
- if self.land:
- ax.add_feature(feature.LAND, alpha=self.land)
-
- if self.ocean:
- ax.add_feature(feature.OCEAN, alpha=self.ocean)
- if self.coastline:
- ax.coastlines(resolution='10m', alpha=self.coastline)
- if self.borders:
- ax.add_feature(feature.BORDERS, linestyle=':', alpha=self.borders)
-
- if self.xlim:
- min_lon = min(self.xlim[0], self.xlim[1])
- max_lon = max(self.xlim[0], self.xlim[1])
- else:
- min_lon = self.dset[self.longitude].min()
- max_lon = self.dset[self.longitude].max()
-
- if self.ylim:
- min_lat = min(self.ylim[0], self.ylim[1])
- max_lat = max(self.ylim[0], self.ylim[1])
- else:
- min_lat = self.dset[self.latitude].min()
- max_lat = self.dset[self.latitude].max()
-
- if self.xylim_supported:
- pyplot.xlim(min_lon, max_lon)
- pyplot.ylim(min_lat, max_lat)
-
- # Fix extent
- if self.threshold == "" or self.threshold is None:
- threshold = self.dset[self.varname].min()
- else:
- threshold = float(self.threshold)
-
- if self.range == []:
- minval = self.dset[self.varname].min()
- maxval = self.dset[self.varname].max()
- else:
- minval = self.range[0]
- maxval = self.range[1]
-
- if self.verbose:
- print("minval: ", minval)
- print("maxval: ", maxval)
-
- # pass extent with vmin and vmax parameters
- proj_t = ccrs.PlateCarree()
- if ts is None:
- self.dset.where(
- self.dset[self.varname] > threshold
- )[self.varname].plot(ax=ax,
- vmin=minval,
- vmax=maxval,
- transform=proj_t,
- cmap=self.cmap,
- cbar_kwargs=self.label
- )
- if self.title != "" and self.title is not None:
- pyplot.title(self.title)
- pyplot.savefig(self.output)
- else:
- if self.colorbar:
- self.dset.where(
- self.dset[self.varname] > threshold
- )[self.varname].isel(time=ts).plot(ax=ax,
- vmin=minval,
- vmax=maxval,
- transform=proj_t,
- cmap=self.cmap,
- cbar_kwargs=self.label
- )
- else:
- self.dset.where(
- self.dset[self.varname] > minval
- )[self.varname].isel(time=ts).plot(ax=ax,
- vmin=minval,
- vmax=maxval,
- transform=proj_t,
- cmap=self.cmap,
- add_colorbar=False)
- if self.title != "" and self.title is not None:
- pyplot.title(self.title + "(time = " + str(ts) + ')')
- pyplot.savefig(self.output[:-4] + "_time" + str(ts) +
- self.output[-4:]) # assume png format
-
-
-if __name__ == '__main__':
- warnings.filterwarnings("ignore")
- parser = argparse.ArgumentParser()
- parser.add_argument(
- 'input',
- help='input filename with geographical coordinates (netCDF format)'
- )
-
- parser.add_argument(
- '--proj',
- help='Specify the projection on which we draw'
- )
- parser.add_argument(
- 'varname',
- help='Specify which variable to plot (case sensitive)'
- )
- parser.add_argument(
- '--cmap',
- help='Specify which colormap to use for plotting'
- )
- parser.add_argument(
- '--output',
- help='output filename to store resulting image (png format)'
- )
- parser.add_argument(
- '--time',
- help='list of times to plot for multiple plots'
- )
- parser.add_argument(
- '--title',
- help='plot title'
- )
- parser.add_argument(
- '--latitude',
- help='variable name for latitude'
- )
- parser.add_argument(
- '--longitude',
- help='variable name for longitude'
- )
- parser.add_argument(
- '--land',
- help='add land on plot with alpha value [0-1]'
- )
- parser.add_argument(
- '--ocean',
- help='add oceans on plot with alpha value [0-1]'
- )
- parser.add_argument(
- '--coastline',
- help='add coastline with alpha value [0-1]'
- )
- parser.add_argument(
- '--borders',
- help='add country borders with alpha value [0-1]'
- )
- parser.add_argument(
- '--xlim',
- help='limited geographical area longitudes "x1,x2"'
- )
- parser.add_argument(
- '--ylim',
- help='limited geographical area latitudes "y1,y2"'
- )
- parser.add_argument(
- '--range',
- help='min and max values for plotting "minval,maxval"'
- )
- parser.add_argument(
- '--threshold',
- help='do not plot values below threshold'
- )
- parser.add_argument(
- '--label',
- help='set a label for colorbar'
- )
- parser.add_argument(
- '--shift',
- help='shift longitudes if specified',
- action="store_true"
- )
- parser.add_argument(
- "-v", "--verbose",
- help="switch on verbose mode",
- action="store_true")
- args = parser.parse_args()
-
- if args.time is None:
- time = []
- else:
- time = list(map(int, args.time.split(",")))
- if args.xlim is None:
- xlim = []
- else:
- xlim = list(map(float, args.xlim.split(",")))
- if args.ylim is None:
- ylim = []
- else:
- ylim = list(map(float, args.ylim.split(",")))
- if args.range is None:
- range_values = []
- else:
- range_values = list(map(float, args.range.split(",")))
- if args.latitude is None:
- latitude = "latitude"
- else:
- latitude = args.latitude
- if args.longitude is None:
- longitude = "longitude"
- else:
- longitude = args.longitude
- if args.land is None:
- land = 0
- else:
- land = float(args.land)
- if args.ocean is None:
- ocean = 0
- else:
- ocean = float(args.ocean)
- if args.coastline is None:
- coastline = 0
- else:
- coastline = float(args.coastline)
- if args.borders is None:
- borders = 0
- else:
- borders = float(args.borders)
-
- dset = MapPlotXr(input=args.input, proj=args.proj, varname=args.varname,
- cmap=args.cmap, output=args.output, verbose=args.verbose,
- time=time, title=args.title,
- latitude=latitude, longitude=longitude, land=land,
- ocean=ocean, coastline=coastline, borders=borders,
- xlim=xlim, ylim=ylim, threshold=args.threshold,
- label=args.label, shift=args.shift,
- range_values=range_values)
-
- if dset.time == []:
- dset.plot()
- else:
- for t in dset.time:
- dset.plot(t)
- dset.shift = False # only shift once
- dset.colorbar = True
+#!/usr/bin/env python3
+#
+#
+# usage: xarray_mapplot.py [-h] [--proj PROJ]
+#                          [--output OUTPUT]
+#                          [--config CONFIG]
+#                          [--shift]
+#                          [-v]
+#                          input varname
+#
+# positional arguments:
+#  input              input filename(s) with geographical coordinates (netCDF
+#                     format); several files can be given as a comma-separated
+#                     list
+#  varname            Specify which variable to plot (case sensitive)
+#
+# optional arguments:
+#  -h, --help         show this help message and exit
+#  --proj PROJ        config file with the projection on which we draw
+#  --output OUTPUT    output filename to store resulting image (png format)
+#  --config CONFIG    config file with the plotting parameters (cmap, time,
+#                     latitude, longitude, land, ocean, coastline, borders,
+#                     xlim, ylim, range, threshold, label and title);
+#                     these overwrite the default plotting options
+#  --shift            shift longitudes if specified
+#  -v, --verbose      switch on verbose mode
+#
+
+import argparse
+import ast
+import warnings
+from pathlib import Path
+
+import cartopy.crs as ccrs
+import cartopy.feature as feature
+
+from cmcrameri import cm
+
+import matplotlib as mpl
+mpl.use('Agg')
+from matplotlib import pyplot # noqa: I202,E402
+
+import xarray as xr # noqa: E402
+
+
+class MapPlotXr ():
+ def __init__(self, input, varname, output, verbose=False,
+ config_file="", proj="", shift=False):
+
+ li = list(input.split(","))
+ if len(li) > 1:
+ self.input = li
+ else:
+ self.input = input
+
+ if proj != "" and proj is not None and Path(proj).exists():
+ f = open(proj)
+ sdict = ''.join(
+ f.read().replace("\n", "").split('{')[1].split('}')[0]
+ )
+ self.proj = '{' + sdict.strip() + '}'
+ else:
+ self.proj = None
+ self.varname = varname
+ self.shift = shift
+ self.xylim_supported = False
+ self.colorbar = True
+ if output is None:
+ if type(self.input) is list:
+ self.output = Path(self.input[0]).stem + '.png'
+ else:
+ self.output = Path(self.input).stem + '.png'
+ else:
+ self.output = output
+ self.verbose = verbose
+ self.label = {}
+ self.time = []
+ self.xlim = []
+ self.ylim = []
+ self.range = []
+ self.latitude = "latitude"
+ self.longitude = "longitude"
+ self.land = 0
+ self.ocean = 0
+ self.coastline = 0
+ self.borders = 0
+ self.cmap = "coolwarm"
+ self.threshold = ""
+ self.title = ""
+
+ if config_file != "" and config_file is not None:
+ with open(config_file) as f:
+ sdict = ''.join(
+ f.read().replace("\n", "").split('{')[1].split('}')[0]
+ )
+ tmp = ast.literal_eval('{' + sdict.strip() + '}')
+ for key in tmp:
+ if key == 'time':
+ time = tmp[key]
+ self.time = list(map(int, time.split(",")))
+ if key == 'cmap':
+ self.get_cmap(tmp[key])
+ if key == 'latitude':
+ self.latitude = tmp[key]
+ if key == 'longitude':
+ self.longitude = tmp[key]
+ if key == 'land':
+ self.land = float(tmp[key])
+ if key == 'ocean':
+ self.ocean = float(tmp[key])
+ if key == 'coastline':
+ self.coastline = float(tmp[key])
+ if key == 'borders':
+ self.borders = float(tmp[key])
+ if key == 'xlim':
+ xlim = tmp[key]
+ self.xlim = list(map(float, xlim.split(",")))
+ if key == 'ylim':
+ ylim = tmp[key]
+ self.ylim = list(map(float, ylim.split(",")))
+ if key == 'range':
+ range_values = tmp[key]
+ self.range = list(map(float, range_values.split(",")))
+ if key == 'threshold':
+ self.threshold = float(tmp[key])
+ if key == 'label':
+ self.label['label'] = tmp[key]
+ if key == 'title':
+ self.title = tmp[key]
+
+ if type(self.input) is list:
+ self.dset = xr.open_mfdataset(self.input, use_cftime=True)
+ else:
+ self.dset = xr.open_dataset(self.input, use_cftime=True)
+
+ if verbose:
+ print("input: ", self.input)
+ print("proj: ", self.proj)
+ print("varname: ", self.varname)
+ print("time: ", self.time)
+ print("minval, maxval: ", self.range)
+ print("title: ", self.title)
+ print("output: ", self.output)
+ print("label: ", self.label)
+ print("shift: ", self.shift)
+ print("ocean: ", self.ocean)
+ print("land: ", self.land)
+ print("coastline: ", self.coastline)
+ print("borders: ", self.borders)
+ print("latitude: ", self.latitude)
+ print("longitude: ", self.longitude)
+ print("xlim: ", self.xlim)
+ print("ylim: ", self.ylim)
+
+ def get_cmap(self, cmap):
+ if cmap[0:3] == 'cm.':
+ self.cmap = cm.__dict__[cmap[3:]]
+ else:
+ self.cmap = cmap
+
+ def projection(self):
+ if self.proj is None:
+ return ccrs.PlateCarree()
+
+ proj_dict = ast.literal_eval(self.proj)
+ user_proj = proj_dict.pop("proj")
+ if user_proj == 'PlateCarree':
+ self.xylim_supported = True
+ return ccrs.PlateCarree(**proj_dict)
+ elif user_proj == 'AlbersEqualArea':
+ return ccrs.AlbersEqualArea(**proj_dict)
+ elif user_proj == 'AzimuthalEquidistant':
+ return ccrs.AzimuthalEquidistant(**proj_dict)
+ elif user_proj == 'EquidistantConic':
+ return ccrs.EquidistantConic(**proj_dict)
+ elif user_proj == 'LambertConformal':
+ return ccrs.LambertConformal(**proj_dict)
+ elif user_proj == 'LambertCylindrical':
+ return ccrs.LambertCylindrical(**proj_dict)
+ elif user_proj == 'Mercator':
+ return ccrs.Mercator(**proj_dict)
+ elif user_proj == 'Miller':
+ return ccrs.Miller(**proj_dict)
+ elif user_proj == 'Mollweide':
+ return ccrs.Mollweide(**proj_dict)
+ elif user_proj == 'Orthographic':
+ return ccrs.Orthographic(**proj_dict)
+ elif user_proj == 'Robinson':
+ return ccrs.Robinson(**proj_dict)
+ elif user_proj == 'Sinusoidal':
+ return ccrs.Sinusoidal(**proj_dict)
+ elif user_proj == 'Stereographic':
+ return ccrs.Stereographic(**proj_dict)
+ elif user_proj == 'TransverseMercator':
+ return ccrs.TransverseMercator(**proj_dict)
+ elif user_proj == 'UTM':
+ return ccrs.UTM(**proj_dict)
+ elif user_proj == 'InterruptedGoodeHomolosine':
+ return ccrs.InterruptedGoodeHomolosine(**proj_dict)
+ elif user_proj == 'RotatedPole':
+ return ccrs.RotatedPole(**proj_dict)
+ elif user_proj == 'OSGB':
+ self.xylim_supported = False
+ return ccrs.OSGB(**proj_dict)
+ elif user_proj == 'EuroPP':
+ self.xylim_supported = False
+ return ccrs.EuroPP(**proj_dict)
+ elif user_proj == 'Geostationary':
+ return ccrs.Geostationary(**proj_dict)
+ elif user_proj == 'NearsidePerspective':
+ return ccrs.NearsidePerspective(**proj_dict)
+ elif user_proj == 'EckertI':
+ return ccrs.EckertI(**proj_dict)
+ elif user_proj == 'EckertII':
+ return ccrs.EckertII(**proj_dict)
+ elif user_proj == 'EckertIII':
+ return ccrs.EckertIII(**proj_dict)
+ elif user_proj == 'EckertIV':
+ return ccrs.EckertIV(**proj_dict)
+ elif user_proj == 'EckertV':
+ return ccrs.EckertV(**proj_dict)
+ elif user_proj == 'EckertVI':
+ return ccrs.EckertVI(**proj_dict)
+ elif user_proj == 'EqualEarth':
+ return ccrs.EqualEarth(**proj_dict)
+ elif user_proj == 'Gnomonic':
+ return ccrs.Gnomonic(**proj_dict)
+ elif user_proj == 'LambertAzimuthalEqualArea':
+ return ccrs.LambertAzimuthalEqualArea(**proj_dict)
+ elif user_proj == 'NorthPolarStereo':
+ return ccrs.NorthPolarStereo(**proj_dict)
+ elif user_proj == 'OSNI':
+ return ccrs.OSNI(**proj_dict)
+ elif user_proj == 'SouthPolarStereo':
+ return ccrs.SouthPolarStereo(**proj_dict)
+
+ def plot(self, ts=None):
+ if self.shift:
+ if self.longitude == 'longitude':
+ self.dset = self.dset.assign_coords(
+ longitude=(((
+ self.dset[self.longitude]
+ + 180) % 360) - 180))
+ elif self.longitude == 'lon':
+ self.dset = self.dset.assign_coords(
+ lon=(((self.dset[self.longitude]
+ + 180) % 360) - 180))
+
+ pyplot.figure(1, figsize=[20, 10])
+
+ # Set the projection to use for plotting
+ ax = pyplot.subplot(1, 1, 1, projection=self.projection())
+ if self.land:
+ ax.add_feature(feature.LAND, alpha=self.land)
+
+ if self.ocean:
+ ax.add_feature(feature.OCEAN, alpha=self.ocean)
+ if self.coastline:
+ ax.coastlines(resolution='10m', alpha=self.coastline)
+ if self.borders:
+ ax.add_feature(feature.BORDERS, linestyle=':', alpha=self.borders)
+
+ if self.xlim:
+ min_lon = min(self.xlim[0], self.xlim[1])
+ max_lon = max(self.xlim[0], self.xlim[1])
+ else:
+ min_lon = self.dset[self.longitude].min()
+ max_lon = self.dset[self.longitude].max()
+
+ if self.ylim:
+ min_lat = min(self.ylim[0], self.ylim[1])
+ max_lat = max(self.ylim[0], self.ylim[1])
+ else:
+ min_lat = self.dset[self.latitude].min()
+ max_lat = self.dset[self.latitude].max()
+
+ if self.xylim_supported:
+ pyplot.xlim(min_lon, max_lon)
+ pyplot.ylim(min_lat, max_lat)
+
+ # Fix extent
+ if self.threshold == "" or self.threshold is None:
+ threshold = self.dset[self.varname].min()
+ else:
+ threshold = float(self.threshold)
+
+ if self.range == []:
+ minval = self.dset[self.varname].min()
+ maxval = self.dset[self.varname].max()
+ else:
+ minval = self.range[0]
+ maxval = self.range[1]
+
+ if self.verbose:
+ print("minval: ", minval)
+ print("maxval: ", maxval)
+
+ # pass extent with vmin and vmax parameters
+ proj_t = ccrs.PlateCarree()
+ if ts is None:
+ self.dset.where(
+ self.dset[self.varname] > threshold
+ )[self.varname].plot(ax=ax,
+ vmin=minval,
+ vmax=maxval,
+ transform=proj_t,
+ cmap=self.cmap,
+ cbar_kwargs=self.label
+ )
+ if self.title != "" and self.title is not None:
+ pyplot.title(self.title)
+ pyplot.savefig(self.output)
+ else:
+ if self.colorbar:
+ self.dset.where(
+ self.dset[self.varname] > threshold
+ )[self.varname].isel(time=ts).plot(ax=ax,
+ vmin=minval,
+ vmax=maxval,
+ transform=proj_t,
+ cmap=self.cmap,
+ cbar_kwargs=self.label
+ )
+ else:
+ self.dset.where(
+ self.dset[self.varname] > minval
+ )[self.varname].isel(time=ts).plot(ax=ax,
+ vmin=minval,
+ vmax=maxval,
+ transform=proj_t,
+ cmap=self.cmap,
+ add_colorbar=False)
+ if self.title != "" and self.title is not None:
+ pyplot.title(self.title + "(time = " + str(ts) + ')')
+ pyplot.savefig(self.output[:-4] + "_time" + str(ts) +
+ self.output[-4:]) # assume png format
+
+
+if __name__ == '__main__':
+ warnings.filterwarnings("ignore")
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ 'input',
+ help='input filename with geographical coordinates (netCDF format)'
+ )
+ parser.add_argument(
+ '--proj',
+ help='Config file with the projection on which we draw'
+ )
+ parser.add_argument(
+ 'varname',
+ help='Specify which variable to plot (case sensitive)'
+ )
+ parser.add_argument(
+ '--output',
+ help='output filename to store resulting image (png format)'
+ )
+ parser.add_argument(
+ '--config',
+ help='pass plotting parameters via a config file'
+ )
+ parser.add_argument(
+ '--shift',
+ help='shift longitudes if specified',
+ action="store_true"
+ )
+ parser.add_argument(
+ "-v", "--verbose",
+ help="switch on verbose mode",
+ action="store_true")
+ args = parser.parse_args()
+
+ dset = MapPlotXr(input=args.input, varname=args.varname,
+ output=args.output, verbose=args.verbose,
+ config_file=args.config, proj=args.proj,
+ shift=args.shift)
+
+ if dset.time == []:
+ dset.plot()
+ else:
+ for t in dset.time:
+ dset.plot(t)
+ dset.shift = False # only shift once
+ dset.colorbar = True
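A minimal sketch of the two plain-text inputs the new `MapPlotXr` expects: both the `--proj` and `--config` files carry a single Python-style dict literal, which the class extracts between the first `{` and `}` and parses with `ast.literal_eval`. The file names, the `Mercator` keyword argument and the `cm.roma` colormap below are assumptions for illustration only:

```python
# proj file: must contain a 'proj' key naming a cartopy CRS class; the
# remaining keys are passed to that class as keyword arguments.
proj_example = "{'proj': 'Mercator', 'central_longitude': 12.0}"

# config file: keys mirror the former CLI options; time/xlim/ylim/range must
# be comma-separated strings, the alpha values are converted with float().
config_example = """{
    'cmap': 'cm.roma',
    'time': '0,1',
    'range': '0.1,8.0',
    'title': 'Chlorophyll-a',
    'label': 'mg m-3',
    'land': 0.1,
    'coastline': 1.0
}"""

with open('proj.txt', 'w') as f:             # hypothetical file names
    f.write(proj_example)
with open('plot_config.txt', 'w') as f:
    f.write(config_example)

# Corresponding invocation (values are illustrative only):
# python xarray_mapplot.py input.nc chl --proj proj.txt --config plot_config.txt \
#        --output chl.png --shift --verbose
```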
diff -r 123a9a629bef -r bf595d613af4 xarray_netcdf2netcdf.py
--- a/xarray_netcdf2netcdf.py Sun Jun 06 08:51:41 2021 +0000
+++ b/xarray_netcdf2netcdf.py Thu Jan 20 17:07:19 2022 +0000
@@ -1,133 +1,268 @@
-#!/usr/bin/env python3
-#
-# Apply operations on selected variables
-# - scale
-# one can also select the range of time (for timeseries)
-# to apply these operations over the range only
-# when a range of time is selected and when scaling, one
-# can choose to save the entire timeseries or
-# the selected range only.
-# when scaling, one can add additional filters on dimensions
-# (typically used to filter over latitudes and longitudes)
-
-
-import argparse
-import warnings
-
-import xarray as xr # noqa: E402
-
-
-class netCDF2netCDF ():
- def __init__(self, infile, varname, scale="",
- output="output.netcdf",
- write_all=False,
- filter_list="",
- verbose=False):
- self.infile = infile
- self.verbose = verbose
- self.varname = varname
- self.write_all = write_all
- self.filter = filter_list
- self.selection = {}
- if scale == "" or scale is None:
- self.scale = 1
- else:
- self.scale = float(scale)
- if output is None:
- self.output = "output.netcdf"
- else:
- self.output = output
- # initialization
- self.dset = None
- self.subset = None
- if self.verbose:
- print("infile: ", self.infile)
- print("varname: ", self.varname)
- print("filter_list: ", self.filter)
- print("scale: ", self.scale)
- print("write_all: ", self.write_all)
- print("output: ", self.output)
-
- def dimension_selection(self, single_filter):
- split_filter = single_filter.split('#')
- dimension_varname = split_filter[0]
- op = split_filter[1]
- ll = int(split_filter[2])
- if (op == 'sl'):
- rl = int(split_filter[3])
- self.selection[dimension_varname] = slice(ll, rl)
- elif (op == 'to'):
- self.selection[dimension_varname] = slice(None, ll)
- elif (op == 'from'):
- self.selection[dimension_varname] = slice(ll, None)
- elif (op == 'is'):
- self.selection[dimension_varname] = ll
-
- def filter_selection(self):
- for single_filter in self.filter:
- self.dimension_selection(single_filter)
- if self.write_all:
- self.ds[self.varname] = \
- self.ds[self.varname].isel(self.selection)*self.scale
- else:
- self.dset = \
- self.ds[self.varname].isel(self.selection)*self.scale
-
- def compute(self):
- if self.dset is None:
- self.ds = xr.open_dataset(self.infile)
- if self.filter:
- self.filter_selection()
- if self.verbose:
- print(self.selection)
- elif self.write_all is not None:
- self.dset = self.ds[self.varname]
-
- def save(self):
- if self.write_all:
- self.ds.to_netcdf(self.output)
- else:
- self.dset.to_netcdf(self.output)
-
-
-if __name__ == '__main__':
- warnings.filterwarnings("ignore")
- parser = argparse.ArgumentParser()
- parser.add_argument(
- 'input',
- help='input filename in netCDF format'
- )
- parser.add_argument(
- 'varname',
- help='Specify which variable to plot (case sensitive)'
- )
- parser.add_argument(
- '--filter',
- nargs="*",
- help='Filter list variable#operator#value_s#value_e'
- )
- parser.add_argument(
- '--output',
- help='Output filename to store the resulting netCDF file'
- )
- parser.add_argument(
- '--scale',
- help='scale factor to apply to selection (float)'
- )
- parser.add_argument(
- "--write_all",
- help="write all data to netCDF",
- action="store_true")
- parser.add_argument(
- "-v", "--verbose",
- help="switch on verbose mode",
- action="store_true")
- args = parser.parse_args()
-
- dset = netCDF2netCDF(infile=args.input, varname=args.varname,
- scale=args.scale, output=args.output,
- filter_list=args.filter,
- write_all=args.write_all,
- verbose=args.verbose)
- dset.compute()
- dset.save()
+#!/usr/bin/env python3
+#
+# Apply operations on selected variables:
+# - scale
+# One can also select a time range (for timeseries)
+# and apply these operations over that range only.
+# When a time range is selected while scaling, one
+# can choose to save either the entire timeseries or
+# the selected range only.
+# When scaling, additional filters on dimensions can be added
+# (typically used to filter over latitudes and longitudes).
+
+
+import argparse
+import re
+import warnings
+from pathlib import Path
+
+import xarray as xr # noqa: E402
+
+
+class netCDF2netCDF ():
+ def __init__(self, infile, varname, scale="",
+ output="output.netcdf",
+ write_all=False,
+ keep_attributes=True,
+ filter_list="",
+ where_config="",
+ other="",
+ sel=False,
+ drop=False,
+ verbose=False):
+ self.drop = drop
+ if Path(where_config).exists():
+ f = open(where_config)
+ self.where = f.read().replace("\n", "")
+ else:
+ self.where = ""
+ self.other = other
+ self.sel = sel
+ li = list(infile.split(","))
+ if len(li) > 1:
+ self.infile = li
+ else:
+ self.infile = infile
+ self.verbose = verbose
+ if varname == 'None' or varname is None:
+ self.varname = varname
+ else:
+ li = list(varname.split(","))
+ self.varname = li
+ self.write_all = write_all
+ self.keep_attributes = keep_attributes
+ if self.keep_attributes:
+ xr.set_options(keep_attrs=True)
+ self.filter = filter_list
+ self.selection = {}
+ self.method = {}
+ if scale == "" or scale is None:
+ self.scale = 1
+ else:
+ self.scale = float(scale)
+ if output is None:
+ self.output = "output.netcdf"
+ else:
+ self.output = output
+ # initialization
+ self.dset = None
+ self.subset = None
+ if self.verbose:
+ print("infile: ", self.infile)
+ print("varname: ", self.varname)
+ print("filter_list: ", self.filter)
+ print("scale: ", self.scale)
+ print("write_all: ", self.write_all)
+ print("keep_attributes: ", self.keep_attributes)
+ print("sel: ", self.sel)
+ print("output: ", self.output)
+
+ def apply_selection(self):
+ self.dset = self.ds
+ for key in self.selection:
+ if 'slice' in str(self.selection[key]):
+ self.dset = self.dset.sel(
+ {key: self.selection[key]}
+ )
+ else:
+ self.dset = self.dset.sel(
+ {key: self.selection[key]},
+ method=self.method[key]
+ )
+
+ def dimension_selection(self, single_filter):
+ split_filter = single_filter.split('#')
+ dimension_varname = split_filter[0]
+ op = split_filter[1]
+ if self.sel:
+ ll = float(split_filter[2])
+ else:
+ ll = int(split_filter[2])
+ if (op == 'sl'):
+ if self.sel:
+ rl = float(split_filter[3])
+ else:
+ rl = int(split_filter[3])
+ self.selection[dimension_varname] = slice(ll, rl)
+ elif (op == 'to'):
+ self.selection[dimension_varname] = slice(None, ll)
+ elif (op == 'from'):
+ self.selection[dimension_varname] = slice(ll, None)
+ elif (op == 'is'):
+ self.selection[dimension_varname] = ll
+ if self.sel:
+ rl = split_filter[3]
+ if 'None' in rl:
+ self.method[dimension_varname] = None
+ else:
+ self.method[dimension_varname] = rl
+
+ def filter_selection(self):
+ for single_filter in self.filter:
+ self.dimension_selection(single_filter)
+
+ if self.sel:
+ self.apply_selection()
+ else:
+ self.dset = \
+ self.ds.isel(self.selection)
+
+ if self.varname != 'None' and self.varname is not None:
+ for var in self.varname:
+ self.dset[var] = \
+ self.dset[var]*self.scale
+
+ def compute(self):
+ if self.dset is None:
+ if type(self.infile) is list:
+ self.ds = xr.open_mfdataset(self.infile)
+ else:
+ self.ds = xr.open_dataset(self.infile)
+ if self.where != "":
+ if self.drop:
+ if self.verbose:
+ print("Where with drop=True")
+ self.ds = self.ds.where(
+ self.eval_where(self.where),
+ drop=True
+ )
+ elif self.other is not None and self.other != "":
+ if self.verbose:
+ print("Where with other=", float(self.other))
+ self.ds = self.ds.where(
+ self.eval_where(self.where),
+ other=float(self.other)
+ )
+ else:
+ self.ds = self.ds.where(
+ self.eval_where(self.where)
+ )
+ self.filter_selection()
+ if self.verbose:
+ print(self.selection)
+
+ def save(self):
+ if self.varname != 'None' and \
+ self.varname is not None and \
+ not self.write_all:
+ self.dset[self.varname].to_netcdf(self.output)
+ else:
+ self.dset.to_netcdf(self.output)
+
+ def is_float(self, element) -> bool:
+ try:
+ float(element)
+ return True
+ except ValueError:
+ return False
+
+ def eval_where(self, where_condition):
+ eval_cond = None
+ list_names = list(set(
+ list(self.ds.keys()) +
+ list(self.ds.coords.keys()))
+ )
+ wcond = where_condition
+ check_cond = where_condition
+ for var in list_names:
+ wcond = wcond.replace(var, ' self.ds.' + var + ' ')
+ check_cond = check_cond.replace(var, '')
+ to_remove = "[><=&|()]"
+ check_cond = re.sub(to_remove, "", check_cond).replace("!", "")
+ check_cond = re.sub(' +', ' ', check_cond).strip()
+ list_flt = check_cond.split(" ")
+ no_convert = False
+ for num in list_flt:
+ if not self.is_float(num):
+ no_convert = True
+ if not no_convert:
+ eval_cond = eval(wcond)
+ return eval_cond
+
+
+if __name__ == '__main__':
+ warnings.filterwarnings("ignore")
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ 'input',
+ help='input filename in netCDF format'
+ )
+ parser.add_argument(
+ 'varname',
+ help='Specify which variable(s) to process (case sensitive, comma-separated)'
+ )
+ parser.add_argument(
+ '--filter',
+ nargs="*",
+ help='Filter list variable#operator#value_s#value_e'
+ )
+ parser.add_argument(
+ '--where',
+ help='filename with where condition to be evaluated'
+ )
+ parser.add_argument(
+ '--output',
+ help='Output filename to store the resulting netCDF file'
+ )
+ parser.add_argument(
+ '--scale',
+ help='scale factor to apply to selection (float)'
+ )
+ parser.add_argument(
+ '--other',
+ help='Value to use for locations where condition is False (float)'
+ )
+ parser.add_argument(
+ "--write_all",
+ help="write all data to netCDF",
+ action="store_true")
+ parser.add_argument(
+ "--keep_attributes",
+ help="Keep all attributes",
+ action="store_true")
+ parser.add_argument(
+ "-v", "--verbose",
+ help="switch on verbose mode",
+ action="store_true")
+ parser.add_argument(
+ "--selection",
+ help="select by values",
+ action="store_true")
+ parser.add_argument(
+ "--drop",
+ help="drop values where condition is not met",
+ action="store_true")
+ args = parser.parse_args()
+
+ print("args.selection", args.selection)
+ dset = netCDF2netCDF(infile=args.input, varname=args.varname,
+ scale=args.scale, output=args.output,
+ write_all=args.write_all,
+ sel=args.selection,
+ keep_attributes=args.keep_attributes,
+ filter_list=args.filter,
+ where_config=args.where,
+ drop=args.drop, other=args.other,
+ verbose=args.verbose)
+ dset.compute()
+ dset.save()
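A hedged sketch of how the new `--filter`, `--where` and `--selection` options of `xarray_netcdf2netcdf.py` fit together. Each `--filter` entry has the form `dimension#operator#value_s[#value_e]` with operators `sl` (slice), `to`, `from` and `is`; with `--selection` the values are treated as coordinate values (`sel`), otherwise as integer indices (`isel`). The input file and variable names below are assumptions:

```python
# Write a where-condition file of the same shape as test-data/where_condition.txt;
# it assumes the dataset contains the variables chl and nh4.
with open('where.txt', 'w') as f:
    f.write('((chl > 1) | (chl < 45)) & (nh4 > 1)\n')

# Illustrative invocation: keep chl only, index-slice the first 10 time steps,
# pick depth index 0, drop cells failing the condition, and keep attributes.
# python xarray_netcdf2netcdf.py input.nc chl \
#        --filter 'time#sl#0#10' 'depth#is#0' \
#        --where where.txt --drop \
#        --output chl_subset.netcdf --keep_attributes --verbose
```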
diff -r 123a9a629bef -r bf595d613af4 xarray_select.xml
--- a/xarray_select.xml Sun Jun 06 08:51:41 2021 +0000
+++ b/xarray_select.xml Thu Jan 20 17:07:19 2022 +0000
@@ -1,317 +1,317 @@
-
- extracts variable values with custom conditions on dimensions
-
- macros.xml
-
-
-
- python
- netcdf4
- xarray
- geopandas
- shapely
-
- output_dir/version.tabular &&
- #end if
- python '$__tool_directory__/xarray_tool.py' '$input' --select '$var'
- --verbose
- --filter
- #for $i,$uc in enumerate($user_choice)
- #if $uc.condi_between.comparator=="bi"
- '${uc.dim}#${uc.condi_between.comparator}#${uc.condi_between.t1}#${uc.condi_between.t2}'
- #else
- '${uc.dim}#${uc.condi_between.comparator}#${uc.condi_between.value}'
- #end if
- #end for
-
- #if $time.condi_datetime.datetime=="yes"
- --time
- #if $time.condi_datetime.condi_between.comparator=="sl"
- '${time.condi_datetime.dim}#${time.condi_datetime.condi_between.comparator}#${time.condi_datetime.condi_between.t1}#${time.condi_datetime.condi_between.t2}'
- #else
- '${time.condi_datetime.dim}#${time.condi_datetime.condi_between.comparator}#${time.condi_datetime.condi_between.t1}'
- #end if
- #end if
-
- #if $condi_source_coord.coord_source=="coord_from_file"
- --coords '$coord_tabular'
- --latname '$condi_source_coord.lat_dim' --lonname '$condi_source_coord.lon_dim'
- --outputdir output_dir
- #if $condi_source_coord.tolerance
- --tolerance '$condi_source_coord.tolerance'
- #end if
- #else
- --outfile 'final.tabular'
- #if $condi_source_coord.condi_coord.coord=='single'
- --latname $condi_source_coord.condi_coord.lat_dim
- --latvalN $condi_source_coord.condi_coord.lat_val
- --lonname $condi_source_coord.condi_coord.lon_dim
- --lonvalE $condi_source_coord.condi_coord.lon_val
- #if $condi_source_coord.condi_coord.no_missing
- --no_missing
- #end if
- #if $condi_source_coord.condi_coord.tolerance
- --tolerance '$condi_source_coord.condi_coord.tolerance'
- #end if
- #elif $condi_source_coord.condi_coord.coord=='subregion'
- --latname $condi_source_coord.condi_coord.lat_dim
- --latvalN $condi_source_coord.condi_coord.lat_valN
- --latvalS $condi_source_coord.condi_coord.lat_valS
- --lonname $condi_source_coord.condi_coord.lon_dim
- --lonvalE $condi_source_coord.condi_coord.lon_valE
- --lonvalW $condi_source_coord.condi_coord.lon_valW
- #end if
- #end if
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- condi_source_coord['coord_source'] == 'coord_from_file'
-
-
- condi_source_coord['coord_source'] == 'coord_from_stdin'
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- , <, >=, <=, [interval], ]interval[.
-
-
-
-**Input**
-
-A netcdf file (.nc).
-
-Variable tabular file from 'Netcdf Metadate Info'.
-
-Tabular file with coordinates (only coordinates, no header!) and the following structure : 'lat' 'lon'.
-
-
-**Outputs**
-
-A single output with values for the wanted variable if there is only one coordinate.
-
-A data collection where one file is created for every coordinate, if multiple coordinates from tabular file.
-
-
--------------------------------------------------
-
-The xarray select tool can be used after the xarray Info.
- ]]>
-
-
+
+ extracts variable values with custom conditions on dimensions
+
+ macros.xml
+
+
+
+ xarray
+ python
+ netcdf4
+ geopandas
+ shapely
+
+ output_dir/version.tabular &&
+ #end if
+ python '$__tool_directory__/xarray_tool.py' '$input' --select '$var'
+ --verbose
+ --filter
+ #for $i,$uc in enumerate($user_choice)
+ #if $uc.condi_between.comparator=="bi"
+ '${uc.dim}#${uc.condi_between.comparator}#${uc.condi_between.t1}#${uc.condi_between.t2}'
+ #else
+ '${uc.dim}#${uc.condi_between.comparator}#${uc.condi_between.value}'
+ #end if
+ #end for
+
+ #if $time.condi_datetime.datetime=="yes"
+ --time
+ #if $time.condi_datetime.condi_between.comparator=="sl"
+ '${time.condi_datetime.dim}#${time.condi_datetime.condi_between.comparator}#${time.condi_datetime.condi_between.t1}#${time.condi_datetime.condi_between.t2}'
+ #else
+ '${time.condi_datetime.dim}#${time.condi_datetime.condi_between.comparator}#${time.condi_datetime.condi_between.t1}'
+ #end if
+ #end if
+
+ #if $condi_source_coord.coord_source=="coord_from_file"
+ --coords '$coord_tabular'
+ --latname '$condi_source_coord.lat_dim' --lonname '$condi_source_coord.lon_dim'
+ --outputdir output_dir
+ #if $condi_source_coord.tolerance
+ --tolerance '$condi_source_coord.tolerance'
+ #end if
+ #else
+ --outfile 'final.tabular'
+ #if $condi_source_coord.condi_coord.coord=='single'
+ --latname $condi_source_coord.condi_coord.lat_dim
+ --latvalN $condi_source_coord.condi_coord.lat_val
+ --lonname $condi_source_coord.condi_coord.lon_dim
+ --lonvalE $condi_source_coord.condi_coord.lon_val
+ #if $condi_source_coord.condi_coord.no_missing
+ --no_missing
+ #end if
+ #if $condi_source_coord.condi_coord.tolerance
+ --tolerance '$condi_source_coord.condi_coord.tolerance'
+ #end if
+ #elif $condi_source_coord.condi_coord.coord=='subregion'
+ --latname $condi_source_coord.condi_coord.lat_dim
+ --latvalN $condi_source_coord.condi_coord.lat_valN
+ --latvalS $condi_source_coord.condi_coord.lat_valS
+ --lonname $condi_source_coord.condi_coord.lon_dim
+ --lonvalE $condi_source_coord.condi_coord.lon_valE
+ --lonvalW $condi_source_coord.condi_coord.lon_valW
+ #end if
+ #end if
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ condi_source_coord['coord_source'] == 'coord_from_file'
+
+
+ condi_source_coord['coord_source'] == 'coord_from_stdin'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ , <, >=, <=, [interval], ]interval[.
+
+
+
+**Input**
+
+A netCDF file (.nc).
+
+A variable tabular file from 'Netcdf Metadata Info'.
+
+A tabular file with coordinates (only coordinates, no header!) with the following structure: 'lat' 'lon'.
+
+
+**Outputs**
+
+A single output with values for the selected variable if there is only one coordinate.
+
+A data collection with one file per coordinate if multiple coordinates are provided in the tabular file.
+
+
+-------------------------------------------------
+
+The xarray select tool can be used after the xarray Info tool.
+ ]]>
+
+
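For reference, a short runnable sketch of the filter strings this tool form assembles and `xarray_tool.py` parses in `rowfilter()` and `datetime_selection()`: `name#comparator#value[#value2]`, with comparators `bi` (between), `le`, `ge`, `e` for dimensions or the selected variable and `sl`, `to`, `from`, `is` for the time axis. The dimension and variable names below are assumptions:

```python
filters = ['depth#e#0',          # depth value nearest to 0
           'chl#bi#0.1#10']      # chl values between 0.1 and 10
time_filter = 'time#sl#2003-01-01#2003-12-31'   # time slice

for f in filters + [time_filter]:
    name, op, *values = f.split('#')
    print(name, op, values)
```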
diff -r 123a9a629bef -r bf595d613af4 xarray_tool.py
--- a/xarray_tool.py Sun Jun 06 08:51:41 2021 +0000
+++ b/xarray_tool.py Thu Jan 20 17:07:19 2022 +0000
@@ -1,365 +1,365 @@
-# xarray tool for:
-# - getting metadata information
-# - select data and save results in csv file for further post-processing
-
-import argparse
-import csv
-import os
-import warnings
-
-import geopandas as gdp
-
-import pandas as pd
-
-from shapely.geometry import Point
-from shapely.ops import nearest_points
-
-import xarray as xr
-
-
-class XarrayTool ():
- def __init__(self, infile, outfile_info="", outfile_summary="",
- select="", outfile="", outputdir="", latname="",
- latvalN="", latvalS="", lonname="", lonvalE="",
- lonvalW="", filter_list="", coords="", time="",
- verbose=False, no_missing=False, coords_info=None,
- tolerance=None):
- self.infile = infile
- self.outfile_info = outfile_info
- self.outfile_summary = outfile_summary
- self.select = select
- self.outfile = outfile
- self.outputdir = outputdir
- self.latname = latname
- if tolerance != "" and tolerance is not None:
- self.tolerance = float(tolerance)
- else:
- self.tolerance = -1
- if latvalN != "" and latvalN is not None:
- self.latvalN = float(latvalN)
- else:
- self.latvalN = ""
- if latvalS != "" and latvalS is not None:
- self.latvalS = float(latvalS)
- else:
- self.latvalS = ""
- self.lonname = lonname
- if lonvalE != "" and lonvalE is not None:
- self.lonvalE = float(lonvalE)
- else:
- self.lonvalE = ""
- if lonvalW != "" and lonvalW is not None:
- self.lonvalW = float(lonvalW)
- else:
- self.lonvalW = ""
- self.filter = filter_list
- self.time = time
- self.coords = coords
- self.verbose = verbose
- self.no_missing = no_missing
- # initialization
- self.dset = None
- self.gset = None
- self.coords_info = coords_info
- if self.verbose:
- print("infile: ", self.infile)
- print("outfile_info: ", self.outfile_info)
- print("outfile_summary: ", self.outfile_summary)
- print("outfile: ", self.outfile)
- print("select: ", self.select)
- print("outfile: ", self.outfile)
- print("outputdir: ", self.outputdir)
- print("latname: ", self.latname)
- print("latvalN: ", self.latvalN)
- print("latvalS: ", self.latvalS)
- print("lonname: ", self.lonname)
- print("lonvalE: ", self.lonvalE)
- print("lonvalW: ", self.lonvalW)
- print("filter: ", self.filter)
- print("time: ", self.time)
- print("coords: ", self.coords)
- print("coords_info: ", self.coords_info)
-
- def info(self):
- f = open(self.outfile_info, 'w')
- ds = xr.open_dataset(self.infile)
- ds.info(f)
- f.close()
-
- def summary(self):
- f = open(self.outfile_summary, 'w')
- ds = xr.open_dataset(self.infile)
- writer = csv.writer(f, delimiter='\t')
- header = ['VariableName', 'NumberOfDimensions']
- for idx, val in enumerate(ds.dims.items()):
- header.append('Dim' + str(idx) + 'Name')
- header.append('Dim' + str(idx) + 'Size')
- writer.writerow(header)
- for name, da in ds.data_vars.items():
- line = [name]
- line.append(len(ds[name].shape))
- for d, s in zip(da.shape, da.sizes):
- line.append(s)
- line.append(d)
- writer.writerow(line)
- for name, da in ds.coords.items():
- line = [name]
- line.append(len(ds[name].shape))
- for d, s in zip(da.shape, da.sizes):
- line.append(s)
- line.append(d)
- writer.writerow(line)
- f.close()
-
- def rowfilter(self, single_filter):
- split_filter = single_filter.split('#')
- filter_varname = split_filter[0]
- op = split_filter[1]
- ll = float(split_filter[2])
- if (op == 'bi'):
- rl = float(split_filter[3])
- if filter_varname == self.select:
- # filter on values of the selected variable
- if op == 'bi':
- self.dset = self.dset.where(
- (self.dset <= rl) & (self.dset >= ll)
- )
- elif op == 'le':
- self.dset = self.dset.where(self.dset <= ll)
- elif op == 'ge':
- self.dset = self.dset.where(self.dset >= ll)
- elif op == 'e':
- self.dset = self.dset.where(self.dset == ll)
- else: # filter on other dimensions of the selected variable
- if op == 'bi':
- self.dset = self.dset.sel({filter_varname: slice(ll, rl)})
- elif op == 'le':
- self.dset = self.dset.sel({filter_varname: slice(None, ll)})
- elif op == 'ge':
- self.dset = self.dset.sel({filter_varname: slice(ll, None)})
- elif op == 'e':
- self.dset = self.dset.sel({filter_varname: ll},
- method='nearest')
-
- def selection(self):
- if self.dset is None:
- self.ds = xr.open_dataset(self.infile)
- self.dset = self.ds[self.select] # select variable
- if self.time:
- self.datetime_selection()
- if self.filter:
- self.filter_selection()
-
- self.area_selection()
- if self.gset.count() > 1:
- # convert to dataframe if several rows and cols
- self.gset = self.gset.to_dataframe().dropna(how='all'). \
- reset_index()
- self.gset.to_csv(self.outfile, header=True, sep='\t')
- else:
- data = {
- self.latname: [self.gset[self.latname].values],
- self.lonname: [self.gset[self.lonname].values],
- self.select: [self.gset.values]
- }
-
- df = pd.DataFrame(data, columns=[self.latname, self.lonname,
- self.select])
- df.to_csv(self.outfile, header=True, sep='\t')
-
- def datetime_selection(self):
- split_filter = self.time.split('#')
- time_varname = split_filter[0]
- op = split_filter[1]
- ll = split_filter[2]
- if (op == 'sl'):
- rl = split_filter[3]
- self.dset = self.dset.sel({time_varname: slice(ll, rl)})
- elif (op == 'to'):
- self.dset = self.dset.sel({time_varname: slice(None, ll)})
- elif (op == 'from'):
- self.dset = self.dset.sel({time_varname: slice(ll, None)})
- elif (op == 'is'):
- self.dset = self.dset.sel({time_varname: ll}, method='nearest')
-
- def filter_selection(self):
- for single_filter in self.filter:
- self.rowfilter(single_filter)
-
- def area_selection(self):
-
- if self.latvalS != "" and self.lonvalW != "":
- # Select geographical area
- self.gset = self.dset.sel({self.latname:
- slice(self.latvalS, self.latvalN),
- self.lonname:
- slice(self.lonvalW, self.lonvalE)})
- elif self.latvalN != "" and self.lonvalE != "":
- # select nearest location
- if self.no_missing:
- self.nearest_latvalN = self.latvalN
- self.nearest_lonvalE = self.lonvalE
- else:
- # find nearest location without NaN values
- self.nearest_location()
- if self.tolerance > 0:
- self.gset = self.dset.sel({self.latname: self.nearest_latvalN,
- self.lonname: self.nearest_lonvalE},
- method='nearest',
- tolerance=self.tolerance)
- else:
- self.gset = self.dset.sel({self.latname: self.nearest_latvalN,
- self.lonname: self.nearest_lonvalE},
- method='nearest')
- else:
- self.gset = self.dset
-
- def nearest_location(self):
- # Build a geopandas dataframe with all first elements in each dimension
- # so we assume null values correspond to a mask that is the same for
- # all dimensions in the dataset.
- dsel_frame = self.dset
- for dim in self.dset.dims:
- if dim != self.latname and dim != self.lonname:
- dsel_frame = dsel_frame.isel({dim: 0})
- # transform to pandas dataframe
- dff = dsel_frame.to_dataframe().dropna().reset_index()
- # transform to geopandas to collocate
- gdf = gdp.GeoDataFrame(dff,
- geometry=gdp.points_from_xy(dff[self.lonname],
- dff[self.latname]))
- # Find nearest location where values are not null
- point = Point(self.lonvalE, self.latvalN)
- multipoint = gdf.geometry.unary_union
- queried_geom, nearest_geom = nearest_points(point, multipoint)
- self.nearest_latvalN = nearest_geom.y
- self.nearest_lonvalE = nearest_geom.x
-
- def selection_from_coords(self):
- fcoords = pd.read_csv(self.coords, sep='\t')
- for row in fcoords.itertuples():
- self.latvalN = row[0]
- self.lonvalE = row[1]
- self.outfile = (os.path.join(self.outputdir,
- self.select + '_' +
- str(row.Index) + '.tabular'))
- self.selection()
-
- def get_coords_info(self):
- ds = xr.open_dataset(self.infile)
- for c in ds.coords:
- filename = os.path.join(self.coords_info,
- c.strip() +
- '.tabular')
- pd = ds.coords[c].to_pandas()
- pd.index = range(len(pd))
- pd.to_csv(filename, header=False, sep='\t')
-
-
-if __name__ == '__main__':
- warnings.filterwarnings("ignore")
- parser = argparse.ArgumentParser()
-
- parser.add_argument(
- 'infile',
- help='netCDF input filename'
- )
- parser.add_argument(
- '--info',
- help='Output filename where metadata information is stored'
- )
- parser.add_argument(
- '--summary',
- help='Output filename where data summary information is stored'
- )
- parser.add_argument(
- '--select',
- help='Variable name to select'
- )
- parser.add_argument(
- '--latname',
- help='Latitude name'
- )
- parser.add_argument(
- '--latvalN',
- help='North latitude value'
- )
- parser.add_argument(
- '--latvalS',
- help='South latitude value'
- )
- parser.add_argument(
- '--lonname',
- help='Longitude name'
- )
- parser.add_argument(
- '--lonvalE',
- help='East longitude value'
- )
- parser.add_argument(
- '--lonvalW',
- help='West longitude value'
- )
- parser.add_argument(
- '--tolerance',
- help='Maximum distance between original and selected value for '
- ' inexact matches e.g. abs(index[indexer] - target) <= tolerance'
- )
- parser.add_argument(
- '--coords',
- help='Input file containing Latitude and Longitude'
- 'for geographical selection'
- )
- parser.add_argument(
- '--coords_info',
- help='output-folder where for each coordinate, coordinate values '
- ' are being printed in the corresponding outputfile'
- )
- parser.add_argument(
- '--filter',
- nargs="*",
- help='Filter list variable#operator#value_s#value_e'
- )
- parser.add_argument(
- '--time',
- help='select timeseries variable#operator#value_s[#value_e]'
- )
- parser.add_argument(
- '--outfile',
- help='csv outfile for storing results of the selection'
- '(valid only when --select)'
- )
- parser.add_argument(
- '--outputdir',
- help='folder name for storing results with multiple selections'
- '(valid only when --select)'
- )
- parser.add_argument(
- "-v", "--verbose",
- help="switch on verbose mode",
- action="store_true"
- )
- parser.add_argument(
- "--no_missing",
- help="""Do not take into account possible null/missing values
- (only valid for single location)""",
- action="store_true"
- )
- args = parser.parse_args()
-
- p = XarrayTool(args.infile, args.info, args.summary, args.select,
- args.outfile, args.outputdir, args.latname,
- args.latvalN, args.latvalS, args.lonname,
- args.lonvalE, args.lonvalW, args.filter,
- args.coords, args.time, args.verbose,
- args.no_missing, args.coords_info, args.tolerance)
- if args.info:
- p.info()
- if args.summary:
- p.summary()
- if args.coords:
- p.selection_from_coords()
- elif args.select:
- p.selection()
- elif args.coords_info:
- p.get_coords_info()
+# xarray tool for:
+# - getting metadata information
+# - selecting data and saving the results in a tab-separated file for further post-processing
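+#
+# Example invocation (a sketch only; the script, file, variable and
+# coordinate names below are illustrative):
+#   python xarray_tool.py input.nc --select Temperature \
+#       --latname lat --latvalN 44.0 --lonname lon --lonvalE 5.0 \
+#       --outfile selection.tabular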
+
+import argparse
+import csv
+import os
+import warnings
+
+import geopandas as gdp
+
+import pandas as pd
+
+from shapely.geometry import Point
+from shapely.ops import nearest_points
+
+import xarray as xr
+
+
+class XarrayTool:
+ def __init__(self, infile, outfile_info="", outfile_summary="",
+ select="", outfile="", outputdir="", latname="",
+ latvalN="", latvalS="", lonname="", lonvalE="",
+ lonvalW="", filter_list="", coords="", time="",
+ verbose=False, no_missing=False, coords_info=None,
+ tolerance=None):
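+        # Numeric arguments arrive from argparse as strings; an empty string
+        # or None means "not set". An unset tolerance falls back to -1 so
+        # the nearest-neighbour selection is done without a tolerance.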
+ self.infile = infile
+ self.outfile_info = outfile_info
+ self.outfile_summary = outfile_summary
+ self.select = select
+ self.outfile = outfile
+ self.outputdir = outputdir
+ self.latname = latname
+ if tolerance != "" and tolerance is not None:
+ self.tolerance = float(tolerance)
+ else:
+ self.tolerance = -1
+ if latvalN != "" and latvalN is not None:
+ self.latvalN = float(latvalN)
+ else:
+ self.latvalN = ""
+ if latvalS != "" and latvalS is not None:
+ self.latvalS = float(latvalS)
+ else:
+ self.latvalS = ""
+ self.lonname = lonname
+ if lonvalE != "" and lonvalE is not None:
+ self.lonvalE = float(lonvalE)
+ else:
+ self.lonvalE = ""
+ if lonvalW != "" and lonvalW is not None:
+ self.lonvalW = float(lonvalW)
+ else:
+ self.lonvalW = ""
+ self.filter = filter_list
+ self.time = time
+ self.coords = coords
+ self.verbose = verbose
+ self.no_missing = no_missing
+ # initialization
+ self.dset = None
+ self.gset = None
+ self.coords_info = coords_info
+ if self.verbose:
+ print("infile: ", self.infile)
+ print("outfile_info: ", self.outfile_info)
+ print("outfile_summary: ", self.outfile_summary)
+ print("outfile: ", self.outfile)
+ print("select: ", self.select)
+ print("outputdir: ", self.outputdir)
+ print("latname: ", self.latname)
+ print("latvalN: ", self.latvalN)
+ print("latvalS: ", self.latvalS)
+ print("lonname: ", self.lonname)
+ print("lonvalE: ", self.lonvalE)
+ print("lonvalW: ", self.lonvalW)
+ print("filter: ", self.filter)
+ print("time: ", self.time)
+ print("coords: ", self.coords)
+ print("coords_info: ", self.coords_info)
+
+ def info(self):
+ f = open(self.outfile_info, 'w')
+ ds = xr.open_dataset(self.infile)
+ ds.info(f)
+ f.close()
+
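+    # Write one tab-separated row per variable and per coordinate:
+    # name, number of dimensions, then a (DimName, DimSize) pair for each
+    # dimension of that variable.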
+ def summary(self):
+ f = open(self.outfile_summary, 'w')
+ ds = xr.open_dataset(self.infile)
+ writer = csv.writer(f, delimiter='\t')
+ header = ['VariableName', 'NumberOfDimensions']
+ for idx, val in enumerate(ds.dims.items()):
+ header.append('Dim' + str(idx) + 'Name')
+ header.append('Dim' + str(idx) + 'Size')
+ writer.writerow(header)
+        for name, da in ds.data_vars.items():
+            line = [name]
+            line.append(len(da.shape))
+            # da.dims and da.shape are aligned: one (name, size) per dim
+            for dim_name, dim_size in zip(da.dims, da.shape):
+                line.append(dim_name)
+                line.append(dim_size)
+            writer.writerow(line)
+        for name, da in ds.coords.items():
+            line = [name]
+            line.append(len(da.shape))
+            for dim_name, dim_size in zip(da.dims, da.shape):
+                line.append(dim_name)
+                line.append(dim_size)
+            writer.writerow(line)
+ f.close()
+
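+    # A filter is encoded as "variable#operator#value[#value_end]", where the
+    # operator is one of: 'bi' (between), 'le' (<=), 'ge' (>=) or 'e'
+    # (equality; nearest match when filtering on a dimension).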
+ def rowfilter(self, single_filter):
+ split_filter = single_filter.split('#')
+ filter_varname = split_filter[0]
+ op = split_filter[1]
+ ll = float(split_filter[2])
+ if (op == 'bi'):
+ rl = float(split_filter[3])
+ if filter_varname == self.select:
+ # filter on values of the selected variable
+ if op == 'bi':
+ self.dset = self.dset.where(
+ (self.dset <= rl) & (self.dset >= ll)
+ )
+ elif op == 'le':
+ self.dset = self.dset.where(self.dset <= ll)
+ elif op == 'ge':
+ self.dset = self.dset.where(self.dset >= ll)
+ elif op == 'e':
+ self.dset = self.dset.where(self.dset == ll)
+ else: # filter on other dimensions of the selected variable
+ if op == 'bi':
+ self.dset = self.dset.sel({filter_varname: slice(ll, rl)})
+ elif op == 'le':
+ self.dset = self.dset.sel({filter_varname: slice(None, ll)})
+ elif op == 'ge':
+ self.dset = self.dset.sel({filter_varname: slice(ll, None)})
+ elif op == 'e':
+ self.dset = self.dset.sel({filter_varname: ll},
+ method='nearest')
+
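+    # Select the requested variable, apply the optional time and value
+    # filters, then the geographical selection, and write the result as a
+    # tab-separated table.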
+ def selection(self):
+ if self.dset is None:
+ self.ds = xr.open_dataset(self.infile)
+ self.dset = self.ds[self.select] # select variable
+ if self.time:
+ self.datetime_selection()
+ if self.filter:
+ self.filter_selection()
+
+ self.area_selection()
+ if self.gset.count() > 1:
+ # convert to dataframe if several rows and cols
+ self.gset = self.gset.to_dataframe().dropna(how='all'). \
+ reset_index()
+ self.gset.to_csv(self.outfile, header=True, sep='\t')
+ else:
+ data = {
+ self.latname: [self.gset[self.latname].values],
+ self.lonname: [self.gset[self.lonname].values],
+ self.select: [self.gset.values]
+ }
+
+ df = pd.DataFrame(data, columns=[self.latname, self.lonname,
+ self.select])
+ df.to_csv(self.outfile, header=True, sep='\t')
+
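+    # The time filter is encoded as "variable#operator#start[#end]", where
+    # the operator is 'sl' (slice between start and end), 'to' (up to start),
+    # 'from' (from start onwards) or 'is' (nearest single timestamp).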
+ def datetime_selection(self):
+ split_filter = self.time.split('#')
+ time_varname = split_filter[0]
+ op = split_filter[1]
+ ll = split_filter[2]
+ if (op == 'sl'):
+ rl = split_filter[3]
+ self.dset = self.dset.sel({time_varname: slice(ll, rl)})
+ elif (op == 'to'):
+ self.dset = self.dset.sel({time_varname: slice(None, ll)})
+ elif (op == 'from'):
+ self.dset = self.dset.sel({time_varname: slice(ll, None)})
+ elif (op == 'is'):
+ self.dset = self.dset.sel({time_varname: ll}, method='nearest')
+
+ def filter_selection(self):
+ for single_filter in self.filter:
+ self.rowfilter(single_filter)
+
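+    # With a full bounding box (south/north latitude and west/east longitude)
+    # a rectangular area is selected; with only a north latitude and an east
+    # longitude the nearest single grid point is selected (optionally within
+    # the given tolerance); otherwise the dataset is kept unchanged.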
+ def area_selection(self):
+
+ if self.latvalS != "" and self.lonvalW != "":
+ # Select geographical area
+ self.gset = self.dset.sel({self.latname:
+ slice(self.latvalS, self.latvalN),
+ self.lonname:
+ slice(self.lonvalW, self.lonvalE)})
+ elif self.latvalN != "" and self.lonvalE != "":
+ # select nearest location
+ if self.no_missing:
+ self.nearest_latvalN = self.latvalN
+ self.nearest_lonvalE = self.lonvalE
+ else:
+ # find nearest location without NaN values
+ self.nearest_location()
+ if self.tolerance > 0:
+ self.gset = self.dset.sel({self.latname: self.nearest_latvalN,
+ self.lonname: self.nearest_lonvalE},
+ method='nearest',
+ tolerance=self.tolerance)
+ else:
+ self.gset = self.dset.sel({self.latname: self.nearest_latvalN,
+ self.lonname: self.nearest_lonvalE},
+ method='nearest')
+ else:
+ self.gset = self.dset
+
+ def nearest_location(self):
+        # Build a GeoDataFrame from the first element along every
+        # non-spatial dimension; we assume that missing values form a mask
+        # that is identical across all dimensions of the dataset.
+ dsel_frame = self.dset
+ for dim in self.dset.dims:
+ if dim != self.latname and dim != self.lonname:
+ dsel_frame = dsel_frame.isel({dim: 0})
+ # transform to pandas dataframe
+ dff = dsel_frame.to_dataframe().dropna().reset_index()
+ # transform to geopandas to collocate
+ gdf = gdp.GeoDataFrame(dff,
+ geometry=gdp.points_from_xy(dff[self.lonname],
+ dff[self.latname]))
+ # Find nearest location where values are not null
+ point = Point(self.lonvalE, self.latvalN)
+ multipoint = gdf.geometry.unary_union
+ queried_geom, nearest_geom = nearest_points(point, multipoint)
+ self.nearest_latvalN = nearest_geom.y
+ self.nearest_lonvalE = nearest_geom.x
+
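+    # Loop over the rows of the tab-separated coordinates file and run a
+    # single-point selection for each row, writing one tabular file per row.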
+ def selection_from_coords(self):
+ fcoords = pd.read_csv(self.coords, sep='\t')
+ for row in fcoords.itertuples():
+ self.latvalN = row[0]
+ self.lonvalE = row[1]
+ self.outfile = (os.path.join(self.outputdir,
+ self.select + '_' +
+ str(row.Index) + '.tabular'))
+ self.selection()
+
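+    # Write one tabular file per coordinate of the dataset, listing that
+    # coordinate's values with a fresh integer index.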
+ def get_coords_info(self):
+ ds = xr.open_dataset(self.infile)
+ for c in ds.coords:
+ filename = os.path.join(self.coords_info,
+ c.strip() +
+ '.tabular')
+            coord_values = ds.coords[c].to_pandas()
+            coord_values.index = range(len(coord_values))
+            coord_values.to_csv(filename, header=False, sep='\t')
+
+
+if __name__ == '__main__':
+ warnings.filterwarnings("ignore")
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument(
+ 'infile',
+ help='netCDF input filename'
+ )
+ parser.add_argument(
+ '--info',
+ help='Output filename where metadata information is stored'
+ )
+ parser.add_argument(
+ '--summary',
+ help='Output filename where data summary information is stored'
+ )
+ parser.add_argument(
+ '--select',
+ help='Variable name to select'
+ )
+ parser.add_argument(
+ '--latname',
+ help='Latitude name'
+ )
+ parser.add_argument(
+ '--latvalN',
+ help='North latitude value'
+ )
+ parser.add_argument(
+ '--latvalS',
+ help='South latitude value'
+ )
+ parser.add_argument(
+ '--lonname',
+ help='Longitude name'
+ )
+ parser.add_argument(
+ '--lonvalE',
+ help='East longitude value'
+ )
+ parser.add_argument(
+ '--lonvalW',
+ help='West longitude value'
+ )
+ parser.add_argument(
+ '--tolerance',
+        help='Maximum distance between original and selected value for '
+             'inexact matches, e.g. abs(index[indexer] - target) <= tolerance'
+ )
+ parser.add_argument(
+ '--coords',
+        help='Input file containing Latitude and Longitude '
+             'for geographical selection'
+ )
+ parser.add_argument(
+ '--coords_info',
+        help='Output folder where, for each coordinate, the coordinate '
+             'values are written to a corresponding output file'
+ )
+ parser.add_argument(
+ '--filter',
+ nargs="*",
+ help='Filter list variable#operator#value_s#value_e'
+ )
+ parser.add_argument(
+ '--time',
+ help='select timeseries variable#operator#value_s[#value_e]'
+ )
+ parser.add_argument(
+ '--outfile',
+        help='Output file (tab-separated) for storing the results of the '
+             'selection (valid only when --select)'
+ )
+ parser.add_argument(
+ '--outputdir',
+        help='Folder name for storing the results of multiple selections '
+             '(valid only when --select)'
+ )
+ parser.add_argument(
+ "-v", "--verbose",
+ help="switch on verbose mode",
+ action="store_true"
+ )
+ parser.add_argument(
+ "--no_missing",
+ help="""Do not take into account possible null/missing values
+ (only valid for single location)""",
+ action="store_true"
+ )
+ args = parser.parse_args()
+
+ p = XarrayTool(args.infile, args.info, args.summary, args.select,
+ args.outfile, args.outputdir, args.latname,
+ args.latvalN, args.latvalS, args.lonname,
+ args.lonvalE, args.lonvalW, args.filter,
+ args.coords, args.time, args.verbose,
+ args.no_missing, args.coords_info, args.tolerance)
+ if args.info:
+ p.info()
+ if args.summary:
+ p.summary()
+ if args.coords:
+ p.selection_from_coords()
+ elif args.select:
+ p.selection()
+ elif args.coords_info:
+ p.get_coords_info()