# HG changeset patch # User ecology # Date 1642698474 0 # Node ID 3e73f657a9980867187ee3af405af4c30b0d1713 # Parent 6015f30a7258e1511f19e416089528d7a2c74c67 "planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit 2166974df82f97557b082a9e55135098e61640c4" diff -r 6015f30a7258 -r 3e73f657a998 README.md --- a/README.md Sun Aug 29 16:46:54 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -# Xarray tools for netCDF -## netCDF metadata information - -The first tool `xarray_metadata_info ` uses xarray to provide users with general information about variable names, dimensions -and attributes. -Variables that can be extracted and dimensions available are printed in a tabular file. - -The tool also print a general information file. It's the result of the xarray method info(). diff -r 6015f30a7258 -r 3e73f657a998 macros.xml --- a/macros.xml Sun Aug 29 16:46:54 2021 +0000 +++ b/macros.xml Thu Jan 20 17:07:54 2022 +0000 @@ -1,185 +1,28 @@ - - 0.18.2 - 0 - - - topic_0610 - topic_3050 - - - - - - @article{hoyer2017xarray, - title = {xarray: {N-D} labeled arrays and datasets in {Python}}, - author = {Hoyer, S. and J. Hamman}, - journal = {Journal of Open Research Software}, - volume = {5}, - number = {1}, - year = {2017}, - publisher = {Ubiquity Press}, - doi = {10.5334/jors.148}, - url = {http://doi.org/10.5334/jors.148} - } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + 0.20.2 + 0 + 20.05 + + + topic_0610 + topic_3050 + + + + + + @article{hoyer2017xarray, + title = {xarray: {N-D} labeled arrays and datasets in {Python}}, + author = {Hoyer, S. and J. Hamman}, + journal = {Journal of Open Research Software}, + volume = {5}, + number = {1}, + year = {2017}, + publisher = {Ubiquity Press}, + doi = {10.5334/jors.148}, + url = {http://doi.org/10.5334/jors.148} + } + + + + diff -r 6015f30a7258 -r 3e73f657a998 macros_mapplot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros_mapplot.xml Thu Jan 20 17:07:54 2022 +0000 @@ -0,0 +1,213 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 6015f30a7258 -r 3e73f657a998 macros_netcdf2netcdf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros_netcdf2netcdf.xml Thu Jan 20 17:07:54 2022 +0000 @@ -0,0 +1,143 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ + + + + + + + + + + + + +
+
+
diff -r 6015f30a7258 -r 3e73f657a998 macros_tests_netcdf2netcdf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros_tests_netcdf2netcdf.xml Thu Jan 20 17:07:54 2022 +0000 @@ -0,0 +1,263 @@ + + + + + + +
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +
+ +
+ + + +
+ +
+ +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +
+
+ + + + + +
+ +
+ + + +
+ + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+
+
diff -r 6015f30a7258 -r 3e73f657a998 test-data/all.netcdf Binary file test-data/all.netcdf has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/chl_alltimes.nc Binary file test-data/chl_alltimes.nc has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/chl_nh4.netcdf Binary file test-data/chl_nh4.netcdf has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/chl_phy_where.netcdf Binary file test-data/chl_phy_where.netcdf has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/chl_where_drop.netcdf Binary file test-data/chl_where_drop.netcdf has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/data_from_20040615.nc Binary file test-data/data_from_20040615.nc has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/data_to_20040615.nc Binary file test-data/data_to_20040615.nc has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0.png Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0.png has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0_title.png Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0_title.png has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time1.png Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time1.png has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time50.png Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time50.png has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/info_file.txt --- a/test-data/info_file.txt Sun Aug 29 16:46:54 2021 +0000 +++ b/test-data/info_file.txt Thu Jan 20 17:07:54 2022 +0000 @@ -1,9 +1,9 @@ xarray.Dataset { dimensions: + time = 145 ; depth = 1 ; latitude = 97 ; longitude = 103 ; - time = 145 ; variables: float32 phy(time, depth, latitude, longitude) ; diff -r 6015f30a7258 -r 3e73f657a998 test-data/select_by_values.netcdf Binary file test-data/select_by_values.netcdf has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/small.netcdf Binary file test-data/small.netcdf has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/small_all_variables.netcdf Binary file test-data/small_all_variables.netcdf has changed diff -r 6015f30a7258 -r 3e73f657a998 test-data/version.tabular --- a/test-data/version.tabular Sun Aug 29 16:46:54 2021 +0000 +++ b/test-data/version.tabular Thu Jan 20 17:07:54 2022 +0000 @@ -1,1 +1,1 @@ -Galaxy xarray version 0.18.2 +Galaxy xarray version 0.20.2 diff -r 6015f30a7258 -r 3e73f657a998 test-data/where_condition.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/where_condition.txt Thu Jan 20 17:07:54 2022 +0000 @@ -0,0 +1,1 @@ +((chl > 1) | (chl < 45)) & (nh4 > 1) diff -r 6015f30a7258 -r 3e73f657a998 test-data/where_condition_simple.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/where_condition_simple.txt Thu Jan 20 17:07:54 2022 +0000 @@ -0,0 +1,1 @@ +nh4 > 5.15 diff -r 6015f30a7258 -r 3e73f657a998 xarray_coords_info.xml --- a/xarray_coords_info.xml Sun Aug 29 16:46:54 2021 +0000 +++ b/xarray_coords_info.xml Thu Jan 20 17:07:54 2022 +0000 @@ -1,53 +1,60 @@ - - Get values for each coordinate of a Netcdf file - - macros.xml - - - - python - netcdf4 - xarray - geopandas - shapely - - output_dir/version.tabular && - python3 '$__tool_directory__/xarray_tool.py' '$input' --coords_info output_dir - ]]> - - - - - - - - - - - - - - - - - - - - - - - + + Get values for each coordinate of a Netcdf file + + macros.xml + + + + xarray + python + netcdf4 + geopandas + shapely + + output_dir/version.tabular && + python3 '$__tool_directory__/xarray_tool.py' '$input' --coords_info output_dir + ]]> + + + + + + + + + + + + + + + + + + + + + + + + diff -r 6015f30a7258 -r 3e73f657a998 xarray_mapplot.py --- a/xarray_mapplot.py Sun Aug 29 16:46:54 2021 +0000 +++ b/xarray_mapplot.py Thu Jan 20 17:07:54 2022 +0000 @@ -1,457 +1,411 @@ -#!/usr/bin/env python3 -# -# -# usage: xarray_mapplot.py [-h] [--proj PROJ] -# [--cmap CMAP] -# [--output OUTPUT] -# [--time TIMES] -# [--nrow NROW] -# [--ncol NCOL] -# [--title title] -# [--latitude LATITUDE] -# [--longitude LONGITUDE] -# [--land ALPHA-LAND] -# [--ocean ALPHA-OCEAN] -# [--coastline ALPHA-COASTLINE] -# [--borders ALPHA-BORDERS] -# [--xlim "x1,x2"] -# [--ylim "y1,y2"] -# [--range "valmin,valmax"] -# [--threshold VAL] -# [--label label-colorbar] -# [--shift] -# [-v] -# input varname -# -# positional arguments: -# input input filename with geographical coordinates (netCDF -# format) -# varname Specify which variable to plot (case sensitive) -# -# optional arguments: -# -h, --help show this help message and exit -# --proj PROJ Specify the projection on which we draw -# --cmap CMAP Specify which colormap to use for plotting -# --output OUTPUT output filename to store resulting image (png format) -# --time TIMES time index from the file for multiple plots ("0 1 2 3") -# --title plot or subplot title -# --latitude variable name for latitude -# --longitude variable name for longitude -# --land add land on plot with alpha value [0-1] -# --ocean add oceans on plot with alpha value [0-1] -# --coastline add coastline with alpha value [0-1] -# --borders add country borders with alpha value [0-1] -# --xlim limited geographical area longitudes "x1,x2" -# --ylim limited geographical area latitudes "y1,y2" -# --range "valmin,valmax" for plotting -# --threshold do not plot values below threshold -# --label set a label for colormap -# --shift shift longitudes if specified -# -v, --verbose switch on verbose mode -# - -import argparse -import ast -import warnings -from pathlib import Path - -import cartopy.crs as ccrs -import cartopy.feature as feature - -from cmcrameri import cm - -import matplotlib as mpl -mpl.use('Agg') -from matplotlib import pyplot # noqa: I202,E402 - -import xarray as xr # noqa: E402 - - -class MapPlotXr (): - def __init__(self, input, proj, varname, cmap, output, verbose=False, - time=[], title="", latitude="latitude", - longitude="longitude", land=0, ocean=0, - coastline=0, borders=0, xlim=[], ylim=[], - threshold="", label="", shift=False, - range_values=[]): - self.input = input - print("PROJ", proj) - if proj != "" and proj is not None: - self.proj = proj.replace('X', ':') - else: - self.proj = proj - self.varname = varname - self.get_cmap(cmap) - self.time = time - self.latitude = latitude - self.longitude = longitude - self.land = land - self.ocean = ocean - self.coastline = coastline - self.borders = borders - self.xlim = xlim - self.ylim = ylim - self.range = range_values - self.threshold = threshold - self.shift = shift - self.xylim_supported = False - self.colorbar = True - self.title = title - if output is None: - self.output = Path(input).stem + '.png' - else: - self.output = output - self.verbose = verbose - self.dset = xr.open_dataset(self.input, use_cftime=True) - - self.label = {} - if label != "" and label is not None: - self.label['label'] = label - if verbose: - print("input: ", self.input) - print("proj: ", self.proj) - print("varname: ", self.varname) - print("time: ", self.time) - print("minval, maxval: ", self.range) - print("title: ", self.title) - print("output: ", self.output) - print("label: ", self.label) - print("shift: ", self.shift) - print("ocean: ", self.ocean) - print("land: ", self.land) - print("coastline: ", self.coastline) - print("borders: ", self.borders) - print("latitude: ", self.latitude) - print("longitude: ", self.longitude) - print("xlim: ", self.xlim) - print("ylim: ", self.ylim) - - def get_cmap(self, cmap): - if cmap[0:3] == 'cm.': - self.cmap = cm.__dict__[cmap[3:]] - else: - self.cmap = cmap - - def projection(self): - if self.proj is None: - return ccrs.PlateCarree() - - proj_dict = ast.literal_eval(self.proj) - - user_proj = proj_dict.pop("proj") - if user_proj == 'PlateCarree': - self.xylim_supported = True - return ccrs.PlateCarree(**proj_dict) - elif user_proj == 'AlbersEqualArea': - return ccrs.AlbersEqualArea(**proj_dict) - elif user_proj == 'AzimuthalEquidistant': - return ccrs.AzimuthalEquidistant(**proj_dict) - elif user_proj == 'EquidistantConic': - return ccrs.EquidistantConic(**proj_dict) - elif user_proj == 'LambertConformal': - return ccrs.LambertConformal(**proj_dict) - elif user_proj == 'LambertCylindrical': - return ccrs.LambertCylindrical(**proj_dict) - elif user_proj == 'Mercator': - return ccrs.Mercator(**proj_dict) - elif user_proj == 'Miller': - return ccrs.Miller(**proj_dict) - elif user_proj == 'Mollweide': - return ccrs.Mollweide(**proj_dict) - elif user_proj == 'Orthographic': - return ccrs.Orthographic(**proj_dict) - elif user_proj == 'Robinson': - return ccrs.Robinson(**proj_dict) - elif user_proj == 'Sinusoidal': - return ccrs.Sinusoidal(**proj_dict) - elif user_proj == 'Stereographic': - return ccrs.Stereographic(**proj_dict) - elif user_proj == 'TransverseMercator': - return ccrs.TransverseMercator(**proj_dict) - elif user_proj == 'UTM': - return ccrs.UTM(**proj_dict) - elif user_proj == 'InterruptedGoodeHomolosine': - return ccrs.InterruptedGoodeHomolosine(**proj_dict) - elif user_proj == 'RotatedPole': - return ccrs.RotatedPole(**proj_dict) - elif user_proj == 'OSGB': - self.xylim_supported = False - return ccrs.OSGB(**proj_dict) - elif user_proj == 'EuroPP': - self.xylim_supported = False - return ccrs.EuroPP(**proj_dict) - elif user_proj == 'Geostationary': - return ccrs.Geostationary(**proj_dict) - elif user_proj == 'NearsidePerspective': - return ccrs.NearsidePerspective(**proj_dict) - elif user_proj == 'EckertI': - return ccrs.EckertI(**proj_dict) - elif user_proj == 'EckertII': - return ccrs.EckertII(**proj_dict) - elif user_proj == 'EckertIII': - return ccrs.EckertIII(**proj_dict) - elif user_proj == 'EckertIV': - return ccrs.EckertIV(**proj_dict) - elif user_proj == 'EckertV': - return ccrs.EckertV(**proj_dict) - elif user_proj == 'EckertVI': - return ccrs.EckertVI(**proj_dict) - elif user_proj == 'EqualEarth': - return ccrs.EqualEarth(**proj_dict) - elif user_proj == 'Gnomonic': - return ccrs.Gnomonic(**proj_dict) - elif user_proj == 'LambertAzimuthalEqualArea': - return ccrs.LambertAzimuthalEqualArea(**proj_dict) - elif user_proj == 'NorthPolarStereo': - return ccrs.NorthPolarStereo(**proj_dict) - elif user_proj == 'OSNI': - return ccrs.OSNI(**proj_dict) - elif user_proj == 'SouthPolarStereo': - return ccrs.SouthPolarStereo(**proj_dict) - - def plot(self, ts=None): - if self.shift: - if self.longitude == 'longitude': - self.dset = self.dset.assign_coords( - longitude=((( - self.dset[self.longitude] - + 180) % 360) - 180)) - elif self.longitude == 'lon': - self.dset = self.dset.assign_coords( - lon=(((self.dset[self.longitude] - + 180) % 360) - 180)) - - pyplot.figure(1, figsize=[20, 10]) - - # Set the projection to use for plotting - ax = pyplot.subplot(1, 1, 1, projection=self.projection()) - if self.land: - ax.add_feature(feature.LAND, alpha=self.land) - - if self.ocean: - ax.add_feature(feature.OCEAN, alpha=self.ocean) - if self.coastline: - ax.coastlines(resolution='10m', alpha=self.coastline) - if self.borders: - ax.add_feature(feature.BORDERS, linestyle=':', alpha=self.borders) - - if self.xlim: - min_lon = min(self.xlim[0], self.xlim[1]) - max_lon = max(self.xlim[0], self.xlim[1]) - else: - min_lon = self.dset[self.longitude].min() - max_lon = self.dset[self.longitude].max() - - if self.ylim: - min_lat = min(self.ylim[0], self.ylim[1]) - max_lat = max(self.ylim[0], self.ylim[1]) - else: - min_lat = self.dset[self.latitude].min() - max_lat = self.dset[self.latitude].max() - - if self.xylim_supported: - pyplot.xlim(min_lon, max_lon) - pyplot.ylim(min_lat, max_lat) - - # Fix extent - if self.threshold == "" or self.threshold is None: - threshold = self.dset[self.varname].min() - else: - threshold = float(self.threshold) - - if self.range == []: - minval = self.dset[self.varname].min() - maxval = self.dset[self.varname].max() - else: - minval = self.range[0] - maxval = self.range[1] - - if self.verbose: - print("minval: ", minval) - print("maxval: ", maxval) - - # pass extent with vmin and vmax parameters - proj_t = ccrs.PlateCarree() - if ts is None: - self.dset.where( - self.dset[self.varname] > threshold - )[self.varname].plot(ax=ax, - vmin=minval, - vmax=maxval, - transform=proj_t, - cmap=self.cmap, - cbar_kwargs=self.label - ) - if self.title != "" and self.title is not None: - pyplot.title(self.title) - pyplot.savefig(self.output) - else: - if self.colorbar: - self.dset.where( - self.dset[self.varname] > threshold - )[self.varname].isel(time=ts).plot(ax=ax, - vmin=minval, - vmax=maxval, - transform=proj_t, - cmap=self.cmap, - cbar_kwargs=self.label - ) - else: - self.dset.where( - self.dset[self.varname] > minval - )[self.varname].isel(time=ts).plot(ax=ax, - vmin=minval, - vmax=maxval, - transform=proj_t, - cmap=self.cmap, - add_colorbar=False) - if self.title != "" and self.title is not None: - pyplot.title(self.title + "(time = " + str(ts) + ')') - pyplot.savefig(self.output[:-4] + "_time" + str(ts) + - self.output[-4:]) # assume png format - - -if __name__ == '__main__': - warnings.filterwarnings("ignore") - parser = argparse.ArgumentParser() - parser.add_argument( - 'input', - help='input filename with geographical coordinates (netCDF format)' - ) - - parser.add_argument( - '--proj', - help='Specify the projection on which we draw' - ) - parser.add_argument( - 'varname', - help='Specify which variable to plot (case sensitive)' - ) - parser.add_argument( - '--cmap', - help='Specify which colormap to use for plotting' - ) - parser.add_argument( - '--output', - help='output filename to store resulting image (png format)' - ) - parser.add_argument( - '--time', - help='list of times to plot for multiple plots' - ) - parser.add_argument( - '--title', - help='plot title' - ) - parser.add_argument( - '--latitude', - help='variable name for latitude' - ) - parser.add_argument( - '--longitude', - help='variable name for longitude' - ) - parser.add_argument( - '--land', - help='add land on plot with alpha value [0-1]' - ) - parser.add_argument( - '--ocean', - help='add oceans on plot with alpha value [0-1]' - ) - parser.add_argument( - '--coastline', - help='add coastline with alpha value [0-1]' - ) - parser.add_argument( - '--borders', - help='add country borders with alpha value [0-1]' - ) - parser.add_argument( - '--xlim', - help='limited geographical area longitudes "x1,x2"' - ) - parser.add_argument( - '--ylim', - help='limited geographical area latitudes "y1,y2"' - ) - parser.add_argument( - '--range', - help='min and max values for plotting "minval,maxval"' - ) - parser.add_argument( - '--threshold', - help='do not plot values below threshold' - ) - parser.add_argument( - '--label', - help='set a label for colorbar' - ) - parser.add_argument( - '--shift', - help='shift longitudes if specified', - action="store_true" - ) - parser.add_argument( - "-v", "--verbose", - help="switch on verbose mode", - action="store_true") - args = parser.parse_args() - - if args.time is None: - time = [] - else: - time = list(map(int, args.time.split(","))) - if args.xlim is None: - xlim = [] - else: - xlim = list(map(float, args.xlim.split(","))) - if args.ylim is None: - ylim = [] - else: - ylim = list(map(float, args.ylim.split(","))) - if args.range is None: - range_values = [] - else: - range_values = list(map(float, args.range.split(","))) - if args.latitude is None: - latitude = "latitude" - else: - latitude = args.latitude - if args.longitude is None: - longitude = "longitude" - else: - longitude = args.longitude - if args.land is None: - land = 0 - else: - land = float(args.land) - if args.ocean is None: - ocean = 0 - else: - ocean = float(args.ocean) - if args.coastline is None: - coastline = 0 - else: - coastline = float(args.coastline) - if args.borders is None: - borders = 0 - else: - borders = float(args.borders) - - dset = MapPlotXr(input=args.input, proj=args.proj, varname=args.varname, - cmap=args.cmap, output=args.output, verbose=args.verbose, - time=time, title=args.title, - latitude=latitude, longitude=longitude, land=land, - ocean=ocean, coastline=coastline, borders=borders, - xlim=xlim, ylim=ylim, threshold=args.threshold, - label=args.label, shift=args.shift, - range_values=range_values) - - if dset.time == []: - dset.plot() - else: - for t in dset.time: - dset.plot(t) - dset.shift = False # only shift once - dset.colorbar = True +#!/usr/bin/env python3 +# +# +# usage: xarray_mapplot.py [-h] [--proj PROJ] +# [--cmap CMAP] +# [--output OUTPUT] +# [--time TIMES] +# [--nrow NROW] +# [--ncol NCOL] +# [--title title] +# [--latitude LATITUDE] +# [--longitude LONGITUDE] +# [--land ALPHA-LAND] +# [--ocean ALPHA-OCEAN] +# [--coastline ALPHA-COASTLINE] +# [--borders ALPHA-BORDERS] +# [--xlim "x1,x2"] +# [--ylim "y1,y2"] +# [--range "valmin,valmax"] +# [--threshold VAL] +# [--label label-colorbar] +# [--config config-file] +# [--shift] +# [-v] +# input varname +# +# positional arguments: +# input input filename with geographical coordinates (netCDF +# format) +# varname Specify which variable to plot (case sensitive) +# +# optional arguments: +# -h, --help show this help message and exit +# --proj PROJ Specify the projection on which we draw +# --cmap CMAP Specify which colormap to use for plotting +# --output OUTPUT output filename to store resulting image (png format) +# --time TIMES time index from the file for multiple plots ("0 1 2 3") +# --title plot or subplot title +# --latitude variable name for latitude +# --longitude variable name for longitude +# --land add land on plot with alpha value [0-1] +# --ocean add oceans on plot with alpha value [0-1] +# --coastline add coastline with alpha value [0-1] +# --borders add country borders with alpha value [0-1] +# --xlim limited geographical area longitudes "x1,x2" +# --ylim limited geographical area latitudes "y1,y2" +# --range "valmin,valmax" for plotting +# --threshold do not plot values below threshold +# --label set a label for colormap +# --config plotting parameters are passed via a config file +# (overwrite other plotting options) +# --shift shift longitudes if specified +# -v, --verbose switch on verbose mode +# + +import argparse +import ast +import warnings +from pathlib import Path + +import cartopy.crs as ccrs +import cartopy.feature as feature + +from cmcrameri import cm + +import matplotlib as mpl +mpl.use('Agg') +from matplotlib import pyplot # noqa: I202,E402 + +import xarray as xr # noqa: E402 + + +class MapPlotXr (): + def __init__(self, input, varname, output, verbose=False, + config_file="", proj="", shift=False): + + li = list(input.split(",")) + if len(li) > 1: + self.input = li + else: + self.input = input + + if proj != "" and proj is not None and Path(proj).exists(): + f = open(proj) + sdict = ''.join( + f.read().replace("\n", "").split('{')[1].split('}')[0] + ) + self.proj = '{' + sdict.strip() + '}' + else: + self.proj = None + self.varname = varname + self.shift = shift + self.xylim_supported = False + self.colorbar = True + if output is None: + if type(self.input) is list: + self.output = Path(self.input[0]).stem + '.png' + else: + self.output = Path(self.input).stem + '.png' + else: + self.output = output + self.verbose = verbose + self.label = {} + self.time = [] + self.xlim = [] + self.ylim = [] + self.range = [] + self.latitude = "latitude" + self.longitude = "longitude" + self.land = 0 + self.ocean = 0 + self.coastline = 0 + self.borders = 0 + self.cmap = "coolwarm" + self.threshold = "" + self.title = "" + + if config_file != "" and config_file is not None: + with open(config_file) as f: + sdict = ''.join( + f.read().replace("\n", "").split('{')[1].split('}')[0] + ) + tmp = ast.literal_eval('{' + sdict.strip() + '}') + for key in tmp: + if key == 'time': + time = tmp[key] + self.time = list(map(int, time.split(","))) + if key == 'cmap': + self.get_cmap(tmp[key]) + if key == 'latitude': + self.latitude = tmp[key] + if key == 'longitude': + self.longitude = tmp[key] + if key == 'land': + self.land = float(tmp[key]) + if key == 'ocean': + self.ocean = float(tmp[key]) + if key == 'coastline': + self.coastline = float(tmp[key]) + if key == 'borders': + self.borders = float(tmp[key]) + if key == 'xlim': + xlim = tmp[key] + self.xlim = list(map(float, xlim.split(","))) + if key == 'ylim': + ylim = tmp[key] + self.ylim = list(map(float, ylim.split(","))) + if key == 'range': + range_values = tmp[key] + self.range = list(map(float, range_values.split(","))) + if key == 'threshold': + self.threshold = float(tmp[key]) + if key == 'label': + self.label['label'] = tmp[key] + if key == 'title': + self.title = tmp[key] + + if type(self.input) is list: + self.dset = xr.open_mfdataset(self.input, use_cftime=True) + else: + self.dset = xr.open_dataset(self.input, use_cftime=True) + + if verbose: + print("input: ", self.input) + print("proj: ", self.proj) + print("varname: ", self.varname) + print("time: ", self.time) + print("minval, maxval: ", self.range) + print("title: ", self.title) + print("output: ", self.output) + print("label: ", self.label) + print("shift: ", self.shift) + print("ocean: ", self.ocean) + print("land: ", self.land) + print("coastline: ", self.coastline) + print("borders: ", self.borders) + print("latitude: ", self.latitude) + print("longitude: ", self.longitude) + print("xlim: ", self.xlim) + print("ylim: ", self.ylim) + + def get_cmap(self, cmap): + if cmap[0:3] == 'cm.': + self.cmap = cm.__dict__[cmap[3:]] + else: + self.cmap = cmap + + def projection(self): + if self.proj is None: + return ccrs.PlateCarree() + + proj_dict = ast.literal_eval(self.proj) + user_proj = proj_dict.pop("proj") + if user_proj == 'PlateCarree': + self.xylim_supported = True + return ccrs.PlateCarree(**proj_dict) + elif user_proj == 'AlbersEqualArea': + return ccrs.AlbersEqualArea(**proj_dict) + elif user_proj == 'AzimuthalEquidistant': + return ccrs.AzimuthalEquidistant(**proj_dict) + elif user_proj == 'EquidistantConic': + return ccrs.EquidistantConic(**proj_dict) + elif user_proj == 'LambertConformal': + return ccrs.LambertConformal(**proj_dict) + elif user_proj == 'LambertCylindrical': + return ccrs.LambertCylindrical(**proj_dict) + elif user_proj == 'Mercator': + return ccrs.Mercator(**proj_dict) + elif user_proj == 'Miller': + return ccrs.Miller(**proj_dict) + elif user_proj == 'Mollweide': + return ccrs.Mollweide(**proj_dict) + elif user_proj == 'Orthographic': + return ccrs.Orthographic(**proj_dict) + elif user_proj == 'Robinson': + return ccrs.Robinson(**proj_dict) + elif user_proj == 'Sinusoidal': + return ccrs.Sinusoidal(**proj_dict) + elif user_proj == 'Stereographic': + return ccrs.Stereographic(**proj_dict) + elif user_proj == 'TransverseMercator': + return ccrs.TransverseMercator(**proj_dict) + elif user_proj == 'UTM': + return ccrs.UTM(**proj_dict) + elif user_proj == 'InterruptedGoodeHomolosine': + return ccrs.InterruptedGoodeHomolosine(**proj_dict) + elif user_proj == 'RotatedPole': + return ccrs.RotatedPole(**proj_dict) + elif user_proj == 'OSGB': + self.xylim_supported = False + return ccrs.OSGB(**proj_dict) + elif user_proj == 'EuroPP': + self.xylim_supported = False + return ccrs.EuroPP(**proj_dict) + elif user_proj == 'Geostationary': + return ccrs.Geostationary(**proj_dict) + elif user_proj == 'NearsidePerspective': + return ccrs.NearsidePerspective(**proj_dict) + elif user_proj == 'EckertI': + return ccrs.EckertI(**proj_dict) + elif user_proj == 'EckertII': + return ccrs.EckertII(**proj_dict) + elif user_proj == 'EckertIII': + return ccrs.EckertIII(**proj_dict) + elif user_proj == 'EckertIV': + return ccrs.EckertIV(**proj_dict) + elif user_proj == 'EckertV': + return ccrs.EckertV(**proj_dict) + elif user_proj == 'EckertVI': + return ccrs.EckertVI(**proj_dict) + elif user_proj == 'EqualEarth': + return ccrs.EqualEarth(**proj_dict) + elif user_proj == 'Gnomonic': + return ccrs.Gnomonic(**proj_dict) + elif user_proj == 'LambertAzimuthalEqualArea': + return ccrs.LambertAzimuthalEqualArea(**proj_dict) + elif user_proj == 'NorthPolarStereo': + return ccrs.NorthPolarStereo(**proj_dict) + elif user_proj == 'OSNI': + return ccrs.OSNI(**proj_dict) + elif user_proj == 'SouthPolarStereo': + return ccrs.SouthPolarStereo(**proj_dict) + + def plot(self, ts=None): + if self.shift: + if self.longitude == 'longitude': + self.dset = self.dset.assign_coords( + longitude=((( + self.dset[self.longitude] + + 180) % 360) - 180)) + elif self.longitude == 'lon': + self.dset = self.dset.assign_coords( + lon=(((self.dset[self.longitude] + + 180) % 360) - 180)) + + pyplot.figure(1, figsize=[20, 10]) + + # Set the projection to use for plotting + ax = pyplot.subplot(1, 1, 1, projection=self.projection()) + if self.land: + ax.add_feature(feature.LAND, alpha=self.land) + + if self.ocean: + ax.add_feature(feature.OCEAN, alpha=self.ocean) + if self.coastline: + ax.coastlines(resolution='10m', alpha=self.coastline) + if self.borders: + ax.add_feature(feature.BORDERS, linestyle=':', alpha=self.borders) + + if self.xlim: + min_lon = min(self.xlim[0], self.xlim[1]) + max_lon = max(self.xlim[0], self.xlim[1]) + else: + min_lon = self.dset[self.longitude].min() + max_lon = self.dset[self.longitude].max() + + if self.ylim: + min_lat = min(self.ylim[0], self.ylim[1]) + max_lat = max(self.ylim[0], self.ylim[1]) + else: + min_lat = self.dset[self.latitude].min() + max_lat = self.dset[self.latitude].max() + + if self.xylim_supported: + pyplot.xlim(min_lon, max_lon) + pyplot.ylim(min_lat, max_lat) + + # Fix extent + if self.threshold == "" or self.threshold is None: + threshold = self.dset[self.varname].min() + else: + threshold = float(self.threshold) + + if self.range == []: + minval = self.dset[self.varname].min() + maxval = self.dset[self.varname].max() + else: + minval = self.range[0] + maxval = self.range[1] + + if self.verbose: + print("minval: ", minval) + print("maxval: ", maxval) + + # pass extent with vmin and vmax parameters + proj_t = ccrs.PlateCarree() + if ts is None: + self.dset.where( + self.dset[self.varname] > threshold + )[self.varname].plot(ax=ax, + vmin=minval, + vmax=maxval, + transform=proj_t, + cmap=self.cmap, + cbar_kwargs=self.label + ) + if self.title != "" and self.title is not None: + pyplot.title(self.title) + pyplot.savefig(self.output) + else: + if self.colorbar: + self.dset.where( + self.dset[self.varname] > threshold + )[self.varname].isel(time=ts).plot(ax=ax, + vmin=minval, + vmax=maxval, + transform=proj_t, + cmap=self.cmap, + cbar_kwargs=self.label + ) + else: + self.dset.where( + self.dset[self.varname] > minval + )[self.varname].isel(time=ts).plot(ax=ax, + vmin=minval, + vmax=maxval, + transform=proj_t, + cmap=self.cmap, + add_colorbar=False) + if self.title != "" and self.title is not None: + pyplot.title(self.title + "(time = " + str(ts) + ')') + pyplot.savefig(self.output[:-4] + "_time" + str(ts) + + self.output[-4:]) # assume png format + + +if __name__ == '__main__': + warnings.filterwarnings("ignore") + parser = argparse.ArgumentParser() + parser.add_argument( + 'input', + help='input filename with geographical coordinates (netCDF format)' + ) + parser.add_argument( + '--proj', + help='Config file with the projection on which we draw' + ) + parser.add_argument( + 'varname', + help='Specify which variable to plot (case sensitive)' + ) + parser.add_argument( + '--output', + help='output filename to store resulting image (png format)' + ) + parser.add_argument( + '--config', + help='pass plotting parameters via a config file' + ) + parser.add_argument( + '--shift', + help='shift longitudes if specified', + action="store_true" + ) + parser.add_argument( + "-v", "--verbose", + help="switch on verbose mode", + action="store_true") + args = parser.parse_args() + + dset = MapPlotXr(input=args.input, varname=args.varname, + output=args.output, verbose=args.verbose, + config_file=args.config, proj=args.proj, + shift=args.shift) + + if dset.time == []: + dset.plot() + else: + for t in dset.time: + dset.plot(t) + dset.shift = False # only shift once + dset.colorbar = True diff -r 6015f30a7258 -r 3e73f657a998 xarray_netcdf2netcdf.py --- a/xarray_netcdf2netcdf.py Sun Aug 29 16:46:54 2021 +0000 +++ b/xarray_netcdf2netcdf.py Thu Jan 20 17:07:54 2022 +0000 @@ -1,133 +1,268 @@ -#!/usr/bin/env python3 -# -# Apply operations on selected variables -# - scale -# one can also select the range of time (for timeseries) -# to apply these operations over the range only -# when a range of time is selected and when scaling, one -# can choose to save the entire timeseries or -# the selected range only. -# when scaling, one can add additional filters on dimensions -# (typically used to filter over latitudes and longitudes) - - -import argparse -import warnings - -import xarray as xr # noqa: E402 - - -class netCDF2netCDF (): - def __init__(self, infile, varname, scale="", - output="output.netcdf", - write_all=False, - filter_list="", - verbose=False): - self.infile = infile - self.verbose = verbose - self.varname = varname - self.write_all = write_all - self.filter = filter_list - self.selection = {} - if scale == "" or scale is None: - self.scale = 1 - else: - self.scale = float(scale) - if output is None: - self.output = "output.netcdf" - else: - self.output = output - # initialization - self.dset = None - self.subset = None - if self.verbose: - print("infile: ", self.infile) - print("varname: ", self.varname) - print("filter_list: ", self.filter) - print("scale: ", self.scale) - print("write_all: ", self.write_all) - print("output: ", self.output) - - def dimension_selection(self, single_filter): - split_filter = single_filter.split('#') - dimension_varname = split_filter[0] - op = split_filter[1] - ll = int(split_filter[2]) - if (op == 'sl'): - rl = int(split_filter[3]) - self.selection[dimension_varname] = slice(ll, rl) - elif (op == 'to'): - self.selection[dimension_varname] = slice(None, ll) - elif (op == 'from'): - self.selection[dimension_varname] = slice(ll, None) - elif (op == 'is'): - self.selection[dimension_varname] = ll - - def filter_selection(self): - for single_filter in self.filter: - self.dimension_selection(single_filter) - if self.write_all: - self.ds[self.varname] = \ - self.ds[self.varname].isel(self.selection)*self.scale - else: - self.dset = \ - self.ds[self.varname].isel(self.selection)*self.scale - - def compute(self): - if self.dset is None: - self.ds = xr.open_dataset(self.infile) - if self.filter: - self.filter_selection() - if self.verbose: - print(self.selection) - elif self.write_all is not None: - self.dset = self.ds[self.varname] - - def save(self): - if self.write_all: - self.ds.to_netcdf(self.output) - else: - self.dset.to_netcdf(self.output) - - -if __name__ == '__main__': - warnings.filterwarnings("ignore") - parser = argparse.ArgumentParser() - parser.add_argument( - 'input', - help='input filename in netCDF format' - ) - parser.add_argument( - 'varname', - help='Specify which variable to plot (case sensitive)' - ) - parser.add_argument( - '--filter', - nargs="*", - help='Filter list variable#operator#value_s#value_e' - ) - parser.add_argument( - '--output', - help='Output filename to store the resulting netCDF file' - ) - parser.add_argument( - '--scale', - help='scale factor to apply to selection (float)' - ) - parser.add_argument( - "--write_all", - help="write all data to netCDF", - action="store_true") - parser.add_argument( - "-v", "--verbose", - help="switch on verbose mode", - action="store_true") - args = parser.parse_args() - - dset = netCDF2netCDF(infile=args.input, varname=args.varname, - scale=args.scale, output=args.output, - filter_list=args.filter, - write_all=args.write_all, - verbose=args.verbose) - dset.compute() - dset.save() +#!/usr/bin/env python3 +# +# Apply operations on selected variables +# - scale +# one can also select the range of time (for timeseries) +# to apply these operations over the range only +# when a range of time is selected and when scaling, one +# can choose to save the entire timeseries or +# the selected range only. +# when scaling, one can add additional filters on dimensions +# (typically used to filter over latitudes and longitudes) + + +import argparse +import re +import warnings +from pathlib import Path + +import xarray as xr # noqa: E402 + + +class netCDF2netCDF (): + def __init__(self, infile, varname, scale="", + output="output.netcdf", + write_all=False, + keep_attributes=True, + filter_list="", + where_config="", + other="", + sel=False, + drop=False, + verbose=False): + self.drop = drop + if Path(where_config).exists(): + f = open(where_config) + self.where = f.read().replace("\n", "") + else: + self.where = "" + self.other = other + self.sel = sel + li = list(infile.split(",")) + if len(li) > 1: + self.infile = li + else: + self.infile = infile + self.verbose = verbose + if varname == 'None' or varname is None: + self.varname = varname + else: + li = list(varname.split(",")) + self.varname = li + self.write_all = write_all + self.keep_attributes = keep_attributes + if self.keep_attributes: + xr.set_options(keep_attrs=True) + self.filter = filter_list + self.selection = {} + self.method = {} + if scale == "" or scale is None: + self.scale = 1 + else: + self.scale = float(scale) + if output is None: + self.output = "output.netcdf" + else: + self.output = output + # initialization + self.dset = None + self.subset = None + if self.verbose: + print("infile: ", self.infile) + print("varname: ", self.varname) + print("filter_list: ", self.filter) + print("scale: ", self.scale) + print("write_all: ", self.write_all) + print("keep_attributes: ", self.keep_attributes) + print("sel: ", self.sel) + print("output: ", self.output) + + def apply_selection(self): + self.dset = self.ds + for key in self.selection: + if 'slice' in str(self.selection[key]): + self.dset = self.dset.sel( + {key: self.selection[key]} + ) + else: + self.dset = self.dset.sel( + {key: self.selection[key]}, + method=self.method[key] + ) + + def dimension_selection(self, single_filter): + split_filter = single_filter.split('#') + dimension_varname = split_filter[0] + op = split_filter[1] + if self.sel: + ll = float(split_filter[2]) + else: + ll = int(split_filter[2]) + if (op == 'sl'): + if self.sel: + rl = float(split_filter[3]) + else: + rl = int(split_filter[3]) + self.selection[dimension_varname] = slice(ll, rl) + elif (op == 'to'): + self.selection[dimension_varname] = slice(None, ll) + elif (op == 'from'): + self.selection[dimension_varname] = slice(ll, None) + elif (op == 'is'): + self.selection[dimension_varname] = ll + if self.sel: + rl = split_filter[3] + if 'None' in rl: + self.method[dimension_varname] = None + else: + self.method[dimension_varname] = rl + + def filter_selection(self): + for single_filter in self.filter: + self.dimension_selection(single_filter) + + if self.sel: + self.apply_selection() + else: + self.dset = \ + self.ds.isel(self.selection) + + if self.varname != 'None' and self.varname is not None: + for var in self.varname: + self.dset[var] = \ + self.dset[var]*self.scale + + def compute(self): + if self.dset is None: + if type(self.infile) is list: + self.ds = xr.open_mfdataset(self.infile) + else: + self.ds = xr.open_dataset(self.infile) + if self.where != "": + if self.drop: + if self.verbose: + print("Where with drop=True") + self.ds = self.ds.where( + self.eval_where(self.where), + drop=True + ) + elif self.other is not None and self.other != "": + if self.verbose: + print("Where with other=", float(self.other)) + self.ds = self.ds.where( + self.eval_where(self.where), + other=float(self.other) + ) + else: + self.ds = self.ds.where( + self.eval_where(self.where) + ) + self.filter_selection() + if self.verbose: + print(self.selection) + + def save(self): + if self.varname != 'None' and \ + self.varname is not None and \ + not self.write_all: + self.dset[self.varname].to_netcdf(self.output) + else: + self.dset.to_netcdf(self.output) + + def is_float(self, element) -> bool: + try: + float(element) + return True + except ValueError: + return False + + def eval_where(self, where_condition): + eval_cond = None + list_names = list(set( + list(self.ds.keys()) + + list(self.ds.coords.keys())) + ) + wcond = where_condition + check_cond = where_condition + for var in list_names: + wcond = wcond.replace(var, ' self.ds.' + var + ' ') + check_cond = check_cond.replace(var, '') + to_remove = "[><=&|()]" + check_cond = re.sub(to_remove, "", check_cond).replace("!", "") + check_cond = re.sub(' +', ' ', check_cond).strip() + list_flt = check_cond.split(" ") + no_convert = False + for num in list_flt: + if not self.is_float(num): + no_convert = True + if not no_convert: + eval_cond = eval(wcond) + return eval_cond + + +if __name__ == '__main__': + warnings.filterwarnings("ignore") + parser = argparse.ArgumentParser() + parser.add_argument( + 'input', + help='input filename in netCDF format' + ) + parser.add_argument( + 'varname', + help='Specify which variable to plot (case sensitive)' + ) + parser.add_argument( + '--filter', + nargs="*", + help='Filter list variable#operator#value_s#value_e' + ) + parser.add_argument( + '--where', + help='filename with where condition to be evaluated' + ) + parser.add_argument( + '--output', + help='Output filename to store the resulting netCDF file' + ) + parser.add_argument( + '--scale', + help='scale factor to apply to selection (float)' + ) + parser.add_argument( + '--other', + help='Value to use for locations where condition is False (float)' + ) + parser.add_argument( + "--write_all", + help="write all data to netCDF", + action="store_true") + parser.add_argument( + "--keep_attributes", + help="Keep all attributes", + action="store_true") + parser.add_argument( + "-v", "--verbose", + help="switch on verbose mode", + action="store_true") + parser.add_argument( + "--selection", + help="select by values", + action="store_true") + parser.add_argument( + "--drop", + help="drop values where condition is not met", + action="store_true") + args = parser.parse_args() + + print("args.selection", args.selection) + dset = netCDF2netCDF(infile=args.input, varname=args.varname, + scale=args.scale, output=args.output, + write_all=args.write_all, + sel=args.selection, + keep_attributes=args.keep_attributes, + filter_list=args.filter, + where_config=args.where, + drop=args.drop, other=args.other, + verbose=args.verbose) + dset.compute() + dset.save() diff -r 6015f30a7258 -r 3e73f657a998 xarray_tool.py --- a/xarray_tool.py Sun Aug 29 16:46:54 2021 +0000 +++ b/xarray_tool.py Thu Jan 20 17:07:54 2022 +0000 @@ -1,365 +1,365 @@ -# xarray tool for: -# - getting metadata information -# - select data and save results in csv file for further post-processing - -import argparse -import csv -import os -import warnings - -import geopandas as gdp - -import pandas as pd - -from shapely.geometry import Point -from shapely.ops import nearest_points - -import xarray as xr - - -class XarrayTool (): - def __init__(self, infile, outfile_info="", outfile_summary="", - select="", outfile="", outputdir="", latname="", - latvalN="", latvalS="", lonname="", lonvalE="", - lonvalW="", filter_list="", coords="", time="", - verbose=False, no_missing=False, coords_info=None, - tolerance=None): - self.infile = infile - self.outfile_info = outfile_info - self.outfile_summary = outfile_summary - self.select = select - self.outfile = outfile - self.outputdir = outputdir - self.latname = latname - if tolerance != "" and tolerance is not None: - self.tolerance = float(tolerance) - else: - self.tolerance = -1 - if latvalN != "" and latvalN is not None: - self.latvalN = float(latvalN) - else: - self.latvalN = "" - if latvalS != "" and latvalS is not None: - self.latvalS = float(latvalS) - else: - self.latvalS = "" - self.lonname = lonname - if lonvalE != "" and lonvalE is not None: - self.lonvalE = float(lonvalE) - else: - self.lonvalE = "" - if lonvalW != "" and lonvalW is not None: - self.lonvalW = float(lonvalW) - else: - self.lonvalW = "" - self.filter = filter_list - self.time = time - self.coords = coords - self.verbose = verbose - self.no_missing = no_missing - # initialization - self.dset = None - self.gset = None - self.coords_info = coords_info - if self.verbose: - print("infile: ", self.infile) - print("outfile_info: ", self.outfile_info) - print("outfile_summary: ", self.outfile_summary) - print("outfile: ", self.outfile) - print("select: ", self.select) - print("outfile: ", self.outfile) - print("outputdir: ", self.outputdir) - print("latname: ", self.latname) - print("latvalN: ", self.latvalN) - print("latvalS: ", self.latvalS) - print("lonname: ", self.lonname) - print("lonvalE: ", self.lonvalE) - print("lonvalW: ", self.lonvalW) - print("filter: ", self.filter) - print("time: ", self.time) - print("coords: ", self.coords) - print("coords_info: ", self.coords_info) - - def info(self): - f = open(self.outfile_info, 'w') - ds = xr.open_dataset(self.infile) - ds.info(f) - f.close() - - def summary(self): - f = open(self.outfile_summary, 'w') - ds = xr.open_dataset(self.infile) - writer = csv.writer(f, delimiter='\t') - header = ['VariableName', 'NumberOfDimensions'] - for idx, val in enumerate(ds.dims.items()): - header.append('Dim' + str(idx) + 'Name') - header.append('Dim' + str(idx) + 'Size') - writer.writerow(header) - for name, da in ds.data_vars.items(): - line = [name] - line.append(len(ds[name].shape)) - for d, s in zip(da.shape, da.sizes): - line.append(s) - line.append(d) - writer.writerow(line) - for name, da in ds.coords.items(): - line = [name] - line.append(len(ds[name].shape)) - for d, s in zip(da.shape, da.sizes): - line.append(s) - line.append(d) - writer.writerow(line) - f.close() - - def rowfilter(self, single_filter): - split_filter = single_filter.split('#') - filter_varname = split_filter[0] - op = split_filter[1] - ll = float(split_filter[2]) - if (op == 'bi'): - rl = float(split_filter[3]) - if filter_varname == self.select: - # filter on values of the selected variable - if op == 'bi': - self.dset = self.dset.where( - (self.dset <= rl) & (self.dset >= ll) - ) - elif op == 'le': - self.dset = self.dset.where(self.dset <= ll) - elif op == 'ge': - self.dset = self.dset.where(self.dset >= ll) - elif op == 'e': - self.dset = self.dset.where(self.dset == ll) - else: # filter on other dimensions of the selected variable - if op == 'bi': - self.dset = self.dset.sel({filter_varname: slice(ll, rl)}) - elif op == 'le': - self.dset = self.dset.sel({filter_varname: slice(None, ll)}) - elif op == 'ge': - self.dset = self.dset.sel({filter_varname: slice(ll, None)}) - elif op == 'e': - self.dset = self.dset.sel({filter_varname: ll}, - method='nearest') - - def selection(self): - if self.dset is None: - self.ds = xr.open_dataset(self.infile) - self.dset = self.ds[self.select] # select variable - if self.time: - self.datetime_selection() - if self.filter: - self.filter_selection() - - self.area_selection() - if self.gset.count() > 1: - # convert to dataframe if several rows and cols - self.gset = self.gset.to_dataframe().dropna(how='all'). \ - reset_index() - self.gset.to_csv(self.outfile, header=True, sep='\t') - else: - data = { - self.latname: [self.gset[self.latname].values], - self.lonname: [self.gset[self.lonname].values], - self.select: [self.gset.values] - } - - df = pd.DataFrame(data, columns=[self.latname, self.lonname, - self.select]) - df.to_csv(self.outfile, header=True, sep='\t') - - def datetime_selection(self): - split_filter = self.time.split('#') - time_varname = split_filter[0] - op = split_filter[1] - ll = split_filter[2] - if (op == 'sl'): - rl = split_filter[3] - self.dset = self.dset.sel({time_varname: slice(ll, rl)}) - elif (op == 'to'): - self.dset = self.dset.sel({time_varname: slice(None, ll)}) - elif (op == 'from'): - self.dset = self.dset.sel({time_varname: slice(ll, None)}) - elif (op == 'is'): - self.dset = self.dset.sel({time_varname: ll}, method='nearest') - - def filter_selection(self): - for single_filter in self.filter: - self.rowfilter(single_filter) - - def area_selection(self): - - if self.latvalS != "" and self.lonvalW != "": - # Select geographical area - self.gset = self.dset.sel({self.latname: - slice(self.latvalS, self.latvalN), - self.lonname: - slice(self.lonvalW, self.lonvalE)}) - elif self.latvalN != "" and self.lonvalE != "": - # select nearest location - if self.no_missing: - self.nearest_latvalN = self.latvalN - self.nearest_lonvalE = self.lonvalE - else: - # find nearest location without NaN values - self.nearest_location() - if self.tolerance > 0: - self.gset = self.dset.sel({self.latname: self.nearest_latvalN, - self.lonname: self.nearest_lonvalE}, - method='nearest', - tolerance=self.tolerance) - else: - self.gset = self.dset.sel({self.latname: self.nearest_latvalN, - self.lonname: self.nearest_lonvalE}, - method='nearest') - else: - self.gset = self.dset - - def nearest_location(self): - # Build a geopandas dataframe with all first elements in each dimension - # so we assume null values correspond to a mask that is the same for - # all dimensions in the dataset. - dsel_frame = self.dset - for dim in self.dset.dims: - if dim != self.latname and dim != self.lonname: - dsel_frame = dsel_frame.isel({dim: 0}) - # transform to pandas dataframe - dff = dsel_frame.to_dataframe().dropna().reset_index() - # transform to geopandas to collocate - gdf = gdp.GeoDataFrame(dff, - geometry=gdp.points_from_xy(dff[self.lonname], - dff[self.latname])) - # Find nearest location where values are not null - point = Point(self.lonvalE, self.latvalN) - multipoint = gdf.geometry.unary_union - queried_geom, nearest_geom = nearest_points(point, multipoint) - self.nearest_latvalN = nearest_geom.y - self.nearest_lonvalE = nearest_geom.x - - def selection_from_coords(self): - fcoords = pd.read_csv(self.coords, sep='\t') - for row in fcoords.itertuples(): - self.latvalN = row[0] - self.lonvalE = row[1] - self.outfile = (os.path.join(self.outputdir, - self.select + '_' + - str(row.Index) + '.tabular')) - self.selection() - - def get_coords_info(self): - ds = xr.open_dataset(self.infile) - for c in ds.coords: - filename = os.path.join(self.coords_info, - c.strip() + - '.tabular') - pd = ds.coords[c].to_pandas() - pd.index = range(len(pd)) - pd.to_csv(filename, header=False, sep='\t') - - -if __name__ == '__main__': - warnings.filterwarnings("ignore") - parser = argparse.ArgumentParser() - - parser.add_argument( - 'infile', - help='netCDF input filename' - ) - parser.add_argument( - '--info', - help='Output filename where metadata information is stored' - ) - parser.add_argument( - '--summary', - help='Output filename where data summary information is stored' - ) - parser.add_argument( - '--select', - help='Variable name to select' - ) - parser.add_argument( - '--latname', - help='Latitude name' - ) - parser.add_argument( - '--latvalN', - help='North latitude value' - ) - parser.add_argument( - '--latvalS', - help='South latitude value' - ) - parser.add_argument( - '--lonname', - help='Longitude name' - ) - parser.add_argument( - '--lonvalE', - help='East longitude value' - ) - parser.add_argument( - '--lonvalW', - help='West longitude value' - ) - parser.add_argument( - '--tolerance', - help='Maximum distance between original and selected value for ' - ' inexact matches e.g. abs(index[indexer] - target) <= tolerance' - ) - parser.add_argument( - '--coords', - help='Input file containing Latitude and Longitude' - 'for geographical selection' - ) - parser.add_argument( - '--coords_info', - help='output-folder where for each coordinate, coordinate values ' - ' are being printed in the corresponding outputfile' - ) - parser.add_argument( - '--filter', - nargs="*", - help='Filter list variable#operator#value_s#value_e' - ) - parser.add_argument( - '--time', - help='select timeseries variable#operator#value_s[#value_e]' - ) - parser.add_argument( - '--outfile', - help='csv outfile for storing results of the selection' - '(valid only when --select)' - ) - parser.add_argument( - '--outputdir', - help='folder name for storing results with multiple selections' - '(valid only when --select)' - ) - parser.add_argument( - "-v", "--verbose", - help="switch on verbose mode", - action="store_true" - ) - parser.add_argument( - "--no_missing", - help="""Do not take into account possible null/missing values - (only valid for single location)""", - action="store_true" - ) - args = parser.parse_args() - - p = XarrayTool(args.infile, args.info, args.summary, args.select, - args.outfile, args.outputdir, args.latname, - args.latvalN, args.latvalS, args.lonname, - args.lonvalE, args.lonvalW, args.filter, - args.coords, args.time, args.verbose, - args.no_missing, args.coords_info, args.tolerance) - if args.info: - p.info() - if args.summary: - p.summary() - if args.coords: - p.selection_from_coords() - elif args.select: - p.selection() - elif args.coords_info: - p.get_coords_info() +# xarray tool for: +# - getting metadata information +# - select data and save results in csv file for further post-processing + +import argparse +import csv +import os +import warnings + +import geopandas as gdp + +import pandas as pd + +from shapely.geometry import Point +from shapely.ops import nearest_points + +import xarray as xr + + +class XarrayTool (): + def __init__(self, infile, outfile_info="", outfile_summary="", + select="", outfile="", outputdir="", latname="", + latvalN="", latvalS="", lonname="", lonvalE="", + lonvalW="", filter_list="", coords="", time="", + verbose=False, no_missing=False, coords_info=None, + tolerance=None): + self.infile = infile + self.outfile_info = outfile_info + self.outfile_summary = outfile_summary + self.select = select + self.outfile = outfile + self.outputdir = outputdir + self.latname = latname + if tolerance != "" and tolerance is not None: + self.tolerance = float(tolerance) + else: + self.tolerance = -1 + if latvalN != "" and latvalN is not None: + self.latvalN = float(latvalN) + else: + self.latvalN = "" + if latvalS != "" and latvalS is not None: + self.latvalS = float(latvalS) + else: + self.latvalS = "" + self.lonname = lonname + if lonvalE != "" and lonvalE is not None: + self.lonvalE = float(lonvalE) + else: + self.lonvalE = "" + if lonvalW != "" and lonvalW is not None: + self.lonvalW = float(lonvalW) + else: + self.lonvalW = "" + self.filter = filter_list + self.time = time + self.coords = coords + self.verbose = verbose + self.no_missing = no_missing + # initialization + self.dset = None + self.gset = None + self.coords_info = coords_info + if self.verbose: + print("infile: ", self.infile) + print("outfile_info: ", self.outfile_info) + print("outfile_summary: ", self.outfile_summary) + print("outfile: ", self.outfile) + print("select: ", self.select) + print("outfile: ", self.outfile) + print("outputdir: ", self.outputdir) + print("latname: ", self.latname) + print("latvalN: ", self.latvalN) + print("latvalS: ", self.latvalS) + print("lonname: ", self.lonname) + print("lonvalE: ", self.lonvalE) + print("lonvalW: ", self.lonvalW) + print("filter: ", self.filter) + print("time: ", self.time) + print("coords: ", self.coords) + print("coords_info: ", self.coords_info) + + def info(self): + f = open(self.outfile_info, 'w') + ds = xr.open_dataset(self.infile) + ds.info(f) + f.close() + + def summary(self): + f = open(self.outfile_summary, 'w') + ds = xr.open_dataset(self.infile) + writer = csv.writer(f, delimiter='\t') + header = ['VariableName', 'NumberOfDimensions'] + for idx, val in enumerate(ds.dims.items()): + header.append('Dim' + str(idx) + 'Name') + header.append('Dim' + str(idx) + 'Size') + writer.writerow(header) + for name, da in ds.data_vars.items(): + line = [name] + line.append(len(ds[name].shape)) + for d, s in zip(da.shape, da.sizes): + line.append(s) + line.append(d) + writer.writerow(line) + for name, da in ds.coords.items(): + line = [name] + line.append(len(ds[name].shape)) + for d, s in zip(da.shape, da.sizes): + line.append(s) + line.append(d) + writer.writerow(line) + f.close() + + def rowfilter(self, single_filter): + split_filter = single_filter.split('#') + filter_varname = split_filter[0] + op = split_filter[1] + ll = float(split_filter[2]) + if (op == 'bi'): + rl = float(split_filter[3]) + if filter_varname == self.select: + # filter on values of the selected variable + if op == 'bi': + self.dset = self.dset.where( + (self.dset <= rl) & (self.dset >= ll) + ) + elif op == 'le': + self.dset = self.dset.where(self.dset <= ll) + elif op == 'ge': + self.dset = self.dset.where(self.dset >= ll) + elif op == 'e': + self.dset = self.dset.where(self.dset == ll) + else: # filter on other dimensions of the selected variable + if op == 'bi': + self.dset = self.dset.sel({filter_varname: slice(ll, rl)}) + elif op == 'le': + self.dset = self.dset.sel({filter_varname: slice(None, ll)}) + elif op == 'ge': + self.dset = self.dset.sel({filter_varname: slice(ll, None)}) + elif op == 'e': + self.dset = self.dset.sel({filter_varname: ll}, + method='nearest') + + def selection(self): + if self.dset is None: + self.ds = xr.open_dataset(self.infile) + self.dset = self.ds[self.select] # select variable + if self.time: + self.datetime_selection() + if self.filter: + self.filter_selection() + + self.area_selection() + if self.gset.count() > 1: + # convert to dataframe if several rows and cols + self.gset = self.gset.to_dataframe().dropna(how='all'). \ + reset_index() + self.gset.to_csv(self.outfile, header=True, sep='\t') + else: + data = { + self.latname: [self.gset[self.latname].values], + self.lonname: [self.gset[self.lonname].values], + self.select: [self.gset.values] + } + + df = pd.DataFrame(data, columns=[self.latname, self.lonname, + self.select]) + df.to_csv(self.outfile, header=True, sep='\t') + + def datetime_selection(self): + split_filter = self.time.split('#') + time_varname = split_filter[0] + op = split_filter[1] + ll = split_filter[2] + if (op == 'sl'): + rl = split_filter[3] + self.dset = self.dset.sel({time_varname: slice(ll, rl)}) + elif (op == 'to'): + self.dset = self.dset.sel({time_varname: slice(None, ll)}) + elif (op == 'from'): + self.dset = self.dset.sel({time_varname: slice(ll, None)}) + elif (op == 'is'): + self.dset = self.dset.sel({time_varname: ll}, method='nearest') + + def filter_selection(self): + for single_filter in self.filter: + self.rowfilter(single_filter) + + def area_selection(self): + + if self.latvalS != "" and self.lonvalW != "": + # Select geographical area + self.gset = self.dset.sel({self.latname: + slice(self.latvalS, self.latvalN), + self.lonname: + slice(self.lonvalW, self.lonvalE)}) + elif self.latvalN != "" and self.lonvalE != "": + # select nearest location + if self.no_missing: + self.nearest_latvalN = self.latvalN + self.nearest_lonvalE = self.lonvalE + else: + # find nearest location without NaN values + self.nearest_location() + if self.tolerance > 0: + self.gset = self.dset.sel({self.latname: self.nearest_latvalN, + self.lonname: self.nearest_lonvalE}, + method='nearest', + tolerance=self.tolerance) + else: + self.gset = self.dset.sel({self.latname: self.nearest_latvalN, + self.lonname: self.nearest_lonvalE}, + method='nearest') + else: + self.gset = self.dset + + def nearest_location(self): + # Build a geopandas dataframe with all first elements in each dimension + # so we assume null values correspond to a mask that is the same for + # all dimensions in the dataset. + dsel_frame = self.dset + for dim in self.dset.dims: + if dim != self.latname and dim != self.lonname: + dsel_frame = dsel_frame.isel({dim: 0}) + # transform to pandas dataframe + dff = dsel_frame.to_dataframe().dropna().reset_index() + # transform to geopandas to collocate + gdf = gdp.GeoDataFrame(dff, + geometry=gdp.points_from_xy(dff[self.lonname], + dff[self.latname])) + # Find nearest location where values are not null + point = Point(self.lonvalE, self.latvalN) + multipoint = gdf.geometry.unary_union + queried_geom, nearest_geom = nearest_points(point, multipoint) + self.nearest_latvalN = nearest_geom.y + self.nearest_lonvalE = nearest_geom.x + + def selection_from_coords(self): + fcoords = pd.read_csv(self.coords, sep='\t') + for row in fcoords.itertuples(): + self.latvalN = row[0] + self.lonvalE = row[1] + self.outfile = (os.path.join(self.outputdir, + self.select + '_' + + str(row.Index) + '.tabular')) + self.selection() + + def get_coords_info(self): + ds = xr.open_dataset(self.infile) + for c in ds.coords: + filename = os.path.join(self.coords_info, + c.strip() + + '.tabular') + pd = ds.coords[c].to_pandas() + pd.index = range(len(pd)) + pd.to_csv(filename, header=False, sep='\t') + + +if __name__ == '__main__': + warnings.filterwarnings("ignore") + parser = argparse.ArgumentParser() + + parser.add_argument( + 'infile', + help='netCDF input filename' + ) + parser.add_argument( + '--info', + help='Output filename where metadata information is stored' + ) + parser.add_argument( + '--summary', + help='Output filename where data summary information is stored' + ) + parser.add_argument( + '--select', + help='Variable name to select' + ) + parser.add_argument( + '--latname', + help='Latitude name' + ) + parser.add_argument( + '--latvalN', + help='North latitude value' + ) + parser.add_argument( + '--latvalS', + help='South latitude value' + ) + parser.add_argument( + '--lonname', + help='Longitude name' + ) + parser.add_argument( + '--lonvalE', + help='East longitude value' + ) + parser.add_argument( + '--lonvalW', + help='West longitude value' + ) + parser.add_argument( + '--tolerance', + help='Maximum distance between original and selected value for ' + ' inexact matches e.g. abs(index[indexer] - target) <= tolerance' + ) + parser.add_argument( + '--coords', + help='Input file containing Latitude and Longitude' + 'for geographical selection' + ) + parser.add_argument( + '--coords_info', + help='output-folder where for each coordinate, coordinate values ' + ' are being printed in the corresponding outputfile' + ) + parser.add_argument( + '--filter', + nargs="*", + help='Filter list variable#operator#value_s#value_e' + ) + parser.add_argument( + '--time', + help='select timeseries variable#operator#value_s[#value_e]' + ) + parser.add_argument( + '--outfile', + help='csv outfile for storing results of the selection' + '(valid only when --select)' + ) + parser.add_argument( + '--outputdir', + help='folder name for storing results with multiple selections' + '(valid only when --select)' + ) + parser.add_argument( + "-v", "--verbose", + help="switch on verbose mode", + action="store_true" + ) + parser.add_argument( + "--no_missing", + help="""Do not take into account possible null/missing values + (only valid for single location)""", + action="store_true" + ) + args = parser.parse_args() + + p = XarrayTool(args.infile, args.info, args.summary, args.select, + args.outfile, args.outputdir, args.latname, + args.latvalN, args.latvalS, args.lonname, + args.lonvalE, args.lonvalW, args.filter, + args.coords, args.time, args.verbose, + args.no_missing, args.coords_info, args.tolerance) + if args.info: + p.info() + if args.summary: + p.summary() + if args.coords: + p.selection_from_coords() + elif args.select: + p.selection() + elif args.coords_info: + p.get_coords_info()