Mercurial > repos > ecology > xarray_select
changeset 4:b393815e4cb7 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit fd8ad4d97db7b1fd3876ff63e14280474e06fdf7
author | ecology |
---|---|
date | Sun, 31 Jul 2022 21:20:41 +0000 |
parents | bf595d613af4 |
children | |
files | macros.xml macros_timeseries.xml test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0.png test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0_title.png test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time1.png test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time50.png test-data/time_series.png test-data/time_series.tabular test-data/time_series_customized.png test-data/time_series_customized.tabular test-data/version.tabular timeseries.py xarray_info.py xarray_select.py xarray_select.xml xarray_tool.py |
diffstat | 16 files changed, 822 insertions(+), 372 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Thu Jan 20 17:07:19 2022 +0000 +++ b/macros.xml Sun Jul 31 21:20:41 2022 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">0.20.2</token> + <token name="@TOOL_VERSION@">2022.3.0</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">20.05</token> <xml name="edam_ontology">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros_timeseries.xml Sun Jul 31 21:20:41 2022 +0000 @@ -0,0 +1,47 @@ +<macros> + <xml name="config_series"> + <configfiles> + <configfile name="series_customization"><![CDATA[ +{ +#if $condi_datetime.datetime=="yes" +#if str($condi_datetime.time_name).strip() +"time_name":'$condi_datetime.time_name', +#end if +#if str($condi_datetime.time_start_value).strip() +"time_start_value":"$condi_datetime.time_start_value", +#end if +#if str($condi_datetime.time_end_value).strip() +"time_end_value":"$condi_datetime.time_end_value", +#end if +#end if +#if str($lon_value).strip() +"lon_value":'$lon_value', +#end if +#if str($lat_value).strip() +"lat_value":'$lat_value', +#end if +#if $lon_name +"lon_name":'$lon_name', +#end if +#if $lat_name +"lat_name":'$lat_name', +#end if +#if str($adv.format_date).strip() +"format_date":'$adv.format_date', +#end if +#if str($adv.plot_title).strip() +"title":'$adv.plot_title', +#end if +#if str($adv.xlabel).strip() +"xlabel":'$adv.xlabel', +#end if +#if str($adv.ylabel).strip() +"ylabel":'$adv.ylabel', +#end if +} + ]]> + </configfile> + </configfiles> + + </xml> +</macros>
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0.png has changed
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time0_title.png has changed
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time1.png has changed
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133_time50.png has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/time_series.tabular Sun Jul 31 21:20:41 2022 +0000 @@ -0,0 +1,38 @@ +time longitude latitude depth chl +2010-12-15 00:00:00 -6.0000005 44.75 0.50576 0.31 +2011-01-15 00:00:00 -6.0000005 44.75 0.50576 0.37 +2011-02-15 00:00:00 -6.0000005 44.75 0.50576 0.81 +2011-03-15 00:00:00 -6.0000005 44.75 0.50576 1.41 +2011-04-15 00:00:00 -6.0000005 44.75 0.50576 1.8399999 +2011-05-15 00:00:00 -6.0000005 44.75 0.50576 0.099999994 +2011-06-15 00:00:00 -6.0000005 44.75 0.50576 0.03 +2011-07-15 00:00:00 -6.0000005 44.75 0.50576 0.03 +2011-08-15 00:00:00 -6.0000005 44.75 0.50576 0.01 +2011-09-15 00:00:00 -6.0000005 44.75 0.50576 0.01 +2011-10-15 00:00:00 -6.0000005 44.75 0.50576 0.02 +2011-11-15 00:00:00 -6.0000005 44.75 0.50576 0.07 +2011-12-15 00:00:00 -6.0000005 44.75 0.50576 0.34 +2012-01-15 00:00:00 -6.0000005 44.75 0.50576 0.35 +2012-02-15 00:00:00 -6.0000005 44.75 0.50576 0.37 +2012-03-15 00:00:00 -6.0000005 44.75 0.50576 1.5799999 +2012-04-15 00:00:00 -6.0000005 44.75 0.50576 1.12 +2012-05-15 00:00:00 -6.0000005 44.75 0.50576 1.16 +2012-06-15 00:00:00 -6.0000005 44.75 0.50576 0.07 +2012-07-15 00:00:00 -6.0000005 44.75 0.50576 0.01 +2012-08-15 00:00:00 -6.0000005 44.75 0.50576 0.02 +2012-09-15 00:00:00 -6.0000005 44.75 0.50576 0.03 +2012-10-15 00:00:00 -6.0000005 44.75 0.50576 0.22 +2012-11-15 00:00:00 -6.0000005 44.75 0.50576 0.34 +2012-12-15 00:00:00 -6.0000005 44.75 0.50576 0.29 +2013-01-15 00:00:00 -6.0000005 44.75 0.50576 0.37 +2013-02-15 00:00:00 -6.0000005 44.75 0.50576 0.38 +2013-03-15 00:00:00 -6.0000005 44.75 0.50576 1.15 +2013-04-15 00:00:00 -6.0000005 44.75 0.50576 1.9 +2013-05-15 00:00:00 -6.0000005 44.75 0.50576 0.5 +2013-06-15 00:00:00 -6.0000005 44.75 0.50576 0.12 +2013-07-15 00:00:00 -6.0000005 44.75 0.50576 0.01 +2013-08-15 00:00:00 -6.0000005 44.75 0.50576 0.0 +2013-09-15 00:00:00 -6.0000005 44.75 0.50576 0.01 +2013-10-15 00:00:00 -6.0000005 44.75 0.50576 0.01 +2013-11-15 00:00:00 -6.0000005 
44.75 0.50576 0.12 +2013-12-15 00:00:00 -6.0000005 44.75 0.50576 0.34
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/time_series_customized.tabular Sun Jul 31 21:20:41 2022 +0000 @@ -0,0 +1,146 @@ +time longitude latitude depth chl +2002-12-15 00:00:00 -5.0000005 43.5 0.50576 0.34 +2003-01-15 00:00:00 -5.0000005 43.5 0.50576 0.41 +2003-02-15 00:00:00 -5.0000005 43.5 0.50576 0.55 +2003-03-15 00:00:00 -5.0000005 43.5 0.50576 1.0699999 +2003-04-15 00:00:00 -5.0000005 43.5 0.50576 0.89 +2003-05-15 00:00:00 -5.0000005 43.5 0.50576 0.14 +2003-06-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2003-07-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2003-08-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2003-09-15 00:00:00 -5.0000005 43.5 0.50576 0.04 +2003-10-15 00:00:00 -5.0000005 43.5 0.50576 0.08 +2003-11-15 00:00:00 -5.0000005 43.5 0.50576 0.39 +2003-12-15 00:00:00 -5.0000005 43.5 0.50576 0.31 +2004-01-15 00:00:00 -5.0000005 43.5 0.50576 0.38 +2004-02-15 00:00:00 -5.0000005 43.5 0.50576 0.57 +2004-03-15 00:00:00 -5.0000005 43.5 0.50576 1.05 +2004-04-15 00:00:00 -5.0000005 43.5 0.50576 1.43 +2004-05-15 00:00:00 -5.0000005 43.5 0.50576 1.27 +2004-06-15 00:00:00 -5.0000005 43.5 0.50576 0.81 +2004-07-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2004-08-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2004-09-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2004-10-15 00:00:00 -5.0000005 43.5 0.50576 0.19999999 +2004-11-15 00:00:00 -5.0000005 43.5 0.50576 0.41 +2004-12-15 00:00:00 -5.0000005 43.5 0.50576 0.37 +2005-01-15 00:00:00 -5.0000005 43.5 0.50576 0.42 +2005-02-15 00:00:00 -5.0000005 43.5 0.50576 0.59 +2005-03-15 00:00:00 -5.0000005 43.5 0.50576 1.37 +2005-04-15 00:00:00 -5.0000005 43.5 0.50576 1.4399999 +2005-05-15 00:00:00 -5.0000005 43.5 0.50576 0.34 +2005-06-15 00:00:00 -5.0000005 43.5 0.50576 0.22999999 +2005-07-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2005-08-15 00:00:00 -5.0000005 43.5 0.50576 0.75 +2005-09-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2005-10-15 00:00:00 -5.0000005 43.5 0.50576 0.22999999 +2005-11-15 00:00:00 -5.0000005 43.5 0.50576 
0.5 +2005-12-15 00:00:00 -5.0000005 43.5 0.50576 0.42 +2006-01-15 00:00:00 -5.0000005 43.5 0.50576 0.51 +2006-02-15 00:00:00 -5.0000005 43.5 0.50576 0.81 +2006-03-15 00:00:00 -5.0000005 43.5 0.50576 1.78 +2006-04-15 00:00:00 -5.0000005 43.5 0.50576 1.87 +2006-05-15 00:00:00 -5.0000005 43.5 0.50576 0.37 +2006-06-15 00:00:00 -5.0000005 43.5 0.50576 0.87 +2006-07-15 00:00:00 -5.0000005 43.5 0.50576 0.04 +2006-08-15 00:00:00 -5.0000005 43.5 0.50576 0.04 +2006-09-15 00:00:00 -5.0000005 43.5 0.50576 0.03 +2006-10-15 00:00:00 -5.0000005 43.5 0.50576 0.22 +2006-11-15 00:00:00 -5.0000005 43.5 0.50576 0.51 +2006-12-15 00:00:00 -5.0000005 43.5 0.50576 0.41 +2007-01-15 00:00:00 -5.0000005 43.5 0.50576 0.39999998 +2007-02-15 00:00:00 -5.0000005 43.5 0.50576 0.61 +2007-03-15 00:00:00 -5.0000005 43.5 0.50576 1.24 +2007-04-15 00:00:00 -5.0000005 43.5 0.50576 1.09 +2007-05-15 00:00:00 -5.0000005 43.5 0.50576 0.28 +2007-06-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2007-07-15 00:00:00 -5.0000005 43.5 0.50576 0.01 +2007-08-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2007-09-15 00:00:00 -5.0000005 43.5 0.50576 1.6899999 +2007-10-15 00:00:00 -5.0000005 43.5 0.50576 0.71999997 +2007-11-15 00:00:00 -5.0000005 43.5 0.50576 1.25 +2007-12-15 00:00:00 -5.0000005 43.5 0.50576 0.68 +2008-01-15 00:00:00 -5.0000005 43.5 0.50576 0.57 +2008-02-15 00:00:00 -5.0000005 43.5 0.50576 0.95 +2008-03-15 00:00:00 -5.0000005 43.5 0.50576 1.1 +2008-04-15 00:00:00 -5.0000005 43.5 0.50576 1.35 +2008-05-15 00:00:00 -5.0000005 43.5 0.50576 0.34 +2008-06-15 00:00:00 -5.0000005 43.5 0.50576 0.01 +2008-07-15 00:00:00 -5.0000005 43.5 0.50576 0.04 +2008-08-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2008-09-15 00:00:00 -5.0000005 43.5 0.50576 0.31 +2008-10-15 00:00:00 -5.0000005 43.5 0.50576 0.17 +2008-11-15 00:00:00 -5.0000005 43.5 0.50576 0.21 +2008-12-15 00:00:00 -5.0000005 43.5 0.50576 0.34 +2009-01-15 00:00:00 -5.0000005 43.5 0.50576 0.42999998 +2009-02-15 00:00:00 -5.0000005 43.5 0.50576 0.55 +2009-03-15 00:00:00 
-5.0000005 43.5 0.50576 1.0 +2009-04-15 00:00:00 -5.0000005 43.5 0.50576 0.71999997 +2009-05-15 00:00:00 -5.0000005 43.5 0.50576 0.14 +2009-06-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2009-07-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2009-08-15 00:00:00 -5.0000005 43.5 0.50576 0.01 +2009-09-15 00:00:00 -5.0000005 43.5 0.50576 0.29 +2009-10-15 00:00:00 -5.0000005 43.5 0.50576 0.90999997 +2009-11-15 00:00:00 -5.0000005 43.5 0.50576 0.45 +2009-12-15 00:00:00 -5.0000005 43.5 0.50576 0.34 +2010-01-15 00:00:00 -5.0000005 43.5 0.50576 0.42999998 +2010-02-15 00:00:00 -5.0000005 43.5 0.50576 0.56 +2010-03-15 00:00:00 -5.0000005 43.5 0.50576 1.35 +2010-04-15 00:00:00 -5.0000005 43.5 0.50576 1.63 +2010-05-15 00:00:00 -5.0000005 43.5 0.50576 0.41 +2010-06-15 00:00:00 -5.0000005 43.5 0.50576 0.099999994 +2010-07-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2010-08-15 00:00:00 -5.0000005 43.5 0.50576 0.03 +2010-09-15 00:00:00 -5.0000005 43.5 0.50576 0.14 +2010-10-15 00:00:00 -5.0000005 43.5 0.50576 0.099999994 +2010-11-15 00:00:00 -5.0000005 43.5 0.50576 0.34 +2010-12-15 00:00:00 -5.0000005 43.5 0.50576 0.37 +2011-01-15 00:00:00 -5.0000005 43.5 0.50576 0.55 +2011-02-15 00:00:00 -5.0000005 43.5 0.50576 0.96999997 +2011-03-15 00:00:00 -5.0000005 43.5 0.50576 1.65 +2011-04-15 00:00:00 -5.0000005 43.5 0.50576 1.16 +2011-05-15 00:00:00 -5.0000005 43.5 0.50576 0.32 +2011-06-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2011-07-15 00:00:00 -5.0000005 43.5 0.50576 0.089999996 +2011-08-15 00:00:00 -5.0000005 43.5 0.50576 0.03 +2011-09-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2011-10-15 00:00:00 -5.0000005 43.5 0.50576 0.25 +2011-11-15 00:00:00 -5.0000005 43.5 0.50576 0.34 +2011-12-15 00:00:00 -5.0000005 43.5 0.50576 0.37 +2012-01-15 00:00:00 -5.0000005 43.5 0.50576 0.45 +2012-02-15 00:00:00 -5.0000005 43.5 0.50576 0.68 +2012-03-15 00:00:00 -5.0000005 43.5 0.50576 1.81 +2012-04-15 00:00:00 -5.0000005 43.5 0.50576 1.75 +2012-05-15 00:00:00 -5.0000005 43.5 0.50576 1.03 +2012-06-15 00:00:00 
-5.0000005 43.5 0.50576 0.07 +2012-07-15 00:00:00 -5.0000005 43.5 0.50576 0.01 +2012-08-15 00:00:00 -5.0000005 43.5 0.50576 0.01 +2012-09-15 00:00:00 -5.0000005 43.5 0.50576 0.099999994 +2012-10-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2012-11-15 00:00:00 -5.0000005 43.5 0.50576 0.14 +2012-12-15 00:00:00 -5.0000005 43.5 0.50576 0.34 +2013-01-15 00:00:00 -5.0000005 43.5 0.50576 0.5 +2013-02-15 00:00:00 -5.0000005 43.5 0.50576 1.09 +2013-03-15 00:00:00 -5.0000005 43.5 0.50576 1.62 +2013-04-15 00:00:00 -5.0000005 43.5 0.50576 1.4 +2013-05-15 00:00:00 -5.0000005 43.5 0.50576 0.37 +2013-06-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2013-07-15 00:00:00 -5.0000005 43.5 0.50576 0.48 +2013-08-15 00:00:00 -5.0000005 43.5 0.50576 0.08 +2013-09-15 00:00:00 -5.0000005 43.5 0.50576 0.21 +2013-10-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2013-11-15 00:00:00 -5.0000005 43.5 0.50576 0.37 +2013-12-15 00:00:00 -5.0000005 43.5 0.50576 0.59 +2014-01-15 00:00:00 -5.0000005 43.5 0.50576 0.56 +2014-02-15 00:00:00 -5.0000005 43.5 0.50576 0.90999997 +2014-03-15 00:00:00 -5.0000005 43.5 0.50576 1.3299999 +2014-04-15 00:00:00 -5.0000005 43.5 0.50576 1.09 +2014-05-15 00:00:00 -5.0000005 43.5 0.50576 0.37 +2014-06-15 00:00:00 -5.0000005 43.5 0.50576 0.11 +2014-07-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2014-08-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2014-09-15 00:00:00 -5.0000005 43.5 0.50576 0.11 +2014-10-15 00:00:00 -5.0000005 43.5 0.50576 0.02 +2014-11-15 00:00:00 -5.0000005 43.5 0.50576 0.07 +2014-12-15 00:00:00 -5.0000005 43.5 0.50576 0.17
--- a/test-data/version.tabular Thu Jan 20 17:07:19 2022 +0000 +++ b/test-data/version.tabular Sun Jul 31 21:20:41 2022 +0000 @@ -1,1 +1,1 @@ -Galaxy xarray version 0.20.2 +Galaxy xarray version 2022.3.0
#!/usr/bin/env python3
# File added in this changeset: timeseries.py
#
# usage: netCDF_timeseries.py [-h] [--output output.png]
#                               [--save timeseries.tabular]
#                               [--config config-file]
#                               [-v]
#                               input varname
# positional arguments:
#  input            input filename with geographical coordinates (netCDF
#                   format)
#  varname          Specify which variable to extract (case sensitive)
#
# optional arguments:
#  -h, --help                 show this help message and exit
#  --output output.png        filename to store image (png format)
#  --save timeseries.tabular  filename to store timeseries (tabular format)
#  --config                   config file extract parameters
#  -v, --verbose              switch on verbose mode
#
import argparse
import ast
import warnings

import cftime  # noqa: F401

import matplotlib as mpl
mpl.use('Agg')

import matplotlib.pyplot as plt   # noqa: I202,E402
from matplotlib.dates import DateFormatter   # noqa: I202,E402

import xarray as xr  # noqa: I202,E402


class TimeSeries:
    """Extract one variable from netCDF input as a time series.

    The series is written to a tab-separated file and plotted to an image.
    Optional settings (location, time window, labels) come from a config
    file holding a Python dict literal.
    """

    # Keys accepted from the config file; each one is stored on the instance
    # attribute of the same name.  Any other key is ignored.
    _CONFIG_KEYS = (
        'time_start_value',
        'time_end_value',
        'lon_value',
        'lat_value',
        'lon_name',
        'lat_name',
        'time_name',
        'title',
        'xlabel',
        'ylabel',
        'format_date',
    )

    def __init__(self, input, varname, output, save, verbose=False,
                 config_file=""):
        """Store the settings and open the dataset.

        input: one filename, or several filenames separated by commas.
        varname: name of the variable to extract.
        output: image filename ("Timeseries.png" when empty or None).
        save: tabular filename ("Timeseries.tabular" when empty or None).
        verbose: print the effective settings when true.
        config_file: optional path of the config file (may be "" or None).
        """
        filenames = input.split(",")
        # Keep a plain string for a single file, a list for several files.
        self.input = filenames if len(filenames) > 1 else input

        self.varname = varname
        self.xylim_supported = True
        if output == "" or output is None:
            self.output = "Timeseries.png"
        else:
            self.output = output
        if save == "" or save is None:
            self.save = "Timeseries.tabular"
        else:
            self.save = save
        self.verbose = verbose

        # Defaults, possibly overridden by the config file below.
        self.time_start_value = ""
        self.time_end_value = ""
        self.lon_value = ""
        self.lat_value = ""
        self.lat_name = 'lat'
        self.lon_name = 'lon'
        self.time_name = 'time'
        self.title = ''
        self.xlabel = ''
        self.ylabel = ''
        self.format_date = ''
        if config_file != "" and config_file is not None:
            self._load_config(config_file)

        if isinstance(self.input, list):
            self.dset = xr.open_mfdataset(self.input, use_cftime=True)
        else:
            self.dset = xr.open_dataset(self.input, use_cftime=True)

        if verbose:
            print("input: ", self.input)
            print("varname: ", self.varname)
            if self.time_start_value:
                print("time_start_value: ", self.time_start_value)
            if self.time_end_value:
                print("time_end_value: ", self.time_end_value)
            print("output: ", self.output)
            if self.lon_value:
                print(self.lon_name, self.lon_value)
            if self.lat_value:
                print(self.lat_name, self.lat_value)

    def _load_config(self, config_file):
        """Read the dict literal in *config_file* and apply the known keys."""
        with open(config_file) as f:
            content = f.read().replace("\n", "")
        # Only the text between the first '{' and the following '}' is
        # evaluated, and only as a Python literal (no code execution).
        body = content.split('{')[1].split('}')[0]
        settings = ast.literal_eval('{' + body.strip() + '}')
        for key in self._CONFIG_KEYS:
            if key in settings:
                setattr(self, key, settings[key])
        # 'X' in the configured date format stands for '%'.
        self.format_date = self.format_date.replace('X', '%')

    def plot(self):
        """Write the time series to ``self.save`` and plot it to
        ``self.output``."""
        if self.lat_value and self.lon_value:
            # Both coordinates given: pick the nearest grid point.
            self.df = self.dset.sel({self.lat_name: float(self.lat_value),
                                     self.lon_name: float(self.lon_value)},
                                    method='nearest')
        else:
            self.df = self.dset
        if self.time_start_value or self.time_end_value:
            # An unset bound is the empty string; pass None instead so the
            # slice is open-ended on that side rather than using "" as a
            # time label.
            time_window = slice(self.time_start_value or None,
                                self.time_end_value or None)
            self.df = self.df.sel({self.time_name: time_window})
        # Saving the time series into a tabular
        self.df = self.df[self.varname].squeeze().to_dataframe()
        self.df.dropna().to_csv(self.save, sep='\t')
        # Plot the time series into png image
        fig = plt.figure(figsize=(15, 5))
        ax = plt.subplot(111)
        self.df[self.varname].plot(ax=ax)
        if self.title:
            plt.title(self.title)
        if self.xlabel:
            plt.xlabel(self.xlabel)
        if self.ylabel:
            plt.ylabel(self.ylabel)
        if self.format_date:
            ax.xaxis.set_major_formatter(DateFormatter(self.format_date))
        fig.tight_layout()
        fig.savefig(self.output)
        # Release the figure once it has been written.
        plt.close(fig)


if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input',
        help='input filename with geographical coordinates (netCDF format)'
    )
    parser.add_argument(
        'varname',
        help='Specify which variable to plot (case sensitive)'
    )
    parser.add_argument(
        '--output',
        help='output filename to store resulting image (png format)'
    )
    parser.add_argument(
        '--save',
        help='save resulting tabular file (tabular format) into filename'
    )
    parser.add_argument(
        '--config',
        help='pass timeseries parameters via a config file'
    )
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true")
    args = parser.parse_args()

    dset = TimeSeries(input=args.input, varname=args.varname,
                      output=args.output, save=args.save, verbose=args.verbose,
                      config_file=args.config)
    dset.plot()
# File added in this changeset: xarray_info.py
# xarray tool for:
# - getting metadata information
# - select data and save results in csv file for further post-processing

import argparse
import csv
import os
import warnings
from itertools import chain

import xarray as xr


class XarrayInfo:
    """Write metadata, a per-variable summary and coordinate values of a
    netCDF file."""

    def __init__(self, infile, outfile_info="", outfile_summary="",
                 verbose=False, coords_info=None):
        """Store the input filename and the requested output locations.

        infile: netCDF input filename.
        outfile_info: file written by info().
        outfile_summary: file written by summary().
        verbose: print the settings when true.
        coords_info: folder written to by get_coords_info().
        """
        self.infile = infile
        self.outfile_info = outfile_info
        self.outfile_summary = outfile_summary
        self.coords_info = coords_info
        self.verbose = verbose
        # initialization
        self.dset = None
        self.gset = None
        if self.verbose:
            print("infile: ", self.infile)
            print("outfile_info: ", self.outfile_info)
            print("outfile_summary: ", self.outfile_summary)
            print("coords_info: ", self.coords_info)

    def info(self):
        """Write the output of ``Dataset.info`` to ``self.outfile_info``."""
        # Context managers close both the file and the dataset even when
        # reading fails.
        with open(self.outfile_info, 'w') as f:
            with xr.open_dataset(self.infile) as ds:
                ds.info(f)

    def summary(self):
        """Write one tab-separated row per data variable and coordinate.

        Each row holds the name, the number of dimensions and then a
        (dimension name, dimension size) pair per dimension.
        """
        # newline='' is what the csv module asks for on files it writes to.
        with open(self.outfile_summary, 'w', newline='') as f:
            with xr.open_dataset(self.infile) as ds:
                writer = csv.writer(f, delimiter='\t')
                header = ['VariableName', 'NumberOfDimensions']
                for idx in range(len(ds.dims)):
                    header.append('Dim' + str(idx) + 'Name')
                    header.append('Dim' + str(idx) + 'Size')
                writer.writerow(header)
                # Data variables first, then coordinates, same row layout.
                for name, da in chain(ds.data_vars.items(),
                                      ds.coords.items()):
                    line = [name, len(da.shape)]
                    for dim_name, dim_size in da.sizes.items():
                        line.append(dim_name)
                        line.append(dim_size)
                    writer.writerow(line)

    def get_coords_info(self):
        """Write the values of every coordinate to
        ``<coords_info>/<coordinate name>.tabular``."""
        with xr.open_dataset(self.infile) as ds:
            for c in ds.coords:
                filename = os.path.join(self.coords_info,
                                        c.strip() + '.tabular')
                values = ds.coords[c].to_pandas()
                # Replace the coordinate index by a plain 0..n-1 counter.
                values.index = range(len(values))
                values.to_csv(filename, header=False, sep='\t')


if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    parser = argparse.ArgumentParser()

    parser.add_argument(
        'infile',
        help='netCDF input filename'
    )
    parser.add_argument(
        '--info',
        help='Output filename where metadata information is stored'
    )
    parser.add_argument(
        '--summary',
        help='Output filename where data summary information is stored'
    )
    parser.add_argument(
        '--coords_info',
        help='output-folder where for each coordinate, coordinate values '
             ' are being printed in the corresponding outputfile'
    )
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true"
    )
    args = parser.parse_args()

    p = XarrayInfo(args.infile, args.info, args.summary,
                   args.verbose, args.coords_info)
    # Each output is produced whenever its option is given; --coords_info is
    # no longer skipped when --info is passed as well.
    if args.info:
        p.info()
    if args.coords_info:
        p.get_coords_info()
    if args.summary:
        p.summary()
# File added in this changeset: xarray_select.py
# xarray tool for:
# - getting metadata information
# - select data and save results in csv file for further post-processing

import argparse
import os
import warnings

import geopandas as gdp

import pandas as pd

from shapely.geometry import Point
from shapely.ops import nearest_points

import xarray as xr


def _float_or_blank(value):
    """Return ``float(value)``, or ``""`` when value is ``""`` or ``None``."""
    if value != "" and value is not None:
        return float(value)
    return ""


class XarraySelect:
    """Select one variable of a netCDF file by time, value filters and
    location, and write the result as a tab-separated file."""

    def __init__(self, infile, select="", outfile="", outputdir="",
                 latname="", latvalN="", latvalS="", lonname="",
                 lonvalE="", lonvalW="", filter_list="", coords="",
                 time="", verbose=False, no_missing=False,
                 tolerance=None):
        """Store the selection settings.

        Latitude/longitude bounds are converted to float, or kept as ""
        when not given.  ``tolerance`` is converted to float, or set to -1
        when not given (meaning: no tolerance is passed to the nearest
        lookup).
        """
        self.infile = infile
        self.select = select
        self.outfile = outfile
        self.outputdir = outputdir
        self.latname = latname
        if tolerance != "" and tolerance is not None:
            self.tolerance = float(tolerance)
        else:
            self.tolerance = -1
        self.latvalN = _float_or_blank(latvalN)
        self.latvalS = _float_or_blank(latvalS)
        self.lonname = lonname
        self.lonvalE = _float_or_blank(lonvalE)
        self.lonvalW = _float_or_blank(lonvalW)
        self.filter = filter_list
        self.time = time
        self.coords = coords
        self.verbose = verbose
        self.no_missing = no_missing
        # initialization
        self.dset = None
        self.gset = None
        if self.verbose:
            for label in ("infile", "outfile", "select", "outputdir",
                          "latname", "latvalN", "latvalS", "lonname",
                          "lonvalE", "lonvalW", "filter", "time", "coords"):
                print(label + ": ", getattr(self, label))

    def rowfilter(self, single_filter):
        """Apply one filter given as ``variable#operator#value[#value_end]``.

        Operators handled: 'bi' (between value and value_end), 'le', 'ge'
        and 'e'.  Any other operator leaves the data unchanged.
        """
        split_filter = single_filter.split('#')
        filter_varname = split_filter[0]
        op = split_filter[1]
        ll = float(split_filter[2])
        # The upper limit is only present for the 'bi' operator.
        rl = float(split_filter[3]) if op == 'bi' else None
        if filter_varname == self.select:
            # filter on values of the selected variable
            if op == 'bi':
                mask = (self.dset <= rl) & (self.dset >= ll)
            elif op == 'le':
                mask = self.dset <= ll
            elif op == 'ge':
                mask = self.dset >= ll
            elif op == 'e':
                mask = self.dset == ll
            else:
                return
            self.dset = self.dset.where(mask)
        else:
            # filter on other dimensions of the selected variable
            if op == 'e':
                self.dset = self.dset.sel({filter_varname: ll},
                                          method='nearest')
                return
            if op == 'bi':
                bounds = slice(ll, rl)
            elif op == 'le':
                bounds = slice(None, ll)
            elif op == 'ge':
                bounds = slice(ll, None)
            else:
                return
            self.dset = self.dset.sel({filter_varname: bounds})

    def selection(self):
        """Run the time, filter and area selections and write
        ``self.outfile``."""
        if self.dset is None:
            # First call: open the file and apply the time/value filters
            # once; later calls reuse the filtered data.
            self.ds = xr.open_dataset(self.infile)
            self.dset = self.ds[self.select]  # select variable
            if self.time:
                self.datetime_selection()
            if self.filter:
                self.filter_selection()

        self.area_selection()
        if self.gset.count() > 1:
            # convert to dataframe if several rows and cols
            self.gset = self.gset.to_dataframe().dropna(how='all'). \
                reset_index()
            self.gset.to_csv(self.outfile, header=True, sep='\t')
        else:
            data = {
                self.latname: [self.gset[self.latname].values],
                self.lonname: [self.gset[self.lonname].values],
                self.select: [self.gset.values]
            }

            df = pd.DataFrame(data, columns=[self.latname, self.lonname,
                                             self.select])
            df.to_csv(self.outfile, header=True, sep='\t')

    def datetime_selection(self):
        """Apply the time selection ``variable#operator#value[#value_end]``.

        Operators handled: 'sl' (from value to value_end), 'to', 'from'
        and 'is' (nearest).
        """
        split_filter = self.time.split('#')
        time_varname = split_filter[0]
        op = split_filter[1]
        ll = split_filter[2]
        if op == 'sl':
            rl = split_filter[3]
            self.dset = self.dset.sel({time_varname: slice(ll, rl)})
        elif op == 'to':
            self.dset = self.dset.sel({time_varname: slice(None, ll)})
        elif op == 'from':
            self.dset = self.dset.sel({time_varname: slice(ll, None)})
        elif op == 'is':
            self.dset = self.dset.sel({time_varname: ll}, method='nearest')

    def filter_selection(self):
        """Apply every filter of ``self.filter`` in order."""
        for single_filter in self.filter:
            self.rowfilter(single_filter)

    def area_selection(self):
        """Set ``self.gset`` to the requested area, the nearest point, or
        the whole data when no location is given."""
        if self.latvalS != "" and self.lonvalW != "":
            # Select geographical area
            self.gset = self.dset.sel({
                self.latname: slice(self.latvalS, self.latvalN),
                self.lonname: slice(self.lonvalW, self.lonvalE),
            })
        elif self.latvalN != "" and self.lonvalE != "":
            # select nearest location
            if self.no_missing:
                self.nearest_latvalN = self.latvalN
                self.nearest_lonvalE = self.lonvalE
            else:
                # find nearest location without NaN values
                self.nearest_location()
            sel_kwargs = {'method': 'nearest'}
            if self.tolerance > 0:
                sel_kwargs['tolerance'] = self.tolerance
            self.gset = self.dset.sel({self.latname: self.nearest_latvalN,
                                       self.lonname: self.nearest_lonvalE},
                                      **sel_kwargs)
        else:
            self.gset = self.dset

    def nearest_location(self):
        """Set ``nearest_latvalN``/``nearest_lonvalE`` to the closest grid
        point that holds a value."""
        # Build a geopandas dataframe with all first elements in each dimension
        # so we assume null values correspond to a mask that is the same for
        # all dimensions in the dataset.
        dsel_frame = self.dset
        for dim in self.dset.dims:
            if dim != self.latname and dim != self.lonname:
                dsel_frame = dsel_frame.isel({dim: 0})
        # transform to pandas dataframe
        dff = dsel_frame.to_dataframe().dropna().reset_index()
        # transform to geopandas to collocate
        gdf = gdp.GeoDataFrame(dff,
                               geometry=gdp.points_from_xy(dff[self.lonname],
                                                           dff[self.latname]))
        # Find nearest location where values are not null
        point = Point(self.lonvalE, self.latvalN)
        multipoint = gdf.geometry.unary_union
        _, nearest_geom = nearest_points(point, multipoint)
        self.nearest_latvalN = nearest_geom.y
        self.nearest_lonvalE = nearest_geom.x

    def selection_from_coords(self):
        """Run one selection per row of the tab-separated ``self.coords``
        file, writing ``<outputdir>/<select>_<row index>.tabular`` each
        time."""
        fcoords = pd.read_csv(self.coords, sep='\t')
        for row in fcoords.itertuples():
            # itertuples() puts the row index at position 0, so the data
            # columns start at position 1.
            # NOTE(review): assumes the first two columns are latitude then
            # longitude, as the --coords help text suggests - confirm.
            self.latvalN = row[1]
            self.lonvalE = row[2]
            self.outfile = (os.path.join(self.outputdir,
                                         self.select + '_' +
                                         str(row.Index) + '.tabular'))
            self.selection()


if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    parser = argparse.ArgumentParser()

    parser.add_argument(
        'infile',
        help='netCDF input filename'
    )
    parser.add_argument(
        '--select',
        help='Variable name to select'
    )
    parser.add_argument(
        '--latname',
        help='Latitude name'
    )
    parser.add_argument(
        '--latvalN',
        help='North latitude value'
    )
    parser.add_argument(
        '--latvalS',
        help='South latitude value'
    )
    parser.add_argument(
        '--lonname',
        help='Longitude name'
    )
    parser.add_argument(
        '--lonvalE',
        help='East longitude value'
    )
    parser.add_argument(
        '--lonvalW',
        help='West longitude value'
    )
    parser.add_argument(
        '--tolerance',
        help='Maximum distance between original and selected value for '
             ' inexact matches e.g. abs(index[indexer] - target) <= tolerance'
    )
    parser.add_argument(
        '--coords',
        help='Input file containing Latitude and Longitude '
             'for geographical selection'
    )
    parser.add_argument(
        '--filter',
        nargs="*",
        help='Filter list variable#operator#value_s#value_e'
    )
    parser.add_argument(
        '--time',
        help='select timeseries variable#operator#value_s[#value_e]'
    )
    parser.add_argument(
        '--outfile',
        help='csv outfile for storing results of the selection '
             '(valid only when --select)'
    )
    parser.add_argument(
        '--outputdir',
        help='folder name for storing results with multiple selections '
             '(valid only when --select)'
    )
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true"
    )
    parser.add_argument(
        "--no_missing",
        help="""Do not take into account possible null/missing values
                (only valid for single location)""",
        action="store_true"
    )
    args = parser.parse_args()

    p = XarraySelect(args.infile, args.select, args.outfile, args.outputdir,
                     args.latname, args.latvalN, args.latvalS, args.lonname,
                     args.lonvalE, args.lonvalW, args.filter,
                     args.coords, args.time, args.verbose,
                     args.no_missing, args.tolerance)
    # NOTE(review): --coords is parsed and stored, but
    # selection_from_coords() is not called from here - confirm whether
    # that is intended.
    if args.select:
        p.selection()
--- a/xarray_select.xml Thu Jan 20 17:07:19 2022 +0000 +++ b/xarray_select.xml Sun Jul 31 21:20:41 2022 +0000 @@ -6,17 +6,18 @@ <expand macro="edam_ontology"/> <requirements> <requirement type="package" version="@TOOL_VERSION@">xarray</requirement> - <requirement type="package" version="3">python</requirement> - <requirement type="package" version="1.5.6">netcdf4</requirement> - <requirement type="package" version="0.9.0">geopandas</requirement> - <requirement type="package" version="1.7.1">shapely</requirement> + <requirement type="package" version="3.10">python</requirement> + <requirement type="package" version="1.8.2">shapely</requirement> + <requirement type="package" version="1.6.0">netcdf4</requirement> + <requirement type="package" version="1.4.3">pandas</requirement> + <requirement type="package" version="0.7.0">geopandas</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ mkdir output_dir && #if $condi_source_coord.coord_source=="coord_from_file" echo "Galaxy xarray version @TOOL_VERSION@"> output_dir/version.tabular && #end if - python '$__tool_directory__/xarray_tool.py' '$input' --select '$var' + python '$__tool_directory__/xarray_select.py' '$input' --select '$var' --verbose --filter #for $i,$uc in enumerate($user_choice)
--- a/xarray_tool.py Thu Jan 20 17:07:19 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,365 +0,0 @@ -# xarray tool for: -# - getting metadata information -# - select data and save results in csv file for further post-processing - -import argparse -import csv -import os -import warnings - -import geopandas as gdp - -import pandas as pd - -from shapely.geometry import Point -from shapely.ops import nearest_points - -import xarray as xr - - -class XarrayTool (): - def __init__(self, infile, outfile_info="", outfile_summary="", - select="", outfile="", outputdir="", latname="", - latvalN="", latvalS="", lonname="", lonvalE="", - lonvalW="", filter_list="", coords="", time="", - verbose=False, no_missing=False, coords_info=None, - tolerance=None): - self.infile = infile - self.outfile_info = outfile_info - self.outfile_summary = outfile_summary - self.select = select - self.outfile = outfile - self.outputdir = outputdir - self.latname = latname - if tolerance != "" and tolerance is not None: - self.tolerance = float(tolerance) - else: - self.tolerance = -1 - if latvalN != "" and latvalN is not None: - self.latvalN = float(latvalN) - else: - self.latvalN = "" - if latvalS != "" and latvalS is not None: - self.latvalS = float(latvalS) - else: - self.latvalS = "" - self.lonname = lonname - if lonvalE != "" and lonvalE is not None: - self.lonvalE = float(lonvalE) - else: - self.lonvalE = "" - if lonvalW != "" and lonvalW is not None: - self.lonvalW = float(lonvalW) - else: - self.lonvalW = "" - self.filter = filter_list - self.time = time - self.coords = coords - self.verbose = verbose - self.no_missing = no_missing - # initialization - self.dset = None - self.gset = None - self.coords_info = coords_info - if self.verbose: - print("infile: ", self.infile) - print("outfile_info: ", self.outfile_info) - print("outfile_summary: ", self.outfile_summary) - print("outfile: ", self.outfile) - print("select: ", self.select) - print("outfile: ", self.outfile) - 
print("outputdir: ", self.outputdir) - print("latname: ", self.latname) - print("latvalN: ", self.latvalN) - print("latvalS: ", self.latvalS) - print("lonname: ", self.lonname) - print("lonvalE: ", self.lonvalE) - print("lonvalW: ", self.lonvalW) - print("filter: ", self.filter) - print("time: ", self.time) - print("coords: ", self.coords) - print("coords_info: ", self.coords_info) - - def info(self): - f = open(self.outfile_info, 'w') - ds = xr.open_dataset(self.infile) - ds.info(f) - f.close() - - def summary(self): - f = open(self.outfile_summary, 'w') - ds = xr.open_dataset(self.infile) - writer = csv.writer(f, delimiter='\t') - header = ['VariableName', 'NumberOfDimensions'] - for idx, val in enumerate(ds.dims.items()): - header.append('Dim' + str(idx) + 'Name') - header.append('Dim' + str(idx) + 'Size') - writer.writerow(header) - for name, da in ds.data_vars.items(): - line = [name] - line.append(len(ds[name].shape)) - for d, s in zip(da.shape, da.sizes): - line.append(s) - line.append(d) - writer.writerow(line) - for name, da in ds.coords.items(): - line = [name] - line.append(len(ds[name].shape)) - for d, s in zip(da.shape, da.sizes): - line.append(s) - line.append(d) - writer.writerow(line) - f.close() - - def rowfilter(self, single_filter): - split_filter = single_filter.split('#') - filter_varname = split_filter[0] - op = split_filter[1] - ll = float(split_filter[2]) - if (op == 'bi'): - rl = float(split_filter[3]) - if filter_varname == self.select: - # filter on values of the selected variable - if op == 'bi': - self.dset = self.dset.where( - (self.dset <= rl) & (self.dset >= ll) - ) - elif op == 'le': - self.dset = self.dset.where(self.dset <= ll) - elif op == 'ge': - self.dset = self.dset.where(self.dset >= ll) - elif op == 'e': - self.dset = self.dset.where(self.dset == ll) - else: # filter on other dimensions of the selected variable - if op == 'bi': - self.dset = self.dset.sel({filter_varname: slice(ll, rl)}) - elif op == 'le': - self.dset = 
self.dset.sel({filter_varname: slice(None, ll)}) - elif op == 'ge': - self.dset = self.dset.sel({filter_varname: slice(ll, None)}) - elif op == 'e': - self.dset = self.dset.sel({filter_varname: ll}, - method='nearest') - - def selection(self): - if self.dset is None: - self.ds = xr.open_dataset(self.infile) - self.dset = self.ds[self.select] # select variable - if self.time: - self.datetime_selection() - if self.filter: - self.filter_selection() - - self.area_selection() - if self.gset.count() > 1: - # convert to dataframe if several rows and cols - self.gset = self.gset.to_dataframe().dropna(how='all'). \ - reset_index() - self.gset.to_csv(self.outfile, header=True, sep='\t') - else: - data = { - self.latname: [self.gset[self.latname].values], - self.lonname: [self.gset[self.lonname].values], - self.select: [self.gset.values] - } - - df = pd.DataFrame(data, columns=[self.latname, self.lonname, - self.select]) - df.to_csv(self.outfile, header=True, sep='\t') - - def datetime_selection(self): - split_filter = self.time.split('#') - time_varname = split_filter[0] - op = split_filter[1] - ll = split_filter[2] - if (op == 'sl'): - rl = split_filter[3] - self.dset = self.dset.sel({time_varname: slice(ll, rl)}) - elif (op == 'to'): - self.dset = self.dset.sel({time_varname: slice(None, ll)}) - elif (op == 'from'): - self.dset = self.dset.sel({time_varname: slice(ll, None)}) - elif (op == 'is'): - self.dset = self.dset.sel({time_varname: ll}, method='nearest') - - def filter_selection(self): - for single_filter in self.filter: - self.rowfilter(single_filter) - - def area_selection(self): - - if self.latvalS != "" and self.lonvalW != "": - # Select geographical area - self.gset = self.dset.sel({self.latname: - slice(self.latvalS, self.latvalN), - self.lonname: - slice(self.lonvalW, self.lonvalE)}) - elif self.latvalN != "" and self.lonvalE != "": - # select nearest location - if self.no_missing: - self.nearest_latvalN = self.latvalN - self.nearest_lonvalE = self.lonvalE - 
else: - # find nearest location without NaN values - self.nearest_location() - if self.tolerance > 0: - self.gset = self.dset.sel({self.latname: self.nearest_latvalN, - self.lonname: self.nearest_lonvalE}, - method='nearest', - tolerance=self.tolerance) - else: - self.gset = self.dset.sel({self.latname: self.nearest_latvalN, - self.lonname: self.nearest_lonvalE}, - method='nearest') - else: - self.gset = self.dset - - def nearest_location(self): - # Build a geopandas dataframe with all first elements in each dimension - # so we assume null values correspond to a mask that is the same for - # all dimensions in the dataset. - dsel_frame = self.dset - for dim in self.dset.dims: - if dim != self.latname and dim != self.lonname: - dsel_frame = dsel_frame.isel({dim: 0}) - # transform to pandas dataframe - dff = dsel_frame.to_dataframe().dropna().reset_index() - # transform to geopandas to collocate - gdf = gdp.GeoDataFrame(dff, - geometry=gdp.points_from_xy(dff[self.lonname], - dff[self.latname])) - # Find nearest location where values are not null - point = Point(self.lonvalE, self.latvalN) - multipoint = gdf.geometry.unary_union - queried_geom, nearest_geom = nearest_points(point, multipoint) - self.nearest_latvalN = nearest_geom.y - self.nearest_lonvalE = nearest_geom.x - - def selection_from_coords(self): - fcoords = pd.read_csv(self.coords, sep='\t') - for row in fcoords.itertuples(): - self.latvalN = row[0] - self.lonvalE = row[1] - self.outfile = (os.path.join(self.outputdir, - self.select + '_' + - str(row.Index) + '.tabular')) - self.selection() - - def get_coords_info(self): - ds = xr.open_dataset(self.infile) - for c in ds.coords: - filename = os.path.join(self.coords_info, - c.strip() + - '.tabular') - pd = ds.coords[c].to_pandas() - pd.index = range(len(pd)) - pd.to_csv(filename, header=False, sep='\t') - - -if __name__ == '__main__': - warnings.filterwarnings("ignore") - parser = argparse.ArgumentParser() - - parser.add_argument( - 'infile', - help='netCDF 
input filename' - ) - parser.add_argument( - '--info', - help='Output filename where metadata information is stored' - ) - parser.add_argument( - '--summary', - help='Output filename where data summary information is stored' - ) - parser.add_argument( - '--select', - help='Variable name to select' - ) - parser.add_argument( - '--latname', - help='Latitude name' - ) - parser.add_argument( - '--latvalN', - help='North latitude value' - ) - parser.add_argument( - '--latvalS', - help='South latitude value' - ) - parser.add_argument( - '--lonname', - help='Longitude name' - ) - parser.add_argument( - '--lonvalE', - help='East longitude value' - ) - parser.add_argument( - '--lonvalW', - help='West longitude value' - ) - parser.add_argument( - '--tolerance', - help='Maximum distance between original and selected value for ' - ' inexact matches e.g. abs(index[indexer] - target) <= tolerance' - ) - parser.add_argument( - '--coords', - help='Input file containing Latitude and Longitude' - 'for geographical selection' - ) - parser.add_argument( - '--coords_info', - help='output-folder where for each coordinate, coordinate values ' - ' are being printed in the corresponding outputfile' - ) - parser.add_argument( - '--filter', - nargs="*", - help='Filter list variable#operator#value_s#value_e' - ) - parser.add_argument( - '--time', - help='select timeseries variable#operator#value_s[#value_e]' - ) - parser.add_argument( - '--outfile', - help='csv outfile for storing results of the selection' - '(valid only when --select)' - ) - parser.add_argument( - '--outputdir', - help='folder name for storing results with multiple selections' - '(valid only when --select)' - ) - parser.add_argument( - "-v", "--verbose", - help="switch on verbose mode", - action="store_true" - ) - parser.add_argument( - "--no_missing", - help="""Do not take into account possible null/missing values - (only valid for single location)""", - action="store_true" - ) - args = parser.parse_args() - - p = 
XarrayTool(args.infile, args.info, args.summary, args.select, - args.outfile, args.outputdir, args.latname, - args.latvalN, args.latvalS, args.lonname, - args.lonvalE, args.lonvalW, args.filter, - args.coords, args.time, args.verbose, - args.no_missing, args.coords_info, args.tolerance) - if args.info: - p.info() - if args.summary: - p.summary() - if args.coords: - p.selection_from_coords() - elif args.select: - p.selection() - elif args.coords_info: - p.get_coords_info()