Mercurial > repos > ecology > xarray_select
changeset 0:225d0d275a24 draft
"planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit f1455c158011dc4aab0fd469cf794be6f4142992"
author | ecology |
---|---|
date | Fri, 22 May 2020 05:19:15 -0400 |
parents | |
children | 6baac361495b |
files | README.md test-data/Metadata_infos_from_dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc.Variables.tab test-data/Test1.tabular test-data/Test2.tabular test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc test-data/info_file.txt test-data/var_tab_dataset-ibi xarray_select.xml xarray_tool.py |
diffstat | 9 files changed, 868 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,8 @@ +# Xarray tools for netCDF +## netCDF metadata information + +The first tool `xarray_metadata_info` uses xarray to provide users with general information about variable names, dimensions +and attributes. +Variables that can be extracted and dimensions available are printed in a tabular file. + +The tool also prints a general information file. It's the result of the xarray method info().
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Metadata_infos_from_dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc.Variables.tab Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,8 @@ +VariableName NumberOfDimensions Dim0Name Dim0Size Dim1Name Dim1Size Dim2Name Dim2Size Dim3Name Dim3Size +phy 4 time 145 depth 1 latitude 97 longitude 103 +chl 4 time 145 depth 1 latitude 97 longitude 103 +nh4 4 time 145 depth 1 latitude 97 longitude 103 +time 1 time 145 +longitude 1 longitude 103 +latitude 1 latitude 97 +depth 1 depth 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Test1.tabular Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,146 @@ + time depth longitude latitude phy +0 2002-12-15 0.5057600140571594 -2.0000007 44.0 1.0500183 +1 2003-01-15 0.5057600140571594 -2.0000007 44.0 1.25 +2 2003-02-15 0.5057600140571594 -2.0000007 44.0 1.3000183 +3 2003-03-15 0.5057600140571594 -2.0000007 44.0 6.0599976 +4 2003-04-15 0.5057600140571594 -2.0000007 44.0 2.25 +5 2003-05-15 0.5057600140571594 -2.0000007 44.0 0.6499939 +6 2003-06-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +7 2003-07-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +8 2003-08-15 0.5057600140571594 -2.0000007 44.0 0.480011 +9 2003-09-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +10 2003-10-15 0.5057600140571594 -2.0000007 44.0 0.5 +11 2003-11-15 0.5057600140571594 -2.0000007 44.0 0.9299927 +12 2003-12-15 0.5057600140571594 -2.0000007 44.0 1.3900146 +13 2004-01-15 0.5057600140571594 -2.0000007 44.0 1.7400208 +14 2004-02-15 0.5057600140571594 -2.0000007 44.0 4.5 +15 2004-03-15 0.5057600140571594 -2.0000007 44.0 5.5500183 +16 2004-04-15 0.5057600140571594 -2.0000007 44.0 5.3099976 +17 2004-05-15 0.5057600140571594 -2.0000007 44.0 3.75 +18 2004-06-15 0.5057600140571594 -2.0000007 44.0 0.77001953 +19 2004-07-15 0.5057600140571594 -2.0000007 44.0 0.5 +20 2004-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +21 2004-09-15 0.5057600140571594 -2.0000007 44.0 0.4500122 +22 2004-10-15 0.5057600140571594 -2.0000007 44.0 0.480011 +23 2004-11-15 0.5057600140571594 -2.0000007 44.0 0.83999634 +24 2004-12-15 0.5057600140571594 -2.0000007 44.0 1.7400208 +25 2005-01-15 0.5057600140571594 -2.0000007 44.0 1.7700195 +26 2005-02-15 0.5057600140571594 -2.0000007 44.0 1.5500183 +27 2005-03-15 0.5057600140571594 -2.0000007 44.0 7.149994 +28 2005-04-15 0.5057600140571594 -2.0000007 44.0 3.649994 +29 2005-05-15 0.5057600140571594 -2.0000007 44.0 2.5200195 +30 2005-06-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +31 2005-07-15 
0.5057600140571594 -2.0000007 44.0 0.6700134 +32 2005-08-15 0.5057600140571594 -2.0000007 44.0 0.4500122 +33 2005-09-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +34 2005-10-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +35 2005-11-15 0.5057600140571594 -2.0000007 44.0 0.6199951 +36 2005-12-15 0.5057600140571594 -2.0000007 44.0 1.1499939 +37 2006-01-15 0.5057600140571594 -2.0000007 44.0 3.5299988 +38 2006-02-15 0.5057600140571594 -2.0000007 44.0 7.1799927 +39 2006-03-15 0.5057600140571594 -2.0000007 44.0 6.5599976 +40 2006-04-15 0.5057600140571594 -2.0000007 44.0 3.8000183 +41 2006-05-15 0.5057600140571594 -2.0000007 44.0 0.95999146 +42 2006-06-15 0.5057600140571594 -2.0000007 44.0 1.5 +43 2006-07-15 0.5057600140571594 -2.0000007 44.0 1.0299988 +44 2006-08-15 0.5057600140571594 -2.0000007 44.0 0.480011 +45 2006-09-15 0.5057600140571594 -2.0000007 44.0 0.49002075 +46 2006-10-15 0.5057600140571594 -2.0000007 44.0 0.480011 +47 2006-11-15 0.5057600140571594 -2.0000007 44.0 0.9299927 +48 2006-12-15 0.5057600140571594 -2.0000007 44.0 1.2099915 +49 2007-01-15 0.5057600140571594 -2.0000007 44.0 1.1499939 +50 2007-02-15 0.5057600140571594 -2.0000007 44.0 1.7000122 +51 2007-03-15 0.5057600140571594 -2.0000007 44.0 5.230011 +52 2007-04-15 0.5057600140571594 -2.0000007 44.0 3.8600159 +53 2007-05-15 0.5057600140571594 -2.0000007 44.0 0.83999634 +54 2007-06-15 0.5057600140571594 -2.0000007 44.0 0.6799927 +55 2007-07-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +56 2007-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +57 2007-09-15 0.5057600140571594 -2.0000007 44.0 0.5 +58 2007-10-15 0.5057600140571594 -2.0000007 44.0 0.89001465 +59 2007-11-15 0.5057600140571594 -2.0000007 44.0 2.0299988 +60 2007-12-15 0.5057600140571594 -2.0000007 44.0 1.8399963 +61 2008-01-15 0.5057600140571594 -2.0000007 44.0 1.3399963 +62 2008-02-15 0.5057600140571594 -2.0000007 44.0 3.149994 +63 2008-03-15 0.5057600140571594 -2.0000007 44.0 4.5899963 +64 2008-04-15 0.5057600140571594 
-2.0000007 44.0 5.080017 +65 2008-05-15 0.5057600140571594 -2.0000007 44.0 1.0 +66 2008-06-15 0.5057600140571594 -2.0000007 44.0 1.5299988 +67 2008-07-15 0.5057600140571594 -2.0000007 44.0 0.55999756 +68 2008-08-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +69 2008-09-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +70 2008-10-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +71 2008-11-15 0.5057600140571594 -2.0000007 44.0 0.64001465 +72 2008-12-15 0.5057600140571594 -2.0000007 44.0 2.4200134 +73 2009-01-15 0.5057600140571594 -2.0000007 44.0 2.3900146 +74 2009-02-15 0.5057600140571594 -2.0000007 44.0 6.2099915 +75 2009-03-15 0.5057600140571594 -2.0000007 44.0 4.6799927 +76 2009-04-15 0.5057600140571594 -2.0000007 44.0 1.1100159 +77 2009-05-15 0.5057600140571594 -2.0000007 44.0 2.649994 +78 2009-06-15 0.5057600140571594 -2.0000007 44.0 1.4900208 +79 2009-07-15 0.5057600140571594 -2.0000007 44.0 0.5 +80 2009-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +81 2009-09-15 0.5057600140571594 -2.0000007 44.0 0.5800171 +82 2009-10-15 0.5057600140571594 -2.0000007 44.0 0.6499939 +83 2009-11-15 0.5057600140571594 -2.0000007 44.0 0.8999939 +84 2009-12-15 0.5057600140571594 -2.0000007 44.0 1.3099976 +85 2010-01-15 0.5057600140571594 -2.0000007 44.0 1.5299988 +86 2010-02-15 0.5057600140571594 -2.0000007 44.0 2.9599915 +87 2010-03-15 0.5057600140571594 -2.0000007 44.0 5.450012 +88 2010-04-15 0.5057600140571594 -2.0000007 44.0 7.5899963 +89 2010-05-15 0.5057600140571594 -2.0000007 44.0 1.8000183 +90 2010-06-15 0.5057600140571594 -2.0000007 44.0 0.480011 +91 2010-07-15 0.5057600140571594 -2.0000007 44.0 0.5 +92 2010-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +93 2010-09-15 0.5057600140571594 -2.0000007 44.0 0.49002075 +94 2010-10-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +95 2010-11-15 0.5057600140571594 -2.0000007 44.0 0.9299927 +96 2010-12-15 0.5057600140571594 -2.0000007 44.0 1.1499939 +97 2011-01-15 0.5057600140571594 -2.0000007 44.0 2.4900208 +98 
2011-02-15 0.5057600140571594 -2.0000007 44.0 5.1799927 +99 2011-03-15 0.5057600140571594 -2.0000007 44.0 7.029999 +100 2011-04-15 0.5057600140571594 -2.0000007 44.0 2.4900208 +101 2011-05-15 0.5057600140571594 -2.0000007 44.0 0.6499939 +102 2011-06-15 0.5057600140571594 -2.0000007 44.0 0.52001953 +103 2011-07-15 0.5057600140571594 -2.0000007 44.0 0.5 +104 2011-08-15 0.5057600140571594 -2.0000007 44.0 0.75 +105 2011-09-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +106 2011-10-15 0.5057600140571594 -2.0000007 44.0 0.480011 +107 2011-11-15 0.5057600140571594 -2.0000007 44.0 0.730011 +108 2011-12-15 0.5057600140571594 -2.0000007 44.0 1.0299988 +109 2012-01-15 0.5057600140571594 -2.0000007 44.0 3.149994 +110 2012-02-15 0.5057600140571594 -2.0000007 44.0 2.3099976 +111 2012-03-15 0.5057600140571594 -2.0000007 44.0 5.5200195 +112 2012-04-15 0.5057600140571594 -2.0000007 44.0 3.399994 +113 2012-05-15 0.5057600140571594 -2.0000007 44.0 3.7000122 +114 2012-06-15 0.5057600140571594 -2.0000007 44.0 2.5899963 +115 2012-07-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +116 2012-08-15 0.5057600140571594 -2.0000007 44.0 0.4500122 +117 2012-09-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +118 2012-10-15 0.5057600140571594 -2.0000007 44.0 0.61001587 +119 2012-11-15 0.5057600140571594 -2.0000007 44.0 2.0299988 +120 2012-12-15 0.5057600140571594 -2.0000007 44.0 1.4200134 +121 2013-01-15 0.5057600140571594 -2.0000007 44.0 2.2700195 +122 2013-02-15 0.5057600140571594 -2.0000007 44.0 7.0 +123 2013-03-15 0.5057600140571594 -2.0000007 44.0 10.550018 +124 2013-04-15 0.5057600140571594 -2.0000007 44.0 5.8399963 +125 2013-05-15 0.5057600140571594 -2.0000007 44.0 1.2400208 +126 2013-06-15 0.5057600140571594 -2.0000007 44.0 4.1700134 +127 2013-07-15 0.5057600140571594 -2.0000007 44.0 3.2099915 +128 2013-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +129 2013-09-15 0.5057600140571594 -2.0000007 44.0 0.480011 +130 2013-10-15 0.5057600140571594 -2.0000007 44.0 0.49002075 +131 
2013-11-15 0.5057600140571594 -2.0000007 44.0 0.7799988 +132 2013-12-15 0.5057600140571594 -2.0000007 44.0 1.4500122 +133 2014-01-15 0.5057600140571594 -2.0000007 44.0 0.95999146 +134 2014-02-15 0.5057600140571594 -2.0000007 44.0 1.3900146 +135 2014-03-15 0.5057600140571594 -2.0000007 44.0 5.779999 +136 2014-04-15 0.5057600140571594 -2.0000007 44.0 5.4299927 +137 2014-05-15 0.5057600140571594 -2.0000007 44.0 1.1799927 +138 2014-06-15 0.5057600140571594 -2.0000007 44.0 0.730011 +139 2014-07-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +140 2014-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +141 2014-09-15 0.5057600140571594 -2.0000007 44.0 0.5 +142 2014-10-15 0.5057600140571594 -2.0000007 44.0 0.6199951 +143 2014-11-15 0.5057600140571594 -2.0000007 44.0 0.480011 +144 2014-12-15 0.5057600140571594 -2.0000007 44.0 0.55999756
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Test2.tabular Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,25 @@ + time depth latitude longitude nh4 +0 2003-12-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 81.27 +1 2003-12-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 78.08 +2 2003-12-15 0.5057600140571594 45.5 -0.9166674017906189 55.149998 +3 2004-01-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 65.2 +4 2004-01-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 64.11 +5 2004-02-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 51.0 +6 2004-02-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 51.32 +7 2004-05-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 54.53 +8 2004-06-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 79.79 +9 2004-06-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 61.52 +10 2004-07-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 99.159996 +11 2004-07-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 77.93 +12 2004-08-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 110.149994 +13 2004-08-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 86.759995 +14 2004-09-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 112.369995 +15 2004-09-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 91.979996 +16 2004-10-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 109.63 +17 2004-10-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 95.509995 +18 2004-11-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 98.45 +19 2004-11-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 93.11 +20 2004-11-15 0.5057600140571594 45.5 -0.9166674017906189 56.78 +21 2004-12-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 84.25 +22 2004-12-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 81.83 +23 2004-12-15 
0.5057600140571594 45.5 -0.9166674017906189 57.07
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/info_file.txt Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,74 @@ +xarray.Dataset { +dimensions: + depth = 1 ; + latitude = 97 ; + longitude = 103 ; + time = 145 ; + +variables: + float32 phy(time, depth, latitude, longitude) ; + phy:_CoordinateAxes = time depth latitude longitude ; + phy:long_name = Mole Concentration of Phytoplankton expressed as carbon in sea water ; + phy:standard_name = mole_concentration_of_phytoplankton_expressed_as_carbon_in_sea_water ; + phy:units = mmol.m-3 ; + phy:unit_long = mole_concentration_of_phytoplankton_expressed_as_carbon_in_sea_water ; + datetime64[ns] time(time) ; + time:standard_name = time ; + time:long_name = time ; + time:_CoordinateAxisType = Time ; + time:axis = T ; + float32 chl(time, depth, latitude, longitude) ; + chl:_CoordinateAxes = time depth latitude longitude ; + chl:long_name = Mass Concentration of Chlorophyll in Sea Water ; + chl:standard_name = mass_concentration_of_chlorophyll_in_sea_water ; + chl:units = mg.m-3 ; + chl:unit_long = milligram of chlorophyll per cubic meter ; + float32 nh4(time, depth, latitude, longitude) ; + nh4:_CoordinateAxes = time depth latitude longitude ; + nh4:long_name = Mole Concentration of Ammonium in Sea Water ; + nh4:standard_name = mole_concentration_of_ammonium_in_sea_water ; + nh4:units = mmol.m-3 ; + nh4:unit_long = millimoles of Ammonium per cubic meter ; + float32 longitude(longitude) ; + longitude:long_name = Longitude ; + longitude:units = degrees_east ; + longitude:standard_name = longitude ; + longitude:axis = X ; + longitude:unit_long = Degrees East ; + longitude:step = 0.08333f ; + longitude:_CoordinateAxisType = Lon ; + float32 latitude(latitude) ; + latitude:long_name = Latitude ; + latitude:units = degrees_north ; + latitude:standard_name = latitude ; + latitude:axis = Y ; + latitude:unit_long = Degrees North ; + latitude:step = 0.08333f ; + latitude:_CoordinateAxisType = Lat ; + float32 depth(depth) ; + 
depth:long_name = Depth ; + depth:units = m ; + depth:axis = Z ; + depth:positive = down ; + depth:unit_long = Meters ; + depth:standard_name = depth ; + depth:_CoordinateAxisType = Height ; + depth:_CoordinateZisPositive = down ; + +// global attributes: + :title = CMEMS IBI REANALYSIS: MONTHLY BIOGEOCHEMICAL PRODUCTS (REGULAR GRID) ; + :institution = Puertos del Estado (PdE) - Mercator-Ocean (MO) ; + :references = http://marine.copernicus.eu ; + :source = CMEMS IBI-MFC ; + :Conventions = CF-1.0 ; + :history = Data extracted from dataset http://puertos2.cesga.es:8080/thredds/dodsC/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid ; + :time_min = 7272.0 ; + :time_max = 112464.0 ; + :julian_day_unit = Hours since 2002-02-15 ; + :z_min = 0.5057600140571594 ; + :z_max = 0.5057600140571594 ; + :latitude_min = 43.0 ; + :latitude_max = 51.0 ; + :longitude_min = -6.000000476837158 ; + :longitude_max = 2.4999990463256836 ; +} \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/var_tab_dataset-ibi Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,7 @@ +time 1 time 145 +chl 4 time 145 depth 1 latitude 97 longitude 103 +nh4 4 time 145 depth 1 latitude 97 longitude 103 +longitude 1 longitude 103 +latitude 1 latitude 97 +depth 1 depth 1 +phy 4 time 145 depth 1 latitude 97 longitude 103
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xarray_select.xml Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,298 @@ +<tool id="xarray_select" name="NetCDF xarray Selection" version="0.15.1"> + <description>extracts variable values with custom conditions on dimensions</description> + <requirements> + <requirement type="package" version="3">python</requirement> + <requirement type="package" version="1.5.3">netcdf4</requirement> + <requirement type="package" version="0.15.1">xarray</requirement> + <requirement type="package" version="0.7.0">geopandas</requirement> + <requirement type="package" version="1.7.0">shapely</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + mkdir output_dir && + python '$__tool_directory__/xarray_tool.py' '$input' --select '$var' + --verbose + --filter + #for $i,$uc in enumerate($user_choice) + #if $uc.condi_between.comparator=="bi" + '${uc.dim}#${uc.condi_between.comparator}#${uc.condi_between.t1}#${uc.condi_between.t2}' + #else + '${uc.dim}#${uc.condi_between.comparator}#${uc.condi_between.value}' + #end if + #end for + + #if $time.condi_datetime.datetime=="yes" + --time + #if $time.condi_datetime.condi_between.comparator=="sl" + '${time.condi_datetime.dim}#${time.condi_datetime.condi_between.comparator}#${time.condi_datetime.condi_between.t1}#${time.condi_datetime.condi_between.t2}' + #else + '${time.condi_datetime.dim}#${time.condi_datetime.condi_between.comparator}#${time.condi_datetime.condi_between.t1}' + #end if + #end if + + #if $condi_source_coord.coord_source=="coord_from_file" + --coords '$coord_tabular' + --latname '$condi_source_coord.lat_dim' --lonname '$condi_source_coord.lon_dim' + --outputdir output_dir + #else + --outfile 'final.tabular' + #if $condi_source_coord.condi_coord.coord=='single' + --latname $condi_source_coord.condi_coord.lat_dim + --latvalN $condi_source_coord.condi_coord.lat_val + --lonname $condi_source_coord.condi_coord.lon_dim + --lonvalE 
$condi_source_coord.condi_coord.lon_val + #elif $condi_source_coord.condi_coord.coord=='subregion' + --latname $condi_source_coord.condi_coord.lat_dim + --latvalN $condi_source_coord.condi_coord.lat_valN + --latvalS $condi_source_coord.condi_coord.lat_valS + --lonname $condi_source_coord.condi_coord.lon_dim + --lonvalE $condi_source_coord.condi_coord.lon_valE + --lonvalW $condi_source_coord.condi_coord.lon_valW + #end if + #end if + ]]></command> + <inputs> + <param type="data" name="input" label="Input netcdf file" format="netcdf"/> + <param type="data" label="Tabular of variables" name="var_tab" format="tabular" help="Select the tabular file which summarize the available variables and dimensions."/> + + <param name="var" type="select" label="Choose the variable to extract"> + <options from_dataset="var_tab"> + <column name="name" index="0"/> + <column name="value" index="0"/> + </options> + </param> + + <conditional name="condi_source_coord"> + <param name="coord_source" type="select" label="Source of coordinates"> + <option value="coord_from_stdin">Manually enter coordinates</option> + <option value="coord_from_file">Use coordinates from input file</option> + </param> + + <when value="coord_from_file"> + <param type="data" label="Tabular of coord" name="coord_tabular" format="tabular" help="Format : Latitude Longitude"/> + <param name="lat_dim" type="select" label="Name of latitude coordinate" > + <options from_dataset="var_tab"> + <column name="value" index="0"/> + </options> + </param> + <param name="lon_dim" type="select" label="Name of longitude coordinate" > + <options from_dataset="var_tab"> + <column name="value" index="0"/> + </options> + </param> + </when> + + <when value="coord_from_stdin"> + <conditional name="condi_coord"> + <param name="coord" type="select" label="Geographical area" help="Use this option to get valid values at your custom coordinates."> + <option value="global">Whole available region</option> + <option value="single">Single 
location</option> + <option value="subregion">Sub-region extraction</option> + </param> + <when value="single"> + <param name="lat_dim" type="select" label="Name of latitude coordinate" > + <options from_dataset="var_tab"> + <column name="value" index="0"/> + </options> + </param> + <param name="lat_val" type="float" value="0" label="Latitude"/> + <param name="lon_dim" type="select" label="Name of longitude coordinate" > + <options from_dataset="var_tab"> + <column name="value" index="0"/> + </options> + </param> + <param name="lon_val" type="float" value="0" label="Longitude"/> + </when> + <when value="subregion"> + <param name="lat_dim" type="select" label="Name of latitude coordinate" > + <options from_dataset="var_tab"> + <column name="value" index="0"/> + </options> + </param> + <param name="lat_valN" type="float" value="0" label="Latitude North"/> + <param name="lat_valS" type="float" value="0" label="Latitude South"/> + <param name="lon_dim" type="select" label="Name of longitude coordinate" > + <options from_dataset="var_tab"> + <column name="value" index="0"/> + </options> + </param> + <param name="lon_valE" type="float" value="0" label="Longitude East"/> + <param name="lon_valW" type="float" value="0" label="Longitude West"/> + </when> + <when value="global"></when> + </conditional> + </when> + + </conditional> + + <section name="time" title="Select Time series" expanded="false"> + <conditional name="condi_datetime"> + <param name="datetime" type="select" label="Datetime selection" help="Use this option to extract timeseries from your dataset"> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"></when> + <when value="yes"> + <param name="dim" type="select" label="Select datetime dimension" help="Use this option only if your dataset contains a date/time dimension"> + <options from_dataset="var_tab"> + <column name="value" index="0"/> + </options> + </param> + <conditional name="condi_between"> + <param 
name="comparator" type="select" label="date/Time selection"> + <option value="is">is</option> + <option value="sl">slice</option> + <option value="ne">nearest</option> + <option value="to">to</option> + <option value="from">from</option> + </param> + <when value="sl"> + <param name="t1" type="text" value="" label="Start date/time"/> + <param name="t2" type="text" value="" label="End date/time"/> + </when> + <when value="is"> + <param name="t1" type="text" value="" label="date/time"/> + </when> + <when value="ne"> + <param name="t1" type="text" value="" label="date/time"/> + </when> + <when value="to"> + <param name="t1" type="text" value="" label="date/time"/> + </when> + <when value="from"> + <param name="t1" type="text" value="" label="date/time"/> + </when> + </conditional> + </when> + </conditional> + </section> + <repeat name="user_choice" title="additional filter" help="Use this option to filter on the selected dataset"> + <param name="dim" type="select" label="Dimensions"> + <options from_dataset="var_tab"> + <column name="value" index="0"/> + </options> + </param> + <conditional name="condi_between"> + <param name="comparator" type="select" label="Comparator"> + <option value="e">Equal</option> + <option value="ge">Greater or equal</option> + <option value="le">Less or equal</option> + <option value="bi">Between-include [threshold1,threshold2]</option> + </param> + <when value="bi"> + <param name="t1" type="text" value="0" label="Inferior threshold"/> + <param name="t2" type="text" value="0" label="Superior threshold"/> + </when> + <when value="e"> + <param name="value" type="text" value="0" label="Value"/> + </when> + <when value="ge"> + <param name="value" type="text" value="0" label="Value"/> + </when> + <when value="le"> + <param name="value" type="text" value="0" label="Value"/> + </when> + </conditional> + </repeat> + </inputs> + <outputs> + <collection type="list" name="output"> + <discover_datasets pattern="__designation_and_ext__" visible="false" 
format="tabular" directory="output_dir"/> + <filter>condi_source_coord['coord_source'] == 'coord_from_file'</filter> + </collection> + <data name="simpleoutput" from_work_dir="final.tabular" format="tabular"> + <filter>condi_source_coord['coord_source'] == 'coord_from_stdin'</filter> + </data> + </outputs> + <tests> + <test> + <param name="input" value="dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc"/> + <param name="var" value="phy"/> + <param name="var_tab" value="var_tab_dataset-ibi"/> + <conditional name="condi_source_coord"> + <param name="coord_source" value="coord_from_stdin"/> + <conditional name="condi_coord"> + <param name="coord" value="single"/> + <param name="lat_dim" value="latitude"/> + <param name="lat_val" value="44.0"/> + <param name="lon_dim" value="longitude"/> + <param name="lon_val" value="-2.0"/> + </conditional> + </conditional> + <output name="simpleoutput" value="Test1.tabular"> + <assert_contents> + <has_text_matching expression="0\t2002-12-15\t0.5"/> + <has_text_matching expression="144\t2014-12-15\t0.5"/> + </assert_contents> + </output> + </test> + <test> + <param name="input" value="dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc"/> + <param name="var_tab" value="var_tab_dataset-ibi"/> + <param name="var" value="nh4"/> + + <conditional name="condi_source_coord"> + <param name="coord_source" value="coord_from_stdin"/> + <conditional name="condi_coord"> + <param name="coord" value="global"/> + </conditional> + </conditional> + <section name="time"> + <conditional name="condi_datetime"> + <param name="datetime" value="yes"/> + <conditional name="condi_between"> + <param name="comparator" value="sl"/> + <param name="t1" value="2003-12-15" /> + <param name="t2" value="2004-12-15" /> + </conditional> + </conditional> + </section> + <repeat name="user_choice"> + <param name="dim" value="nh4"/> + <conditional name="condi_between"> + <param name="comparator" value="ge"/> + <param name="value" 
value="50."/> + </conditional> + </repeat> + <output name="simpleoutput" value="Test2.tabular"> + <assert_contents> + <has_text_matching expression="0\t2003-12-15\t0.5"/> + <has_text_matching expression="23\t2004-12-15\t0.5"/> + </assert_contents> + </output> + </test> + </tests> + + <help><![CDATA[ +**What it does** + +This tool extracts variable values with custom conditions on dimensions. + +It can use manually given coordinates or automatically take them from a tabular file to filter information. + +If no values are available at a coordinate X, the tool will search for the closest coordinate with a non-NA value. + +Filters can be set on every dimension. Available filtering operations are: =, >, <, >=, <=, [interval], ]interval[. + + + +**Input** + +A netcdf file (.nc). + +Variable tabular file from 'Netcdf Metadata Info'. + +Tabular file with coordinates and the following structure : 'lat' 'lon'. + + +**Outputs** + +A single output with values for the wanted variable if there is only one coordinate. + +A data collection where one file is created for every coordinate, if multiple coordinates from tabular file. + + +------------------------------------------------- + +The xarray select tool can be used after the xarray Info. + ]]></help> +</tool>
# xarray tool for:
# - getting metadata information
# - select data and save results in csv file for further post-processing

import argparse
import csv
import warnings

import geopandas as gdp

import pandas as pd

from shapely.geometry import Point
from shapely.ops import nearest_points

import xarray as xr


class XarrayTool ():
    """Extract metadata or make filtered selections from a netCDF file.

    The instance is driven by the command-line options parsed in
    ``__main__``: call ``info()``/``summary()`` for metadata outputs, and
    ``selection()`` or ``selection_from_coords()`` for data extraction.
    """

    def __init__(self, infile, outfile_info="", outfile_summary="",
                 select="", outfile="", outputdir="", latname="",
                 latvalN="", latvalS="", lonname="", lonvalE="",
                 lonvalW="", filter_list="", coords="", time="",
                 verbose=False
                 ):
        self.infile = infile
        self.outfile_info = outfile_info
        self.outfile_summary = outfile_summary
        self.select = select
        self.outfile = outfile
        self.outputdir = outputdir
        self.latname = latname
        # Geographic bounds arrive as strings from argparse; keep "" when
        # unset so later code can tell "not provided" apart from 0.0.
        if latvalN != "" and latvalN is not None:
            self.latvalN = float(latvalN)
        else:
            self.latvalN = ""
        if latvalS != "" and latvalS is not None:
            self.latvalS = float(latvalS)
        else:
            self.latvalS = ""
        self.lonname = lonname
        if lonvalE != "" and lonvalE is not None:
            self.lonvalE = float(lonvalE)
        else:
            self.lonvalE = ""
        if lonvalW != "" and lonvalW is not None:
            self.lonvalW = float(lonvalW)
        else:
            self.lonvalW = ""
        self.filter = filter_list
        self.time = time
        self.coords = coords
        self.verbose = verbose
        # initialization
        self.dset = None
        self.gset = None
        if self.verbose:
            # (the original printed "outfile" twice; the duplicate was
            # removed here)
            print("infile: ", self.infile)
            print("outfile_info: ", self.outfile_info)
            print("outfile_summary: ", self.outfile_summary)
            print("outfile: ", self.outfile)
            print("select: ", self.select)
            print("outputdir: ", self.outputdir)
            print("latname: ", self.latname)
            print("latvalN: ", self.latvalN)
            print("latvalS: ", self.latvalS)
            print("lonname: ", self.lonname)
            print("lonvalE: ", self.lonvalE)
            print("lonvalW: ", self.lonvalW)
            print("filter: ", self.filter)
            print("time: ", self.time)
            print("coords: ", self.coords)

    def info(self):
        """Write the output of xarray's ``Dataset.info()`` to outfile_info."""
        with open(self.outfile_info, 'w') as f:
            ds = xr.open_dataset(self.infile)
            ds.info(f)

    def summary(self):
        """Write a tab-separated summary of variables and coordinates.

        One row per variable/coordinate: name, number of dimensions, then
        alternating dimension name and dimension size.
        """
        with open(self.outfile_summary, 'w') as f:
            ds = xr.open_dataset(self.infile)
            writer = csv.writer(f, delimiter='\t')
            # Header sized by the dataset's dimension count (widest case).
            header = ['VariableName', 'NumberOfDimensions']
            for idx, val in enumerate(ds.dims.items()):
                header.append('Dim' + str(idx) + 'Name')
                header.append('Dim' + str(idx) + 'Size')
            writer.writerow(header)
            # Iterating da.sizes yields dimension names; da.shape gives
            # the matching sizes, so each pair appended is (name, size).
            for name, da in ds.data_vars.items():
                line = [name]
                line.append(len(ds[name].shape))
                for d, s in zip(da.shape, da.sizes):
                    line.append(s)
                    line.append(d)
                writer.writerow(line)
            for name, da in ds.coords.items():
                line = [name]
                line.append(len(ds[name].shape))
                for d, s in zip(da.shape, da.sizes):
                    line.append(s)
                    line.append(d)
                writer.writerow(line)

    def rowfilter(self, single_filter):
        """Apply one 'variable#operator#value[#value2]' filter to self.dset.

        Operators: 'bi' (between, inclusive), 'le', 'ge', 'e' (equal).
        When the filter targets the selected variable, values are masked
        with ``where``; otherwise the dimension is sliced with ``sel``.
        """
        split_filter = single_filter.split('#')
        filter_varname = split_filter[0]
        op = split_filter[1]
        ll = float(split_filter[2])
        if (op == 'bi'):
            rl = float(split_filter[3])
        if filter_varname == self.select:
            # filter on values of the selected variable
            if op == 'bi':
                self.dset = self.dset.where((self.dset <= rl) &
                                            (self.dset >= ll))
            elif op == 'le':
                self.dset = self.dset.where(self.dset <= ll)
            elif op == 'ge':
                self.dset = self.dset.where(self.dset >= ll)
            elif op == 'e':
                self.dset = self.dset.where(self.dset == ll)
        else:  # filter on other dimensions of the selected variable
            if op == 'bi':
                self.dset = self.dset.sel({filter_varname: slice(ll, rl)})
            elif op == 'le':
                self.dset = self.dset.sel({filter_varname: slice(None, ll)})
            elif op == 'ge':
                self.dset = self.dset.sel({filter_varname: slice(ll, None)})
            elif op == 'e':
                self.dset = self.dset.sel({filter_varname: ll},
                                          method='nearest')

    def selection(self):
        """Select the variable, apply time/value filters and the area
        selection, then save the result as a tab-separated file."""
        if self.dset is None:
            self.ds = xr.open_dataset(self.infile)
            self.dset = self.ds[self.select]  # select variable
            if self.time:
                self.datetime_selection()
            if self.filter:
                self.filter_selection()

        self.area_selection()
        # convert to dataframe, dropping all-NaN rows from the masking
        self.gset = self.gset.to_dataframe().dropna(how='all').reset_index()
        self.gset.to_csv(self.outfile, header=True, sep='\t')

    def datetime_selection(self):
        """Apply the 'dim#operator#start[#end]' time filter to self.dset.

        Operators: 'sl' (slice start..end), 'to' (up to), 'from' (since),
        'is' (nearest single timestamp).
        """
        split_filter = self.time.split('#')
        time_varname = split_filter[0]
        op = split_filter[1]
        ll = split_filter[2]
        if (op == 'sl'):
            rl = split_filter[3]
            self.dset = self.dset.sel({time_varname: slice(ll, rl)})
        elif (op == 'to'):
            self.dset = self.dset.sel({time_varname: slice(None, ll)})
        elif (op == 'from'):
            self.dset = self.dset.sel({time_varname: slice(ll, None)})
        elif (op == 'is'):
            self.dset = self.dset.sel({time_varname: ll}, method='nearest')

    def filter_selection(self):
        """Apply every user-supplied filter expression in order."""
        for single_filter in self.filter:
            self.rowfilter(single_filter)

    def area_selection(self):
        """Restrict self.dset geographically into self.gset.

        Both S and W bounds present -> sub-region slice; only N/E present
        -> nearest non-null single location; neither -> whole region.
        """
        if self.latvalS != "" and self.lonvalW != "":
            # Select geographical area
            self.gset = self.dset.sel({self.latname:
                                       slice(self.latvalS, self.latvalN),
                                       self.lonname:
                                       slice(self.lonvalW, self.lonvalE)})
        elif self.latvalN != "" and self.lonvalE != "":
            # select nearest location
            self.nearest_location()  # find nearest location without NaN values
            self.gset = self.dset.sel({self.latname: self.nearest_latvalN,
                                       self.lonname: self.nearest_lonvalE},
                                      method='nearest')
        else:
            self.gset = self.dset

    def nearest_location(self):
        # Build a geopandas dataframe with all first elements in each dimension
        # so we assume null values correspond to a mask that is the same for
        # all dimensions in the dataset.
        dsel_frame = self.dset
        for dim in self.dset.dims:
            if dim != self.latname and dim != self.lonname:
                dsel_frame = dsel_frame.isel({dim: 0})
        # transform to pandas dataframe
        dff = dsel_frame.to_dataframe().dropna().reset_index()
        # transform to geopandas to collocate
        gdf = gdp.GeoDataFrame(dff,
                               geometry=gdp.points_from_xy(dff[self.lonname],
                                                           dff[self.latname]))
        # Find nearest location where values are not null
        point = Point(self.lonvalE, self.latvalN)
        multipoint = gdf.geometry.unary_union
        queried_geom, nearest_geom = nearest_points(point, multipoint)
        self.nearest_latvalN = nearest_geom.y
        self.nearest_lonvalE = nearest_geom.x

    def selection_from_coords(self):
        """Run one selection per (latitude, longitude) row of the coords file,
        writing one tabular file per row into self.outputdir."""
        fcoords = pd.read_csv(self.coords, sep='\t')
        for row in fcoords.itertuples():
            # itertuples() yields the pandas index as element 0, so the
            # latitude and longitude columns are at positions 1 and 2.
            # (The original read row[0]/row[1], i.e. used the row index
            # as latitude — fixed here.)
            self.latvalN = row[1]
            self.lonvalE = row[2]
            self.outfile = (self.outputdir + '/' + self.select + '_' +
                            str(row.Index) + '.tabular')
            self.selection()


if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    parser = argparse.ArgumentParser()

    parser.add_argument(
        'infile',
        help='netCDF input filename'
    )
    parser.add_argument(
        '--info',
        help='Output filename where metadata information is stored'
    )
    parser.add_argument(
        '--summary',
        help='Output filename where data summary information is stored'
    )
    parser.add_argument(
        '--select',
        help='Variable name to select'
    )
    parser.add_argument(
        '--latname',
        help='Latitude name'
    )
    parser.add_argument(
        '--latvalN',
        help='North latitude value'
    )
    parser.add_argument(
        '--latvalS',
        help='South latitude value'
    )
    parser.add_argument(
        '--lonname',
        help='Longitude name'
    )
    parser.add_argument(
        '--lonvalE',
        help='East longitude value'
    )
    parser.add_argument(
        '--lonvalW',
        help='West longitude value'
    )
    parser.add_argument(
        '--coords',
        help='Input file containing Latitude and Longitude'
             'for geographical selection'
    )
    parser.add_argument(
        '--filter',
        nargs="*",
        help='Filter list variable#operator#value_s#value_e'
    )
    parser.add_argument(
        '--time',
        help='select timeseries variable#operator#value_s[#value_e]'
    )
    parser.add_argument(
        '--outfile',
        help='csv outfile for storing results of the selection'
             '(valid only when --select)'
    )
    parser.add_argument(
        '--outputdir',
        help='folder name for storing results with multiple selections'
             '(valid only when --select)'
    )
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true"
    )
    args = parser.parse_args()

    p = XarrayTool(args.infile, args.info, args.summary, args.select,
                   args.outfile, args.outputdir, args.latname,
                   args.latvalN, args.latvalS, args.lonname,
                   args.lonvalE, args.lonvalW, args.filter,
                   args.coords, args.time, args.verbose)
    if args.info:
        p.info()
    if args.summary:
        p.summary()
    if args.coords:
        p.selection_from_coords()
    elif args.select:
        p.selection()