comparison xarray_info.py @ 4:b393815e4cb7 draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit fd8ad4d97db7b1fd3876ff63e14280474e06fdf7
author ecology
date Sun, 31 Jul 2022 21:20:41 +0000
parents
children
comparison
equal deleted inserted replaced
3:bf595d613af4 4:b393815e4cb7
1 # xarray tool for:
2 # - getting metadata information
3 # - select data and save results in csv file for further post-processing
4
5 import argparse
6 import csv
7 import os
8 import warnings
9
10 import xarray as xr
11
12
class XarrayInfo:
    """Report metadata of a netCDF file opened with xarray.

    Three independent outputs can be produced:

    * ``info()`` writes the dataset description produced by
      ``Dataset.info`` to ``outfile_info``;
    * ``summary()`` writes one tab-separated row per variable and
      coordinate (name, number of dimensions, then name/size of each
      dimension) to ``outfile_summary``;
    * ``get_coords_info()`` writes one ``<coordinate>.tabular`` file per
      coordinate into the ``coords_info`` folder.

    Parameters
    ----------
    infile : str
        Path of the input file passed to ``xr.open_dataset``.
    outfile_info : str
        Output path used by ``info()``.
    outfile_summary : str
        Output path used by ``summary()``.
    verbose : bool
        When true, the constructor prints the received arguments.
    coords_info : str or None
        Output folder used by ``get_coords_info()``.
    """

    def __init__(self, infile, outfile_info="", outfile_summary="",
                 verbose=False, coords_info=None):
        self.infile = infile
        self.outfile_info = outfile_info
        self.outfile_summary = outfile_summary
        self.coords_info = coords_info
        self.verbose = verbose
        # initialization (not used by the methods of this class)
        self.dset = None
        self.gset = None
        if self.verbose:
            print("infile: ", self.infile)
            print("outfile_info: ", self.outfile_info)
            print("outfile_summary: ", self.outfile_summary)
            print("coords_info: ", self.coords_info)

    def info(self):
        """Write the ``Dataset.info`` description to ``outfile_info``."""
        # Context managers guarantee that both the output file and the
        # dataset are closed, even if reading or writing fails.
        with open(self.outfile_info, 'w') as f:
            with xr.open_dataset(self.infile) as ds:
                ds.info(f)

    def summary(self):
        """Write a tab-separated summary of all variables and coordinates.

        The header contains ``VariableName``, ``NumberOfDimensions`` and
        one ``Dim<i>Name`` / ``Dim<i>Size`` column pair per dimension of
        the dataset.  Each following row describes one data variable or
        coordinate and only fills as many pairs as it has dimensions.
        """
        # newline='' leaves line-ending handling to the csv module, as its
        # documentation requires for files handed to csv.writer.
        with open(self.outfile_summary, 'w', newline='') as f:
            with xr.open_dataset(self.infile) as ds:
                writer = csv.writer(f, delimiter='\t')
                header = ['VariableName', 'NumberOfDimensions']
                # Only the number of dataset dimensions matters here.
                for idx in range(len(ds.dims)):
                    header.append('Dim' + str(idx) + 'Name')
                    header.append('Dim' + str(idx) + 'Size')
                writer.writerow(header)
                # Data variables first, then coordinates, same row layout.
                for variables in (ds.data_vars, ds.coords):
                    for name, da in variables.items():
                        row = [name, len(da.shape)]
                        for dim_name, dim_size in zip(da.dims, da.shape):
                            row.append(dim_name)
                            row.append(dim_size)
                        writer.writerow(row)

    def get_coords_info(self):
        """Write the values of each coordinate to its own tabular file.

        Files are named ``<coordinate name>.tabular`` and are created in
        the ``coords_info`` folder, which is created when missing.  Rows
        are indexed 0..n-1 and written tab-separated without a header.
        """
        os.makedirs(self.coords_info, exist_ok=True)
        with xr.open_dataset(self.infile) as ds:
            for c in ds.coords:
                filename = os.path.join(self.coords_info,
                                        c.strip() + '.tabular')
                # NOTE(review): for a 0-dimensional (scalar) coordinate
                # to_pandas() presumably does not return a Series or
                # DataFrame, in which case len() below would fail --
                # confirm and handle if such inputs occur.
                values = ds.coords[c].to_pandas()
                # Replace the coordinate-valued index by a plain counter.
                values.index = range(len(values))
                values.to_csv(filename, header=False, sep='\t')
70
71
if __name__ == '__main__':
    # Command-line entry point: parse the options and run every requested
    # report on the input file.
    # All warnings are suppressed for the whole run.
    warnings.filterwarnings("ignore")
    parser = argparse.ArgumentParser()

    parser.add_argument(
        'infile',
        help='netCDF input filename'
    )
    parser.add_argument(
        '--info',
        help='Output filename where metadata information is stored'
    )
    parser.add_argument(
        '--summary',
        help='Output filename where data summary information is stored'
    )
    parser.add_argument(
        '--coords_info',
        help='output-folder where for each coordinate, coordinate values '
             ' are being printed in the corresponding outputfile'
    )
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true"
    )
    args = parser.parse_args()

    p = XarrayInfo(args.infile, args.info, args.summary,
                   args.verbose, args.coords_info)
    # Each output is optional and independent of the others, so every
    # option is checked on its own; previously --coords_info was silently
    # ignored whenever --info was also given.
    if args.info:
        p.info()
    if args.coords_info:
        p.get_coords_info()
    if args.summary:
        p.summary()