Commit 6d90d9a

Merge pull request #27 from valeriupredoi/add_realworld_test
Add real world test suite
2 parents 8243e70 + eeb26bd commit 6d90d9a

8 files changed: +325 -23 lines changed

activestorage/active.py (+2 -2)

@@ -7,7 +7,7 @@
 from zarr.indexing import (
     OrthogonalIndexer,
 )
-from activestorage.storage import decode_chunk
+from activestorage.storage import reduce_chunk
 from activestorage import netcdf_to_zarr as nz
 
 
@@ -249,7 +249,7 @@ def _process_chunk(self, fsref, chunk_coords, chunk_selection, out,
         coord = '.'.join([str(c) for c in chunk_coords])
         key = f"{self.ncvar}/{coord}"
         rfile, offset, size = tuple(fsref[key])
-        tmp = decode_chunk(rfile, offset, size, compressor, filters, missing,
+        tmp = reduce_chunk(rfile, offset, size, compressor, filters, missing,
                            self.zds._dtype, self.zds._chunks, self.zds._order,
                            chunk_selection, method=self.method)
 
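
The only functional change in active.py is the switch from decode_chunk to reduce_chunk; the surrounding context also shows how _process_chunk locates a chunk before handing it to that helper. As a hedged illustration of that keying convention (the dictionary entry, path, offset and size below are made up, not taken from the commit), the chunk coordinates are joined with '.' and prefixed with the variable name, and the fsref mapping yields the file, byte offset and byte size of the chunk:

# Illustrative sketch of the fsref lookup seen in _process_chunk above;
# the mapping contents and file path are hypothetical.
ncvar = "data"
chunk_coords = (0, 1, 2)
coord = '.'.join([str(c) for c in chunk_coords])
key = f"{ncvar}/{coord}"                           # -> "data/0.1.2"
fsref = {key: ("test_vanilla.nc", 8192, 1024)}     # (file, offset, size), made up
rfile, offset, size = tuple(fsref[key])
print(key, rfile, offset, size)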

activestorage/dummy_data.py (+120 -16)

@@ -1,3 +1,4 @@
+from os import lseek
 from netCDF4 import Dataset
 import numpy as np
 import zarr
@@ -18,44 +19,147 @@ def _make_data(n=10):
     return dd, data
 
 
-def make_test_ncdata(filename='test_bizarre.nc', chunksize=(3,3,1), compression=None, n=10):
+def make_missing_ncdata(filename='test_missing.nc', chunksize=(3,3,1), n=10):
     """
-    Make a test dataset which is three dimensional with indices and values that
-    aid in testing data extraction. If compression is required, it can be passed in via keyword
-    and is applied to all variables
+    Makes a test dataset based on the default vanilla dataset, but which includes
+    some missing values
     """
+    return make_ncdata(filename, chunksize, n, compression=None, missing=-999.)
+
+def make_fillvalue_ncdata(filename='test_fillvalue.nc', chunksize=(3,3,1), n=10):
+    """
+    Makes a test dataset based on the default vanilla dataset, but which includes
+    some non-default fillvalues.
+    """
+    return make_ncdata(filename, chunksize, n, compression=None, fillvalue=-999.)
+
+def make_validmin_ncdata(filename='test_validmin.nc', chunksize=(3,3,1), n=10):
+    """
+    Makes a test dataset based on the default vanilla dataset, but which includes
+    missing values below min.
+    """
+    return make_ncdata(filename, chunksize, n, compression=None, valid_min=-1.)
+
+def make_validmax_ncdata(filename='test_validmax.nc', chunksize=(3,3,1), n=10):
+    """
+    Makes a test dataset based on the default vanilla dataset, but which includes
+    missing values above max
+    """
+    return make_ncdata(filename, chunksize, n, compression=None, valid_max=1.2*n**3)
+
+
+def make_validrange_ncdata(filename='test_validrange.nc', chunksize=(3,3,1), n=10):
+    """
+    Makes a test dataset based on the default vanilla dataset, but which includes
+    missing values outside range
+    """
+    return make_ncdata(filename, chunksize, n, compression=None, valid_range=[-1.0,1.2*n**3])
+
+def make_vanilla_ncdata(filename='test_vanilla.nc', chunksize=(3,3,1), n=10):
+    """
+    Make a vanilla test dataset which is three dimensional with indices and values that
+    aid in testing data extraction.
+    """
+    r = make_ncdata(filename, chunksize, n, None, False)
+    return
+
+def make_ncdata(filename, chunksize, n, compression=None,
+                missing=None,
+                fillvalue=None,
+                valid_range=None,
+                valid_min=None,
+                valid_max=None):
+    """
+    If compression is required, it can be passed in via keyword
+    and is applied to all variables.
+
+    Note that if compression is not None, or any of the valid
+    data options (missing etc) are selected, then four values
+    (for each option) are modified and made invalid.
+
+    For the purposes of test data, bounds (valid_min, range etc)
+    need to be non-zero, although that wont hold in real life.
+    """
+
+    def make_holes(var, indices, attribute, value, dummy):
+        if value is not None:
+            assert type(value) == float
+            setattr(var,attribute,value)
+        for i,j,k in indices:
+            var[i,j,k] = dummy
+
+    assert n > 4
+
     ds = Dataset(filename, 'w', format="NETCDF4")
     dd, data = _make_data(n)
 
     xdim = ds.createDimension("xdim",n)
     ydim = ds.createDimension("ydim",n)
     zdim = ds.createDimension("zdim",n)
-    x = ds.createVariable("x","i4",("xdim",), compression=compression)
-    y = ds.createVariable("y","i4",("ydim",), compression=compression)
-    z = ds.createVariable("z","i4",("zdim",), compression=compression)
+
+    x = ds.createVariable("x","i4",("xdim",), fill_value=fillvalue, compression=compression)
+    y = ds.createVariable("y","i4",("ydim",), fill_value=fillvalue, compression=compression)
+    z = ds.createVariable("z","i4",("zdim",), fill_value=fillvalue, compression=compression)
 
     for a,s in zip([x,y,z],[1,n,n*n]):
         a[:] = dd*s
 
     dvar = ds.createVariable("data","f8",("xdim","ydim","zdim"), chunksizes=chunksize, compression=compression)
     dvar[:] = data
+
+    nm1,nm2 = n-1, n-2
+    # we use a diffferent set of indices for all the values to be masked
+    mindices, findices, vrindices, vm1indices, vm2indices = None, None, None, None, None
+    if missing:
+        # we use the deprecated missing_value option
+        mindices = [(1,1,1),(n/2,1,1),(1,nm1,1),(nm1,1,n/2)]
+        dvar = make_holes(dvar, mindices, 'missing_value', missing, missing)
+
+    if fillvalue:
+        # note we use a different set of indices for
+        findices = [(nm1,nm1,nm1),(n/2,n/2,1),(1,1,n/2),(nm1,nm1,n/2)]
+        dvar = make_holes(dvar, findices, '_FillValue', None, fillvalue)
+
+    if valid_range and valid_min or valid_range and valid_max:
+        raise ValueError("Can't mix and match validity options")
+
+    if valid_min:
+        if valid_min == 0.0:
+            raise ValueError('Dummy data needs a non-zero valid min')
+        vm1indices = [(2,2,2),(n/2,2,2),(2,nm1,2),(nm1,2,nm1/2)]
+        dvar = make_holes(dvar, vm1indices, 'valid_min', valid_min, valid_min-abs(0.1*valid_min))
 
+    if valid_max:
+        if valid_min == 0.0:
+            raise ValueError('Dummy data needs a non-zero valid max')
+        vm2indices = [(2,nm1,2),(2,2,nm1),(nm2,nm2,nm1),(nm1,nm2,n/2)]
+        dvar = make_holes(dvar, vm2indices, 'valid_max', valid_max, valid_max*10)
+
+    if valid_range:
+        assert len(valid_range) == 2 and type(valid_range[0]) == float
+        if valid_range[0] == 0.0 or valid_range[1] == 0.0:
+            raise ValueError('Dummy data needs non-zero range bounds')
+        vrindices = [(2,nm1,nm2),(2,nm2,nm1),(nm1,nm2,nm1),(n/2,n/2+1,n/2)]
+        dvar.valid_range=valid_range
+        for i,j,k in vrindices[0:2]:
+            dvar[i,j,k]= valid_range[0]-abs(0.1*valid_range[0])
+        for i,j,k in vrindices[2:]:
+            dvar[i,j,k] = valid_range[1]*10
+
     ds.close()
 
     ds = Dataset(filename,'r')
     var = ds.variables['data']
     print(f'\nCreated file "{filename}" with a variable called "data" with shape {var.shape} and chunking, compression {var.chunking()},{compression}\n')
 
-
-def make_testzarr_variable_file(filename='test.zarr'):
-    """
-    Make a test variable and write to a zarr file.
-    #FIXME: Not quite sure how to get the chunking right yet
-    """
-    dd, data = _make_data()
-    zarr.save(filename, x=dd, y=dd*10, z=dd*100, data=data)
+    return mindices, findices, vrindices, vm1indices, vm2indices
 
 
 if __name__=="__main__":
-    make_test_ncdata()
+    make_vanilla_ncdata()
+    make_validmin_ncdata()
+    make_validmax_ncdata()
+    make_missing_ncdata()
+    make_fillvalue_ncdata()
+    make_validrange_ncdata()
 
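
The single make_test_ncdata entry point is replaced by a family of factories (vanilla, missing, fillvalue, valid_min, valid_max, valid_range), all delegating to make_ncdata, which pokes four deliberately invalid values per selected option into the data variable and returns the indices it modified. A hedged usage sketch (not part of the commit) of driving one of these factories and reading the result back with netCDF4:

# Hedged sketch: create a dummy file with one of the new factories and inspect
# the variable; the filename and sizes are the defaults from dummy_data.py.
from netCDF4 import Dataset
from activestorage.dummy_data import make_vanilla_ncdata

make_vanilla_ncdata('test_vanilla.nc')        # plain 10x10x10 dataset, no holes
with Dataset('test_vanilla.nc') as ds:
    data = ds.variables['data']
    print(data.shape, data.chunking())        # expect (10, 10, 10) and [3, 3, 1]

# The masking variants additionally return the indices they invalidated, e.g.
# mindices, findices, vrindices, vm1indices, vm2indices = make_missing_ncdata()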

activestorage/storage.py (+1 -1)

@@ -1,7 +1,7 @@
 from ast import Not
 from numcodecs.compat import ensure_ndarray
 
-def decode_chunk(rfile, offset, size, compression, filters, missing, dtype, shape, order, chunk_selection, method=None):
+def reduce_chunk(rfile, offset, size, compression, filters, missing, dtype, shape, order, chunk_selection, method=None):
     """ We do our own read of chunks and decoding etc
 
     rfile - the actual file with the data
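
The rename from decode_chunk to reduce_chunk signals that this helper is expected to do more than decode: the existing method keyword lets the caller ask for a reduction over the selected part of the chunk. Below is only a rough sketch of the kind of operation that signature suggests, assuming method is a NumPy-style reduction; it skips compression, filters and missing-value handling and is not the implementation in activestorage.storage:

import numpy as np

def reduce_chunk_sketch(chunk_bytes, dtype, shape, order, chunk_selection, method=None):
    # Interpret the raw chunk bytes as an ndarray (no compression, filters or
    # missing handling here), apply the selection, then optionally reduce it.
    chunk = np.frombuffer(chunk_bytes, dtype=dtype).reshape(shape, order=order)
    selected = chunk[chunk_selection]
    return method(selected) if callable(method) else selected

# Hypothetical call on an in-memory "chunk": mean over the first two x-planes.
raw = np.arange(27, dtype="f8").tobytes()
print(reduce_chunk_sketch(raw, "f8", (3, 3, 3), "C", np.s_[0:2, :, :], method=np.mean))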

old_code/test_harness-learning.py (+2 -2)

@@ -2,7 +2,7 @@
 import unittest
 import os
 from activestorage.active_tools import make_an_array_instance_active
-from activestorage.dummy_data import make_test_ncdata
+from activestorage.dummy_data import make_vanilla_ncdata
 from netCDF4 import Dataset
 import numpy as np
 from numcodecs.compat import ensure_ndarray
@@ -217,7 +217,7 @@ def setUp(self):
         """
         self.testfile = 'test_bizarre.nc'
         if not os.path.exists(self.testfile):
-            make_test_ncdata(filename=self.testfile)
+            make_vanilla_ncdata(filename=self.testfile)
 
     def testRead0(self):
         """
