From 99b623d138adeb0929fb8f78ca8665e5d4a15b13 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 8 Feb 2024 22:23:16 +0000 Subject: [PATCH 01/14] Get missing values and dtype without re-opening the dataset --- activestorage/active.py | 152 ++++++++++++++------------- activestorage/netcdf_to_zarr.py | 34 ++++-- tests/test_bigger_data.py | 4 +- tests/test_data/daily_data_masked.nc | Bin 26632 -> 26737 bytes tests/unit/test_active.py | 4 +- 5 files changed, 108 insertions(+), 86 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index fed673ca..e3947399 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -68,7 +68,7 @@ def __new__(cls, *args, **kwargs): } return instance - def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue=None, valid_min=None, valid_max=None, max_threads=100): + def __init__(self, uri, ncvar, storage_type=None, max_threads=100): """ Instantiate with a NetCDF4 dataset and the variable of interest within that file. (We need the variable, because we need variable specific metadata from within that @@ -92,65 +92,6 @@ def __init__(self, uri, ncvar, storage_type=None, missing_value=None, _FillValue self._method = None self._lock = False self._max_threads = max_threads - - # obtain metadata, using netcdf4_python for now - # FIXME: There is an outstanding issue with ._FilLValue to be handled. - # If the user actually wrote the data with no fill value, or the - # default fill value is in play, then this might go wrong. - if storage_type is None: - ds = Dataset(uri) - elif storage_type == "s3": - with load_from_s3(uri) as _ds: - ds = _ds - try: - ds_var = ds[ncvar] - except IndexError as exc: - print(f"Dataset {ds} does not contain ncvar {ncvar!r}.") - raise exc - - # FIXME: We do not get the correct byte order on the Zarr Array's dtype - # when using S3, so capture it here. 
- self._dtype = ds_var.dtype - - if (missing_value, _FillValue, valid_min, valid_max) == (None, None, None, None): - if isinstance(ds, Dataset): - self._missing = getattr(ds_var, 'missing_value', None) - self._fillvalue = getattr(ds_var, '_FillValue', None) - # could be fill_value set as netCDF4 attr - if self._fillvalue is None: - self._fillvalue = getattr(ds_var, 'fill_value', None) - valid_min = getattr(ds_var, 'valid_min', None) - valid_max = getattr(ds_var, 'valid_max', None) - valid_range = getattr(ds_var, 'valid_range', None) - elif storage_type == "s3": - self._missing = ds_var.attrs.get('missing_value') - self._fillvalue = ds_var.attrs.get('_FillValue') - # could be fill_value set as netCDF4 attr - if self._fillvalue is None: - self._fillvalue = ds_var.attrs.get('fill_value') - valid_min = ds_var.attrs.get('valid_min') - valid_max = ds_var.attrs.get('valid_max') - valid_range = ds_var.attrs.get('valid_range') - - if valid_max is not None or valid_min is not None: - if valid_range is not None: - raise ValueError( - "Invalid combination in the file of valid_min, " - "valid_max, valid_range: " - f"{valid_min}, {valid_max}, {valid_range}" - ) - valid_range = (valid_min, valid_max) - elif valid_range is None: - valid_range = (None, None) - self._valid_min, self._valid_max = valid_range - - else: - self._missing = missing_value - self._fillvalue = _FillValue - self._valid_min = valid_min - self._valid_max = valid_max - - ds.close() def __getitem__(self, index): """ @@ -174,22 +115,16 @@ def __getitem__(self, index): elif self.storage_type == "s3": with load_from_s3(self.uri) as nc: data = nc[ncvar][index] - # h5netcdf doesn't return masked arrays. 
- if self._fillvalue: - data = np.ma.masked_equal(data, self._fillvalue) - if self._missing: - data = np.ma.masked_equal(data, self._missing) - if self._valid_max: - data = np.ma.masked_greater(data, self._valid_max) - if self._valid_min: - data = np.ma.masked_less(data, self._valid_min) + data = self._mask_data(data, nc[ncvar]) if lock: lock.release() - + return data + elif self._version == 1: return self._via_kerchunk(index) + elif self._version == 2: # No active operation either lock = self.lock @@ -202,6 +137,7 @@ def __getitem__(self, index): lock.release() return data + else: raise ValueError(f'Version {self._version} not supported') @@ -299,14 +235,27 @@ def _via_kerchunk(self, index): if self.zds is None: print(f"Kerchunking file {self.uri} with variable " f"{self.ncvar} for storage type {self.storage_type}") - ds = nz.load_netcdf_zarr_generic(self.uri, - self.ncvar, - self.storage_type) + ds, zarray, zattrs = nz.load_netcdf_zarr_generic( + self.uri, + self.ncvar, + self.storage_type + ) # The following is a hangove from exploration # and is needed if using the original doing it ourselves # self.zds = make_an_array_instance_active(ds) self.zds = ds + # Retain attributes and other information + if zarray.get('fill_value') is not None: + zattrs['_FillValue'] = zarray['fill_value'] + + self.zarray = zarray + self.zattrs = zattrs + + # FIXME: We do not get the correct byte order on the Zarr + # Array's dtype when using S3, so capture it here. 
+ self._dtype = np.dtype(zarray['dtype']) + return self._get_selection(index) def _get_selection(self, *args): @@ -319,7 +268,28 @@ def _get_selection(self, *args): compressor = self.zds._compressor filters = self.zds._filters - missing = self._fillvalue, self._missing, self._valid_min, self._valid_max + # Get missing values + _FillValue = self.zattrs.get('_FillValue') + missing_value = self.zattrs.get('missing_value') + valid_min = self.zattrs.get('valid_min') + valid_max = self.zattrs.get('valid_max') + valid_range = self.zattrs.get('valid_range') + if valid_max is not None or valid_min is not None: + if valid_range is not None: + raise ValueError( + "Invalid combination in the file of valid_min, " + "valid_max, valid_range: " + f"{valid_min}, {valid_max}, {valid_range}" + ) + elif valid_range: + valid_min, valid_max = valid_range + + missing = ( + _FillValue, + missing_value, + valid_min, + valid_max, + ) indexer = OrthogonalIndexer(*args, self.zds) out_shape = indexer.shape @@ -468,3 +438,37 @@ def _process_chunk(self, session, fsref, chunk_coords, chunk_selection, counts, if drop_axes: tmp = np.squeeze(tmp, axis=drop_axes) return tmp, out_selection + + def _mask_data(self, data, ds_var): + """ppp""" + # TODO: replace with cfdm.NetCDFIndexer, hopefully. 
+ attrs = ds_var.attrs + missing_value = attrs.get('missing_value') + _FillValue = attrs.get('_FillValue') + valid_min = attrs.get('valid_min') + valid_max = attrs.get('valid_max') + valid_range = attrs.get('valid_range') + + if valid_max is not None or valid_min is not None: + if valid_range is not None: + raise ValueError( + "Invalid combination in the file of valid_min, " + "valid_max, valid_range: " + f"{valid_min}, {valid_max}, {valid_range}" + ) + elif valid_range: + valid_min, valid_max = valid_range + + if _FillValue is not None: + data = np.ma.masked_equal(data, fillvalue) + + if missing_value is not None: + data = np.ma.masked_equal(data, missing) + + if valid_max is not None: + data = np.ma.masked_greater(data, valid_max) + + if valid_min is not None: + data = np.ma.masked_less(data, valid_min) + + return data diff --git a/activestorage/netcdf_to_zarr.py b/activestorage/netcdf_to_zarr.py index a8dcb034..782ff729 100644 --- a/activestorage/netcdf_to_zarr.py +++ b/activestorage/netcdf_to_zarr.py @@ -10,7 +10,7 @@ from kerchunk.hdf import SingleHdf5ToZarr -def gen_json(file_url, outf, storage_type): +def gen_json(file_url, varname, outf, storage_type): """Generate a json file that contains the kerchunk-ed data for Zarr.""" if storage_type == "s3": fs = s3fs.S3FileSystem(key=S3_ACCESS_KEY, @@ -24,7 +24,8 @@ def gen_json(file_url, outf, storage_type): h5chunks = SingleHdf5ToZarr(s3file, file_url, inline_threshold=0) with fs2.open(outf, 'wb') as f: - f.write(ujson.dumps(h5chunks.translate()).encode()) + content = h5chunks.translate() + f.write(ujson.dumps(content).encode()) else: fs = fsspec.filesystem('') with fs.open(file_url, 'rb') as local_file: @@ -43,9 +44,13 @@ def gen_json(file_url, outf, storage_type): # faster loading time # for active storage, we don't want anything inline with fs.open(outf, 'wb') as f: - f.write(ujson.dumps(h5chunks.translate()).encode()) + content = h5chunks.translate() + f.write(ujson.dumps(content).encode()) - return outf + 
zarray = ujson.loads(content['refs'][f"{varname}/.zarray"]) + zattrs = ujson.loads(content['refs'][f"{varname}/.zattrs"]) + + return outf, zarray, zattrs def open_zarr_group(out_json, varname): @@ -60,6 +65,7 @@ def open_zarr_group(out_json, varname): mapper = fs.get_mapper("") # local FS mapper #mapper.fs.reference has the kerchunk mapping, how does this propagate into the Zarr array? zarr_group = zarr.open_group(mapper) + try: zarr_array = getattr(zarr_group, varname) except AttributeError as attrerr: @@ -67,7 +73,7 @@ def open_zarr_group(out_json, varname): f"Zarr Group info: {zarr_group.info}") raise attrerr #print("Zarr array info:", zarr_array.info) - + return zarr_array @@ -77,10 +83,24 @@ def load_netcdf_zarr_generic(fileloc, varname, storage_type, build_dummy=True): # Write the Zarr group JSON to a temporary file. with tempfile.NamedTemporaryFile() as out_json: - gen_json(fileloc, out_json.name, storage_type) + _, zarray, zattrs = gen_json(fileloc, varname, out_json.name, storage_type) # open this monster print(f"Attempting to open and convert {fileloc}.") ref_ds = open_zarr_group(out_json.name, varname) - return ref_ds + return ref_ds, zarray, zattrs + + +#d = {'version': 1, +# 'refs': { +# '.zgroup': '{"zarr_format":2}', +# '.zattrs': '{"Conventions":"CF-1.6","access-list":"grenvillelister simonwilson jeffcole","awarning":"**** THIS SUITE WILL ARCHIVE NON-DUPLEXED DATA TO MOOSE. FOR CRITICAL MODEL RUNS SWITCH TO DUPLEXED IN: postproc --> Post Processing - common settings --> Moose Archiving --> non_duplexed_set. 
Follow guidance in http:\\/\\/www-twiki\\/Main\\/MassNonDuplexPolicy","branch-date":"1950-01-01","calendar":"360_day","code-version":"UM 11.6, NEMO vn3.6","creation_time":"2022-10-28 12:28","decription":"Initialised from EN4 climatology","description":"Copy of u-ar696\\/trunk@77470","email":"r.k.schieman@reading.ac.uk","end-date":"2015-01-01","experiment-id":"historical","forcing":"AA,BC,CO2","forcing-info":"blah, blah, blah","institution":"NCAS","macro-parent-experiment-id":"historical","macro-parent-experiment-mip":"CMIP","macro-parent-variant-id":"r1i1p1f3","model-id":"HadGEM3-CG31-MM","name":"\\/work\\/n02\\/n02\\/grenvill\\/cylc-run\\/u-cn134\\/share\\/cycle\\/19500101T0000Z\\/3h_","owner":"rosalynhatcher","project":"Coupled Climate","timeStamp":"2022-Oct-28 12:20:33 GMT","title":"[CANARI] GC3.1 N216 ORCA025 UM11.6","uuid":"51e5ef20-d376-4aa6-938e-4c242886b7b1"}', +# 'lat/.zarray': '{"chunks":[324],"compressor":{"id":"zlib","level":1},"dtype":"|T!&^WAB+CI+nh90P zBg4SJI@!@$WU>mQ4I2Z)kD5PEH>WU~vQDGU^kW-FMb(M<_NiL zl)(Ztr<8#~97t8wDj#R~3Un5OJp%(jkXrpd<~YM$kcvtM1_>bbGCk(>(!Gc%OwQWGc#}iA<*!Q z%;J*#qDlr~pal^?401q7MyY~ZYLbG2g@Tcxm5HU5v9W@Yfsu)oLSAxWNl9vog08=U zZlXeNW^r+5UV404VoqtQPD!FpvQC^%N@7W3d~RZKc4~@VUNQqS z&_b}KetFLRU>g&lHs+<4IJ>wh_!p!WB?6sOtWcI(RGgWgr(mjQpl7C_;gO%4T9BBY zs$i>-QBqQ1rLUitoUd1$rU%rhqu`vMf+UlkS&~tjq?eqZ3sMBanwvNBtY%uwk-{?Z zy{x`70|PtIDSQkfK*)gN2L?d}c~GDLfdYd#2ZMM$1HIe)huXmKoyU5!A^Ue_K7^RVs*Det&9uJ=^Uh#q$Y24f%;T3nJ-I(da&v(FCt;>W z?#V8}YK-EO8(GvRFJKZAGn0fE$H;KB^TlGfV4$rGYndnS3wC2!S#9nznIq(~z*nFv z80;As_<_{w_c6yO^M Date: Fri, 9 Feb 2024 15:32:23 +0000 Subject: [PATCH 02/14] remove unneeded test case --- tests/unit/test_active.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/unit/test_active.py b/tests/unit/test_active.py index 05102535..9b3541ea 100644 --- a/tests/unit/test_active.py +++ b/tests/unit/test_active.py @@ -35,13 +35,6 @@ def test_getitem(): active = Active(uri, ncvar=None) assert str(exc.value) == "Must set a netCDF 
variable name to slice" - # unopenable file - ncvar = "tas" - baseexc = "tas not found in /" - with pytest.raises(IndexError) as exc: - active = Active(uri, ncvar=ncvar) - assert baseexc in str(exc.value) - # openable file and correct variable uri = "tests/test_data/cesm2_native.nc" ncvar = "TREFHT" From dc0cde30dcfac335261ce615ef0fb603b9572024 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 9 Feb 2024 15:36:09 +0000 Subject: [PATCH 03/14] patch and retire bits in S3 test --- tests/unit/test_storage_types.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_storage_types.py b/tests/unit/test_storage_types.py index 2f789261..6bd82e3d 100644 --- a/tests/unit/test_storage_types.py +++ b/tests/unit/test_storage_types.py @@ -89,7 +89,9 @@ def reduce_chunk( assert result == 999.0 - mock_load.assert_called_once_with(uri) + # S3 loading is not done from Active anymore + # mock_load.assert_called_once_with(uri) + mock_nz.assert_called_once_with(uri, "data", "s3") # NOTE: This gets called multiple times with various arguments. Match on # the common ones. 
@@ -134,6 +136,7 @@ def load_from_s3(uri): assert np.max(result) == 999.0 +@pytest.mark.skip(reason="No more valid file load in Active") @mock.patch.object(activestorage.active, "load_from_s3") def test_s3_load_failure(mock_load): """Test when an S3 object doesn't exist.""" From 0884af85bb8d107ad68dbced8b6852e2e11071cf Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 9 Feb 2024 17:39:54 +0000 Subject: [PATCH 04/14] valid_range boolean --- activestorage/active.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index e3947399..e1148d05 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -281,7 +281,7 @@ def _get_selection(self, *args): "valid_max, valid_range: " f"{valid_min}, {valid_max}, {valid_range}" ) - elif valid_range: + elif valid_range is not None: valid_min, valid_max = valid_range missing = ( From a69fb2c0661b80bdd699d9a50a1262bf847a4ac4 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 9 Feb 2024 17:42:30 +0000 Subject: [PATCH 05/14] fill value typo --- activestorage/active.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index e1148d05..98221349 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -460,7 +460,7 @@ def _mask_data(self, data, ds_var): valid_min, valid_max = valid_range if _FillValue is not None: - data = np.ma.masked_equal(data, fillvalue) + data = np.ma.masked_equal(data, _FillValue) if missing_value is not None: data = np.ma.masked_equal(data, missing) From 2f1082276ef93b32c44067e03d4c64a29f77627d Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 9 Feb 2024 17:43:10 +0000 Subject: [PATCH 06/14] missing value typo --- activestorage/active.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 98221349..2ca3a60a 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ 
-463,7 +463,7 @@ def _mask_data(self, data, ds_var): data = np.ma.masked_equal(data, _FillValue) if missing_value is not None: - data = np.ma.masked_equal(data, missing) + data = np.ma.masked_equal(data, missing_value) if valid_max is not None: data = np.ma.masked_greater(data, valid_max) From a0fa0b744096ca4ffcf8f4377d4e6841739e391d Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 9 Feb 2024 18:00:57 +0000 Subject: [PATCH 07/14] dev --- tests/test_missing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_missing.py b/tests/test_missing.py index 7f5f4a19..87501fd9 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -53,7 +53,6 @@ def active_two(testfile): return active_mean - def test_partially_missing_data(tmp_path): testfile = str(tmp_path / 'test_partially_missing_data.nc') r = dd.make_partially_missing_ncdata(testfile) From 7cd6f0954f2e30e9a4f669d98e15d6d7b881a54b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 9 Feb 2024 18:05:16 +0000 Subject: [PATCH 08/14] dev --- activestorage/active.py | 2 +- tests/test_missing.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 2ca3a60a..82f27f92 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -456,7 +456,7 @@ def _mask_data(self, data, ds_var): "valid_max, valid_range: " f"{valid_min}, {valid_max}, {valid_range}" ) - elif valid_range: + elif valid_range is not None: valid_min, valid_max = valid_range if _FillValue is not None: diff --git a/tests/test_missing.py b/tests/test_missing.py index 87501fd9..7f5f4a19 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -53,6 +53,7 @@ def active_two(testfile): return active_mean + def test_partially_missing_data(tmp_path): testfile = str(tmp_path / 'test_partially_missing_data.nc') r = dd.make_partially_missing_ncdata(testfile) From d70746e07d2b606a3e0cafdaeb195e7db849ce7b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 9 
Feb 2024 18:26:22 +0000 Subject: [PATCH 09/14] dev --- tests/test_bigger_data.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_bigger_data.py b/tests/test_bigger_data.py index c64c6c75..2f38bddb 100644 --- a/tests/test_bigger_data.py +++ b/tests/test_bigger_data.py @@ -149,7 +149,7 @@ def test_native_emac_model_fails(test_data_path): """ An example of netCDF file that doesn't work - The actual issue is with h5py - it can't read it (netCDF classic) + The actual issue is with h5py - it can't read it (netCDF3 classic) h5py/_objects.pyx:54: in h5py._objects.with_phil.wrapper ??? @@ -175,8 +175,9 @@ def test_native_emac_model_fails(test_data_path): pass if USE_S3: + active = Active(uri, "aps_ave", utils.get_storage_type()) with pytest.raises(OSError): - active = Active(uri, "aps_ave", utils.get_storage_type()) + active[...] else: active = Active(uri, "aps_ave") active._version = 2 From 92495f74808a320890f785c8fa9ca69c9d2dd5c6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Feb 2024 13:18:28 +0000 Subject: [PATCH 10/14] add to tests --- tests/unit/test_active.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/unit/test_active.py b/tests/unit/test_active.py index 9b3541ea..6570764c 100644 --- a/tests/unit/test_active.py +++ b/tests/unit/test_active.py @@ -4,6 +4,8 @@ import threading from activestorage.active import Active +from activestorage.active import load_from_s3 +from botocore.exceptions import EndpointConnectionError as botoExc def test_uri_none(): @@ -96,3 +98,13 @@ def test_lock(): active.lock = None assert active.lock is False + + +def test_load_from_s3(): + """Test basic load from S3 without loading from S3.""" + uri = "s3://bucket/file.nc" + expected_exc = "Could not connect to the endpoint URL" + with pytest.raises(botoExc) as exc: + with load_from_s3(uri) as nc: + data = nc["cow"][0] + assert expected_exc in str(exc.value) From efdce9f42dc6383c3961ae290beec334a111c887 Mon Sep 17 00:00:00 
2001 From: Valeriu Predoi Date: Mon, 12 Feb 2024 13:26:24 +0000 Subject: [PATCH 11/14] adjust mock load --- tests/unit/test_storage_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_storage_types.py b/tests/unit/test_storage_types.py index 6bd82e3d..82d6761a 100644 --- a/tests/unit/test_storage_types.py +++ b/tests/unit/test_storage_types.py @@ -90,7 +90,7 @@ def reduce_chunk( assert result == 999.0 # S3 loading is not done from Active anymore - # mock_load.assert_called_once_with(uri) + mock_load.assert_not_called() mock_nz.assert_called_once_with(uri, "data", "s3") # NOTE: This gets called multiple times with various arguments. Match on From d4ab4e759937e76dccce6d5b33cb2e37c7a18c3d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Feb 2024 14:37:37 +0000 Subject: [PATCH 12/14] improve testing --- tests/test_missing.py | 62 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/test_missing.py b/tests/test_missing.py index 7f5f4a19..722358af 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -7,6 +7,8 @@ import tempfile import unittest +import h5netcdf + from netCDF4 import Dataset from activestorage.active import Active @@ -240,3 +242,63 @@ def test_validrange(tmp_path): np.testing.assert_array_equal(masked_numpy_mean, active_mean) np.testing.assert_array_equal(no_active_mean, active_mean) + + +def test_active_mask_data(tmp_path): + testfile = str(tmp_path / 'test_partially_missing_data.nc') + + # with valid min + r = dd.make_validmin_ncdata(testfile, valid_min=500) + + # retrieve the actual numpy-ed result + actual_data = load_dataset(testfile) + + # dataset variable + ds = h5netcdf.File(testfile, 'r', invalid_netcdf=True) + dsvar = ds["data"] + + # test the function + data = Active._mask_data(None, actual_data, dsvar) + ds.close() + + # with valid range + r = dd.make_validrange_ncdata(testfile, valid_range=[750., 850.]) + + # retrieve the actual numpy-ed 
result + actual_data = load_dataset(testfile) + + # dataset variable + ds = h5netcdf.File(testfile, 'r', invalid_netcdf=True) + dsvar = ds["data"] + + # test the function + data = Active._mask_data(None, actual_data, dsvar) + ds.close() + + # with missing + r = dd.make_missing_ncdata(testfile) + + # retrieve the actual numpy-ed result + actual_data = load_dataset(testfile) + + # dataset variable + ds = h5netcdf.File(testfile, 'r', invalid_netcdf=True) + dsvar = ds["data"] + + # test the function + data = Active._mask_data(None, actual_data, dsvar) + ds.close() + + # with _FillValue + r = dd.make_fillvalue_ncdata(testfile) + + # retrieve the actual numpy-ed result + actual_data = load_dataset(testfile) + + # dataset variable + ds = h5netcdf.File(testfile, 'r', invalid_netcdf=True) + dsvar = ds["data"] + + # test the function + data = Active._mask_data(None, actual_data, dsvar) + ds.close() From be23fb9d5b94e26c8eb5d7f1637cf5973195163e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Feb 2024 14:38:30 +0000 Subject: [PATCH 13/14] rm tab indented --- activestorage/active.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 82f27f92..841c3a60 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -448,7 +448,7 @@ def _mask_data(self, data, ds_var): valid_min = attrs.get('valid_min') valid_max = attrs.get('valid_max') valid_range = attrs.get('valid_range') - + if valid_max is not None or valid_min is not None: if valid_range is not None: raise ValueError( From 53f4ed4b96939c4ce41fb239557058a6a8cb1e3d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 12 Feb 2024 15:02:16 +0000 Subject: [PATCH 14/14] skip silly S3 test --- tests/unit/test_active.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/test_active.py b/tests/unit/test_active.py index 6570764c..232ffac9 100644 --- a/tests/unit/test_active.py +++ b/tests/unit/test_active.py @@ -5,6 +5,7 @@ from 
activestorage.active import Active from activestorage.active import load_from_s3 +from activestorage.config import * from botocore.exceptions import EndpointConnectionError as botoExc @@ -100,6 +101,7 @@ def test_lock(): assert active.lock is False +@pytest.mark.skipif(USE_S3, reason="it will look for silly bucket") def test_load_from_s3(): """Test basic load from S3 without loading from S3.""" uri = "s3://bucket/file.nc"