Commit bd1f06f

Merge pull request #234 from NCAS-CMS/fix_pyfive_branch
Fix `pyfive` branch with latest Pyfive branch `h5netcdf`
2 parents 5f67387 + 41c9ce9 commit bd1f06f

7 files changed: +38 −34

.github/workflows/run-test-push.yml  (+3 −3)

@@ -29,12 +29,12 @@ jobs:
          use-mamba: true
      - run: conda --version
      - run: python -V
-      - name: Install development version of bnlawrence/Pyfive:issue60
+      - name: Install development version of NCAS-CMS/Pyfive:h5netcdf
        run: |
          cd ..
-          git clone https://github.com/bnlawrence/pyfive.git
+          git clone https://github.com/NCAS-CMS/pyfive.git
          cd pyfive
-          git checkout issue60
+          git checkout h5netcdf
          pip install -e .
      - run: pip install -e .
      - run: conda list

.github/workflows/run-tests.yml  (+6 −6)

@@ -34,12 +34,12 @@ jobs:
          use-mamba: true
      - run: conda --version
      - run: python -V
-      - name: Install development version of bnlawrence/Pyfive:issue60
+      - name: Install development version of NCAS-CMS/Pyfive:h5netcdf
        run: |
          cd ..
-          git clone https://github.com/bnlawrence/pyfive.git
+          git clone https://github.com/NCAS-CMS/pyfive.git
          cd pyfive
-          git checkout issue60
+          git checkout h5netcdf
          pip install -e .
      - run: conda list
      - run: pip install -e .
@@ -66,12 +66,12 @@ jobs:
          use-mamba: true
      - run: conda --version
      - run: python -V
-      - name: Install development version of bnlawrence/Pyfive:issue60
+      - name: Install development version of NCAS-CMS/Pyfive:h5netcdf
        run: |
          cd ..
-          git clone https://github.com/bnlawrence/pyfive.git
+          git clone https://github.com/NCAS-CMS/pyfive.git
          cd pyfive
-          git checkout issue60
+          git checkout h5netcdf
          pip install -e .
      - run: conda list
      - run: mamba install -c conda-forge git

.github/workflows/test_s3_minio.yml  (+3 −3)

@@ -56,12 +56,12 @@ jobs:
          python-version: ${{ matrix.python-version }}
          miniforge-version: "latest"
          use-mamba: true
-      - name: Install development version of bnlawrence/Pyfive:issue60
+      - name: Install development version of NCAS-CMS/Pyfive:h5netcdf
        run: |
          cd ..
-          git clone https://github.com/bnlawrence/pyfive.git
+          git clone https://github.com/NCAS-CMS/pyfive.git
          cd pyfive
-          git checkout issue60
+          git checkout h5netcdf
          pip install -e .
      - name: Install PyActiveStorage
        run: |

.github/workflows/test_s3_remote_reductionist.yml  (+3 −3)

@@ -51,12 +51,12 @@ jobs:
          python-version: ${{ matrix.python-version }}
          miniforge-version: "latest"
          use-mamba: true
-      - name: Install development version of bnlawrence/Pyfive:issue60
+      - name: Install development version of NCAS-CMS/Pyfive:h5netcdf
        run: |
          cd ..
-          git clone https://github.com/bnlawrence/pyfive.git
+          git clone https://github.com/NCAS-CMS/pyfive.git
          cd pyfive
-          git checkout issue60
+          git checkout h5netcdf
          pip install -e .
      - name: Install PyActiveStorage
        run: |
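
All four workflows make the same swap: CI now installs pyfive from the NCAS-CMS fork's `h5netcdf` branch rather than bnlawrence's `issue60` branch. As a quick illustrative smoke test after that install step (not part of this commit), the imports below should succeed on the `h5netcdf` branch, since these are the names the updated PyActiveStorage code relies on in the active.py diff further down:

import pyfive

# Introduced on the h5netcdf branch; older pyfive releases do not expose them.
from pyfive.h5d import StoreInfo
from pyfive.indexing import OrthogonalIndexer, ZarrArrayStub

print("pyfive loaded from:", pyfive.__file__)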

activestorage/active.py  (+16 −13)

@@ -5,6 +5,7 @@
import urllib
import pyfive
import time
+from pyfive.h5d import StoreInfo

import s3fs

@@ -307,8 +308,8 @@ def _get_selection(self, *args):
        name = self.ds.name
        dtype = np.dtype(self.ds.dtype)
        # hopefully fix pyfive to get a dtype directly
-        array = pyfive.ZarrArrayStub(self.ds.shape, self.ds.chunks)
-        ds = self.ds._dataobjects
+        array = pyfive.indexing.ZarrArrayStub(self.ds.shape, self.ds.chunks)
+        ds = self.ds.id

        self.metric_data['args'] = args
        self.metric_data['dataset shape'] = self.ds.shape
@@ -318,7 +319,7 @@ def _get_selection(self, *args):
        else:
            compressor, filters = decode_filters(ds.filter_pipeline , dtype.itemsize, name)

-        indexer = pyfive.OrthogonalIndexer(*args, array)
+        indexer = pyfive.indexing.OrthogonalIndexer(*args, array)
        out_shape = indexer.shape
        #stripped_indexer = [(a, b, c) for a,b,c in indexer]
        drop_axes = indexer.drop_axes and keepdims
@@ -334,7 +335,7 @@ def _from_storage(self, ds, indexer, chunks, out_shape, out_dtype, compressor, f
            out = []
            counts = []
        else:
-            out = np.empty(out_shape, dtype=out_dtype, order=ds.order)
+            out = np.empty(out_shape, dtype=out_dtype, order=ds._order)
            counts = None # should never get touched with no method!

        # Create a shared session object.
@@ -364,10 +365,10 @@ def _from_storage(self, ds, indexer, chunks, out_shape, out_dtype, compressor, f

        if ds.chunks is not None:
            t1 = time.time()
-            ds._get_chunk_addresses()
+            # ds._get_chunk_addresses()
            t2 = time.time() - t1
            self.metric_data['indexing time (s)'] = t2
-            self.metric_data['chunk number'] = len(ds._zchunk_index)
+            # self.metric_data['chunk number'] = len(ds._zchunk_index)
        chunk_count = 0
        t1 = time.time()
        with concurrent.futures.ThreadPoolExecutor(max_workers=self._max_threads) as executor:
@@ -464,15 +465,17 @@ def _process_chunk(self, session, ds, chunks, chunk_coords, chunk_selection, cou
        #FIXME: Do, we, it's not actually used?

        """
-
-        offset, size, filter_mask = ds.get_chunk_details(chunk_coords)
+
+        # retrieve coordinates from chunk index
+        storeinfo = ds.get_chunk_info_from_chunk_coord(chunk_coords)
+        offset, size = storeinfo.byte_offset, storeinfo.size
        self.data_read += size

        if self.storage_type == 's3' and self._version == 1:

-            tmp, count = reduce_opens3_chunk(ds.fh, offset, size, compressor, filters,
+            tmp, count = reduce_opens3_chunk(ds._fh, offset, size, compressor, filters,
                                             self.missing, ds.dtype,
-                                             chunks, ds.order,
+                                             chunks, ds._order,
                                             chunk_selection, method=self.method
                                             )

@@ -499,7 +502,7 @@ def _process_chunk(self, session, ds, chunks, chunk_coords, chunk_selection, cou
                                             size, compressor, filters,
                                             self.missing, np.dtype(ds.dtype),
                                             chunks,
-                                             ds.order,
+                                             ds._order,
                                             chunk_selection,
                                             operation=self._method)
        else:
@@ -518,7 +521,7 @@ def _process_chunk(self, session, ds, chunks, chunk_coords, chunk_selection, cou
                                             size, compressor, filters,
                                             self.missing, np.dtype(ds.dtype),
                                             chunks,
-                                             ds.order,
+                                             ds._order,
                                             chunk_selection,
                                             operation=self._method)
        elif self.storage_type=='ActivePosix' and self.version==2:
@@ -531,7 +534,7 @@ def _process_chunk(self, session, ds, chunks, chunk_coords, chunk_selection, cou
        # although we will version changes.
        tmp, count = reduce_chunk(self.filename, offset, size, compressor, filters,
                                  self.missing, ds.dtype,
-                                  chunks, ds.order,
+                                  chunks, ds._order,
                                  chunk_selection, method=self.method)

        if self.method is not None:
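
For orientation, here is a minimal sketch of the chunk-lookup pattern the code above now uses. It assumes the `h5netcdf` branch of pyfive and a hypothetical chunked file `example.nc` with a variable `data` (both placeholder names): the low-level object is reached through `.id` (previously `._dataobjects`), and `get_chunk_info_from_chunk_coord` returns a `StoreInfo` in place of the old `(offset, size, filter_mask)` tuple.

import pyfive

f = pyfive.File("example.nc")   # placeholder file name
var = f["data"]                 # placeholder variable, assumed to be chunked

ds = var.id                     # low-level dataset object (replaces var._dataobjects)

# StoreInfo carries the byte offset and size of a chunk within the file.
chunk_coords = (0,) * len(var.shape)   # coordinates of the first chunk
storeinfo = ds.get_chunk_info_from_chunk_coord(chunk_coords)
offset, size = storeinfo.byte_offset, storeinfo.size
print(f"first chunk starts at byte {offset} and is {size} bytes long")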

activestorage/hdf2numcodec.py  (+1 −1)

@@ -28,7 +28,7 @@ def decode_filters(filter_pipeline, itemsize, name):
    for filter in filter_pipeline:

        filter_id=filter['filter_id']
-        properties = filter['client_data_values']
+        properties = filter['client_data']


        # We suppor the following
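
The only change here is a key rename: on the `h5netcdf` branch each `filter_pipeline` entry exposes its parameters under `client_data` rather than `client_data_values`. A small illustrative sketch of reading those entries (placeholder file and variable names):

import pyfive

ds = pyfive.File("example.nc")["data"].id   # placeholder names

if ds.filter_pipeline is not None:
    for filt in ds.filter_pipeline:
        filter_id = filt['filter_id']       # HDF5 filter identifier, e.g. 1 for deflate
        properties = filt['client_data']    # was filt['client_data_values']
        print(filter_id, properties)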

tests/test_reductionist_json.py  (+6 −5)

@@ -18,9 +18,9 @@ def __init__(self, f, v):
        self.f = pyfive.File(f)
        ds = self.f[v]
        self.dtype = np.dtype(ds.dtype)
-        self.array = pyfive.ZarrArrayStub(ds.shape, ds.chunks or ds.shape)
+        self.array = pyfive.indexing.ZarrArrayStub(ds.shape, ds.chunks or ds.shape)
        self.missing = get_missing_attributes(ds)
-        ds = ds._dataobjects
+        ds = ds.id
        self.ds = ds
    def __getitem__(self, args):
        if self.ds.filter_pipeline is None:
@@ -30,12 +30,13 @@ def __getitem__(self, args):
        if self.ds.chunks is not None:
            self.ds._get_chunk_addresses()

-        indexer = pyfive.OrthogonalIndexer(args, self.array)
+        indexer = pyfive.indexing.OrthogonalIndexer(args, self.array)
        for chunk_coords, chunk_selection, out_selection in indexer:
-            offset, size, filter_mask = self.ds.get_chunk_details(chunk_coords)
+            storeinfo = self.ds.get_chunk_info_from_chunk_coord(chunk_coords)
+            offset, size = storeinfo.byte_offset, storeinfo.size
            jd = reductionist.build_request_data('a','b','c',
                offset, size, compressor, filters, self.missing, self.dtype,
-                self.array._chunks,self.ds.order,chunk_selection)
+                self.array._chunks,self.ds._order,chunk_selection)
            js = json.dumps(jd)
        return None
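
The test walks a selection chunk by chunk much as Active does. Below is a condensed, illustrative version of that loop using the renamed pyfive entry points; the file name, variable name, and selection are placeholders, not part of the test.

import pyfive

f = pyfive.File("example.nc")            # placeholder file
var = f["data"]                          # placeholder variable

array = pyfive.indexing.ZarrArrayStub(var.shape, var.chunks or var.shape)
ds = var.id

# A full selection over every axis, mapped onto the chunks it touches.
selection = tuple(slice(0, s) for s in var.shape)
indexer = pyfive.indexing.OrthogonalIndexer(selection, array)
for chunk_coords, chunk_selection, out_selection in indexer:
    storeinfo = ds.get_chunk_info_from_chunk_coord(chunk_coords)
    print(chunk_coords, storeinfo.byte_offset, storeinfo.size)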
