@@ -68,6 +68,21 @@ def _return_zcomponents(content, varname):
68
68
return zarray , zattrs
69
69
70
70
71
def _load_ds(s3file_o_1, varname):
    """Return the HDF5 group that should be scanned for ``varname``.

    The file object is opened read-only with h5py and ``varname`` is looked
    up in it:

    * if ``varname`` names a Group, that group (from the read-only handle)
      is returned as is;
    * if ``varname`` names a Dataset, a second handle is opened in write
      mode on the same file object, a group called ``varname + " "``
      (note the trailing space) is created in it, the dataset is linked
      into that group under ``varname``, and the new group is returned.

    Parameters
    ----------
    s3file_o_1 : file-like object
        Already-opened file object that is handed to ``h5py.File``.
    varname : str
        Name of the Dataset or Group to look up at the root of the file.

    Returns
    -------
    h5py.Group
        The group to scan.

    Raises
    ------
    KeyError
        Propagated from h5py when ``varname`` is not present in the file.
    TypeError
        If ``varname`` is neither an ``h5py.Dataset`` nor an ``h5py.Group``.
    """
    s3file_r_1 = h5py.File(s3file_o_1, mode="r")
    # Look the object up once and reuse it for the type checks and messages.
    target = s3file_r_1[varname]

    if isinstance(target, h5py.Group):
        print("Looking only at a single Group", target)
        return target

    if isinstance(target, h5py.Dataset):
        print("Looking only at a single Dataset", target)
        # Open the write handle only on the path that needs it, so the Group
        # path never touches the file object in write mode.
        s3file_w_1 = h5py.File(s3file_o_1, mode="w")
        wrapper_name = varname + " "
        s3file_w_1.create_group(wrapper_name)
        # NOTE(review): mode="w" creates/truncates the target, so
        # ``s3file_w_1[varname]`` presumably does not exist at this point and
        # this lookup may fail -- confirm the intended source of the dataset.
        s3file_w_1[wrapper_name][varname] = s3file_w_1[varname]
        return s3file_w_1[wrapper_name]

    # Previously this fell through to ``return s3file`` with the name unbound.
    raise TypeError(
        f"{varname!r} is neither an h5py.Dataset nor an h5py.Group: "
        f"{type(target).__name__}"
    )
84
+
85
+
71
86
def gen_json (file_url , varname , outf , storage_type , storage_options ):
72
87
"""Generate a json file that contains the kerchunk-ed data for Zarr."""
73
88
# S3 configuration presets
@@ -79,23 +94,11 @@ def gen_json(file_url, varname, outf, storage_type, storage_options):
79
94
default_cache_type = "first" # best for HDF5
80
95
)
81
96
fs2 = fsspec .filesystem ('' )
82
- with fs .open (file_url , 'rb' ) as s3file :
83
- # with fs.open(file_url, 'rb') as s3file_o_1:
84
- # this block, together with the block down, is causing
85
- # SegFaults; h5py backend very brittle: see below for reasoning behind this
86
- # s3file_r_1 = h5py.File(s3file_o_1, mode="r")
87
- # s3file_w_1 = h5py.File(s3file_o_1, mode="w")
88
- # if isinstance(s3file_r_1[varname], h5py.Dataset):
89
- # print("Looking only at a single Dataset", s3file_r_1[varname])
90
- # s3file_w_1.create_group(varname + " ")
91
- # s3file_w_1[varname + " "][varname] = s3file_w_1[varname]
92
- # s3file = s3file_w_1[varname + " "]
93
- # elif isinstance(s3file_r_1[varname], h5py.Group):
94
- # print("Looking only at a single Group", s3file_r_1[varname])
95
- # s3file = s3file_r_1[varname]
96
- # storage_options = {"key": S3_ACCESS_KEY,
97
- # "secret": S3_SECRET_KEY,
98
- # "client_kwargs": {'endpoint_url': S3_URL}}
97
+ with fs .open (file_url , 'rb' ) as s3file_o_1 :
98
+ s3file = _load_ds (s3file_o_1 , varname )
99
+ storage_options = {"key" : S3_ACCESS_KEY ,
100
+ "secret" : S3_SECRET_KEY ,
101
+ "client_kwargs" : {'endpoint_url' : S3_URL }}
99
102
h5chunks = SingleHdf5ToZarr (s3file , file_url ,
100
103
inline_threshold = 0 )
101
104
# storage_options=storage_options)
0 commit comments