Skip to content

Commit fe79079

Browse files
Merge pull request #183 from valeriupredoi/test_storage_options
Correct handling of key and secret for the S3 `session`, and add an actual test against a key/secret-protected bucket
2 parents 8517401 + 22ac832 commit fe79079

File tree

4 files changed

+103
-3
lines changed

4 files changed

+103
-3
lines changed

.github/workflows/test_s3_remote_reductionist.yml

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ jobs:
4141
- name: Export proxy
4242
run: |
4343
echo 'USE_S3 = True' >> activestorage/config.py
44+
echo 'REMOTE_RED = True' >> activestorage/config.py
4445
- name: Ping remote Reductionist
4546
run: curl -k https://192.171.169.248:8080/.well-known/reductionist-schema
4647
- uses: conda-incubator/setup-miniconda@v2

activestorage/active.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -348,8 +348,21 @@ def _from_storage(self, stripped_indexer, drop_axes, out_shape, out_dtype,
348348

349349
# Create a shared session object.
350350
if self.storage_type == "s3":
351-
session = reductionist.get_session(S3_ACCESS_KEY, S3_SECRET_KEY,
352-
S3_ACTIVE_STORAGE_CACERT)
351+
if self.storage_options is not None:
352+
key, secret = None, None
353+
if "key" in self.storage_options:
354+
key = self.storage_options["key"]
355+
if "secret" in self.storage_options:
356+
secret = self.storage_options["secret"]
357+
if key and secret:
358+
session = reductionist.get_session(key, secret,
359+
S3_ACTIVE_STORAGE_CACERT)
360+
else:
361+
session = reductionist.get_session(S3_ACCESS_KEY, S3_SECRET_KEY,
362+
S3_ACTIVE_STORAGE_CACERT)
363+
else:
364+
session = reductionist.get_session(S3_ACCESS_KEY, S3_SECRET_KEY,
365+
S3_ACTIVE_STORAGE_CACERT)
353366
else:
354367
session = None
355368

@@ -451,12 +464,20 @@ def _process_chunk(self, session, fsref, chunk_coords, chunk_selection, counts,
451464

452465
# S3: pass in pre-configured storage options (credentials)
453466
if self.storage_type == "s3":
467+
print("S3 rfile is:", rfile)
454468
parsed_url = urllib.parse.urlparse(rfile)
455469
bucket = parsed_url.netloc
456470
object = parsed_url.path
457471
# FIXME: We do not get the correct byte order on the Zarr Array's dtype
458472
# when using S3, so use the value captured earlier.
459473
dtype = self._dtype
474+
# for certain S3 servers, rfile needs to contain the bucket, e.g. "bucket/filename";
475+
# as a result, the URL parser above yields an empty string for the bucket
476+
if bucket == "":
477+
bucket = os.path.dirname(object)
478+
object = os.path.basename(object)
479+
print("S3 bucket:", bucket)
480+
print("S3 file:", object)
460481
if self.storage_options is None:
461482
tmp, count = reductionist.reduce_chunk(session,
462483
S3_ACTIVE_STORAGE_URL,

activestorage/config.py

+3
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,6 @@
2020

2121
# S3 bucket.
2222
S3_BUCKET = "pyactivestorage"
23+
24+
# Whether to use a remote Reductionist server (as opposed to a locally deployed one)
25+
REMOTE_RED = False

tests/test_compression_remote_reductionist.py

+76-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
# otherwise, bucket is extracted automatically from full file uri
2828
S3_BUCKET = "bnl"
2929

30-
3130
@pytest.mark.parametrize("storage_options, active_storage_url", storage_options_paramlist)
3231
def test_compression_and_filters_cmip6_data(storage_options, active_storage_url):
3332
"""
@@ -107,3 +106,79 @@ def test_compression_and_filters_cmip6_forced_s3_from_local(storage_options, act
107106
assert access_denied_err in str(rederr.value)
108107
# assert nc_min == result
109108
# assert result == 239.25946044921875
109+
110+
111+
def test_compression_and_filters_cmip6_forced_s3_from_local_2():
    """
    Run an Active "min" reduction on a real CMIP6 file (CMIP6-test.nc, an
    IPSL dataset) that has compression and filters applied.

    The file lives in a special anon=True bucket that is reached through a
    valid key/secret pair supplied via ``storage_options``.
    """
    # Credentials and endpoint for the key/secret-protected test bucket.
    # NOTE(review): hard-coded credentials in a test — acceptable only if
    # this bucket is deliberately throwaway; confirm with the bucket owner.
    s3_options = {
        'key': "f2d55c6dcfc7618b2c34e00b58df3cef",
        'secret': "$/'#M{0{/4rVhp%n^(XeX$q@y#&(NM3W1->~N.Q6VP.5[@bLpi='nt]AfH)>78pT",
        'client_kwargs': {'endpoint_url': "https://uor-aces-o.s3-ext.jc.rl.ac.uk"},
    }
    reductionist_url = "https://192.171.169.248:8080"

    local_copy = str(Path(__file__).resolve().parent / 'test_data' / 'CMIP6-test.nc')

    # Reference answer computed locally with netCDF4 + numpy.
    with Dataset(local_copy) as nc_data:
        nc_min = np.min(nc_data["tas"][0:2,4:6,7:9])
    print(f"Numpy min from compressed file {nc_min}")

    # For this S3 server the URI must be of the form "bucket/filename".
    test_file_uri = os.path.join(S3_BUCKET, os.path.basename(local_copy))
    print("S3 Test file path:", test_file_uri)

    active = Active(test_file_uri, 'tas', storage_type="s3",
                    storage_options=s3_options,
                    active_storage_url=reductionist_url)
    active._version = 1
    active._method = "min"

    result = active[0:2,4:6,7:9]
    assert nc_min == result
    assert result == 239.25946044921875
145+
146+
147+
@pytest.mark.skipif(not USE_S3, reason="we need only localhost Reductionist in GA CI")
@pytest.mark.skipif(REMOTE_RED, reason="we need only localhost Reductionist in GA CI")
def test_compression_and_filters_cmip6_forced_s3_using_local_Reductionist():
    """
    Run an Active "min" reduction on a real CMIP6 file (CMIP6-test.nc, an
    IPSL dataset) that has compression and filters applied.

    The file lives in a special anon=True bucket reached via a valid
    key/secret pair; the reduction is served by the locally deployed
    (containerised) Reductionist, hence the skip guards above.
    """
    print("Reductionist URL", S3_ACTIVE_STORAGE_URL)

    # Credentials and endpoint for the key/secret-protected test bucket.
    s3_options = {
        'key': "f2d55c6dcfc7618b2c34e00b58df3cef",
        'secret': "$/'#M{0{/4rVhp%n^(XeX$q@y#&(NM3W1->~N.Q6VP.5[@bLpi='nt]AfH)>78pT",
        'client_kwargs': {'endpoint_url': "https://uor-aces-o.s3-ext.jc.rl.ac.uk"},
    }

    local_copy = str(Path(__file__).resolve().parent / 'test_data' / 'CMIP6-test.nc')

    # Reference answer computed locally with netCDF4 + numpy.
    with Dataset(local_copy) as nc_data:
        nc_min = np.min(nc_data["tas"][0:2,4:6,7:9])
    print(f"Numpy min from compressed file {nc_min}")

    # For this S3 server the URI must be of the form "bucket/filename".
    test_file_uri = os.path.join(S3_BUCKET, os.path.basename(local_copy))
    print("S3 Test file path:", test_file_uri)

    active = Active(test_file_uri, 'tas', storage_type="s3",
                    storage_options=s3_options,
                    active_storage_url=S3_ACTIVE_STORAGE_URL)
    active._version = 1
    active._method = "min"

    result = active[0:2,4:6,7:9]
    assert nc_min == result
    assert result == 239.25946044921875

0 commit comments

Comments
 (0)