Correct handling of key and secret for S3 session and actual test with key-secret bucket #183

Merged: 9 commits, Feb 15, 2024
1 change: 1 addition & 0 deletions .github/workflows/test_s3_remote_reductionist.yml
@@ -41,6 +41,7 @@ jobs:
- name: Export proxy
run: |
echo 'USE_S3 = True' >> activestorage/config.py
echo 'REMOTE_RED = True' >> activestorage/config.py
- name: Ping remote Reductionist
run: curl -k https://192.171.169.248:8080/.well-known/reductionist-schema
- uses: conda-incubator/setup-miniconda@v2
25 changes: 23 additions & 2 deletions activestorage/active.py
@@ -348,8 +348,21 @@ def _from_storage(self, stripped_indexer, drop_axes, out_shape, out_dtype,

# Create a shared session object.
if self.storage_type == "s3":
session = reductionist.get_session(S3_ACCESS_KEY, S3_SECRET_KEY,
S3_ACTIVE_STORAGE_CACERT)
if self.storage_options is not None:
key, secret = None, None
if "key" in self.storage_options:
key = self.storage_options["key"]
if "secret" in self.storage_options:
secret = self.storage_options["secret"]
if key and secret:
session = reductionist.get_session(key, secret,
S3_ACTIVE_STORAGE_CACERT)
else:
session = reductionist.get_session(S3_ACCESS_KEY, S3_SECRET_KEY,
S3_ACTIVE_STORAGE_CACERT)
else:
session = reductionist.get_session(S3_ACCESS_KEY, S3_SECRET_KEY,
S3_ACTIVE_STORAGE_CACERT)
else:
session = None
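The two reductionist.get_session(S3_ACCESS_KEY, ...) lines shown first are the call this PR replaces; the conditional block that follows is the replacement, which uses credentials from storage_options when a complete key/secret pair is supplied and otherwise falls back to the configured S3_ACCESS_KEY/S3_SECRET_KEY. A minimal, equivalent sketch of that selection logic (the helper name is illustrative, not part of this PR):

def _select_s3_credentials(storage_options, default_key, default_secret):
    # Prefer a complete key/secret pair from storage_options; otherwise
    # fall back to the values configured in activestorage/config.py.
    options = storage_options or {}
    key = options.get("key")
    secret = options.get("secret")
    if key and secret:
        return key, secret
    return default_key, default_secret

# e.g. key, secret = _select_s3_credentials(self.storage_options, S3_ACCESS_KEY, S3_SECRET_KEY)
#      session = reductionist.get_session(key, secret, S3_ACTIVE_STORAGE_CACERT)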

@@ -451,12 +464,20 @@ def _process_chunk(self, session, fsref, chunk_coords, chunk_selection, counts,

# S3: pass in pre-configured storage options (credentials)
if self.storage_type == "s3":
print("S3 rfile is:", rfile)
parsed_url = urllib.parse.urlparse(rfile)
bucket = parsed_url.netloc
object = parsed_url.path
# FIXME: We do not get the correct byte order on the Zarr Array's dtype
# when using S3, so use the value captured earlier.
dtype = self._dtype
            # for certain S3 servers rfile needs to contain the bucket, e.g. "bucket/filename";
            # as a result the parser above finds an empty string for bucket
if bucket == "":
bucket = os.path.dirname(object)
object = os.path.basename(object)
print("S3 bucket:", bucket)
print("S3 file:", object)
if self.storage_options is None:
tmp, count = reductionist.reduce_chunk(session,
S3_ACTIVE_STORAGE_URL,
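The bucket fallback added above covers S3 references passed as "bucket/filename" without a scheme, where urllib.parse.urlparse returns an empty netloc. A standalone sketch of the same parsing behaviour, for illustration only (the function name is not part of this PR):

import os
import urllib.parse

def split_bucket_and_object(rfile):
    # Full URIs such as "s3://bnl/CMIP6-test.nc" yield the bucket as netloc;
    # bare "bnl/CMIP6-test.nc" paths yield an empty netloc, so the bucket is
    # taken from the leading path component instead.
    parsed_url = urllib.parse.urlparse(rfile)
    bucket = parsed_url.netloc
    obj = parsed_url.path
    if bucket == "":
        bucket = os.path.dirname(obj)
        obj = os.path.basename(obj)
    return bucket, obj

# split_bucket_and_object("s3://bnl/CMIP6-test.nc") -> ("bnl", "/CMIP6-test.nc")
# split_bucket_and_object("bnl/CMIP6-test.nc")      -> ("bnl", "CMIP6-test.nc")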
3 changes: 3 additions & 0 deletions activestorage/config.py
@@ -20,3 +20,6 @@

# S3 bucket.
S3_BUCKET = "pyactivestorage"

# Remote Reductionist or not
REMOTE_RED = False
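The workflow change above appends REMOTE_RED = True to activestorage/config.py when CI targets the remote Reductionist, while the default here stays False, so tests meant only for a locally deployed Reductionist can be skipped on remote runs. A sketch of that gating, mirroring the new test below and assuming the flags are imported from activestorage.config:

import pytest
from activestorage.config import REMOTE_RED, USE_S3

@pytest.mark.skipif(not USE_S3, reason="we need only localhost Reductionist in GA CI")
@pytest.mark.skipif(REMOTE_RED, reason="we need only localhost Reductionist in GA CI")
def test_local_reductionist_only():
    ...  # runs only when USE_S3 is True and REMOTE_RED is False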
77 changes: 76 additions & 1 deletion tests/test_compression_remote_reductionist.py
@@ -27,7 +27,6 @@
# otherwise, bucket is extracted automatically from full file uri
S3_BUCKET = "bnl"


@pytest.mark.parametrize("storage_options, active_storage_url", storage_options_paramlist)
def test_compression_and_filters_cmip6_data(storage_options, active_storage_url):
"""
@@ -107,3 +106,79 @@ def test_compression_and_filters_cmip6_forced_s3_from_local(storage_options, act
assert access_denied_err in str(rederr.value)
# assert nc_min == result
# assert result == 239.25946044921875


def test_compression_and_filters_cmip6_forced_s3_from_local_2():
"""
Test use of datasets with compression and filters applied for a real
CMIP6 dataset (CMIP6-test.nc) - an IPSL file.

    This is for a special anon=True bucket connected to via a valid key/secret pair.
"""
storage_options = {
'key': "f2d55c6dcfc7618b2c34e00b58df3cef",
'secret': "$/'#M{0{/4rVhp%n^(XeX$q@y#&(NM3W1->~N.Q6VP.5[@bLpi='nt]AfH)>78pT",
'client_kwargs': {'endpoint_url': "https://uor-aces-o.s3-ext.jc.rl.ac.uk"}
}
active_storage_url = "https://192.171.169.248:8080"
test_file = str(Path(__file__).resolve().parent / 'test_data' / 'CMIP6-test.nc')
with Dataset(test_file) as nc_data:
nc_min = np.min(nc_data["tas"][0:2,4:6,7:9])
print(f"Numpy min from compressed file {nc_min}")

ofile = os.path.basename(test_file)
test_file_uri = os.path.join(
S3_BUCKET,
ofile
)
print("S3 Test file path:", test_file_uri)
active = Active(test_file_uri, 'tas', storage_type="s3",
storage_options=storage_options,
active_storage_url=active_storage_url)

active._version = 1
active._method = "min"

result = active[0:2,4:6,7:9]
assert nc_min == result
assert result == 239.25946044921875


@pytest.mark.skipif(not USE_S3, reason="we need only localhost Reductionist in GA CI")
@pytest.mark.skipif(REMOTE_RED, reason="we need only localhost Reductionist in GA CI")
def test_compression_and_filters_cmip6_forced_s3_using_local_Reductionist():
"""
Test use of datasets with compression and filters applied for a real
CMIP6 dataset (CMIP6-test.nc) - an IPSL file.

    This is for a special anon=True bucket connected to via a valid key/secret pair,
    and it uses the locally deployed, containerised Reductionist.
"""
print("Reductionist URL", S3_ACTIVE_STORAGE_URL)
storage_options = {
'key': "f2d55c6dcfc7618b2c34e00b58df3cef",
'secret': "$/'#M{0{/4rVhp%n^(XeX$q@y#&(NM3W1->~N.Q6VP.5[@bLpi='nt]AfH)>78pT",
'client_kwargs': {'endpoint_url': "https://uor-aces-o.s3-ext.jc.rl.ac.uk"}
}

test_file = str(Path(__file__).resolve().parent / 'test_data' / 'CMIP6-test.nc')
with Dataset(test_file) as nc_data:
nc_min = np.min(nc_data["tas"][0:2,4:6,7:9])
print(f"Numpy min from compressed file {nc_min}")

ofile = os.path.basename(test_file)
test_file_uri = os.path.join(
S3_BUCKET,
ofile
)
print("S3 Test file path:", test_file_uri)
active = Active(test_file_uri, 'tas', storage_type="s3",
storage_options=storage_options,
active_storage_url=S3_ACTIVE_STORAGE_URL)

active._version = 1
active._method = "min"

result = active[0:2,4:6,7:9]
assert nc_min == result
assert result == 239.25946044921875