Skip to content

Commit 0e43748

Browse files
Merge pull request #232 from NCAS-CMS/add_mock_s3_test
Add mock s3 test
2 parents 582137b + ea54fda commit 0e43748

File tree

4 files changed

+259
-0
lines changed

4 files changed

+259
-0
lines changed

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies:
2121
# see github.com/zarr-developers/zarr-python/issues/1362
2222
- zarr >=2.13.6 # KVStore to FSStore
2323
# Python packages for testing
24+
- moto # mock S3 tests
2425
- pytest
2526
- pytest-cov >=2.10.1
2627
- pytest-html !=2.1.0

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
# pin Zarr to use new FSStore instead of KVStore
3232
'zarr>=2.13.3', # github.com/zarr-developers/zarr-python/issues/1362
3333
# for testing
34+
'moto', # mock S3 tests
3435
'pytest',
3536
'pytest-cov>=2.10.1',
3637
'pytest-html!=2.1.0',

tests/conftest.py

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import os
2+
import s3fs
3+
import pathlib
4+
import json
5+
import moto
6+
import pytest
7+
8+
from moto.moto_server.threaded_moto_server import ThreadedMotoServer
9+
10+
11+
# Spoofed-server parameters shared by the fixtures below.
# Test parameters; don't modify these.
port = 5555
endpoint_uri = f"http://127.0.0.1:{port}/"
test_bucket_name = "test"
versioned_bucket_name = "test-versioned"
secure_bucket_name = "test-secure"
18+
19+
def get_boto3_client():
    """Return a synchronous botocore S3 client pointed at the moto endpoint."""
    # NB: we use the sync botocore client for setup
    from botocore.session import Session

    boto_session = Session()
    return boto_session.create_client("s3", endpoint_url=endpoint_uri)
25+
26+
27+
@pytest.fixture(scope="module")
def s3_base():
    """Start a writable local (moto-spoofed) S3 server for the whole module.

    This fixture is module-scoped, meaning that we can re-use the MotoServer
    across all tests in a module.
    #####
    Lifted from https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py
    #####
    """
    server = ThreadedMotoServer(ip_address="127.0.0.1", port=port)
    server.start()
    # Fix: everything after start() runs under try/finally so the server
    # thread is always stopped, even if setup after start() raises.
    try:
        # the user ID and secret key are needed when accessing a public bucket
        # since our S3 FS and bucket are not actually on an AWS system, they
        # can have bogus values
        if "AWS_SECRET_ACCESS_KEY" not in os.environ:
            os.environ["AWS_SECRET_ACCESS_KEY"] = "foo"
        if "AWS_ACCESS_KEY_ID" not in os.environ:
            os.environ["AWS_ACCESS_KEY_ID"] = "foo"
        os.environ.pop("AWS_PROFILE", None)

        print("server up")
        yield
    finally:
        print("moto done")
        server.stop()
50+
51+
52+
@pytest.fixture()
def s3fs_s3(s3_base):
    """
    Create a fully functional "virtual" S3 FileSystem compatible with fsspec/s3fs.
    Method inspired by https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py

    The S3 FS, being AWS-like but not actually physically deployed anywhere, still needs
    all the usual user IDs, secret keys, endpoint URLs etc; the setup makes use of the ACL=public
    configuration (public-read, or public-read-write). Public DOES NOT mean anon=True, but rather,
    All Users group – https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html
    Access permission to this group allows anyone with AWS credentials to access the resource.
    The requests need be signed (authenticated) or not.

    Also, keys are encrypted using AWS-KMS
    https://docs.aws.amazon.com/kms/latest/developerguide/overview.html
    """
    client = get_boto3_client()

    # see note above about ACL=public-read
    client.create_bucket(Bucket=test_bucket_name, ACL="public-read")

    # a bucket with object versioning switched on
    client.create_bucket(Bucket=versioned_bucket_name, ACL="public-read")
    client.put_bucket_versioning(
        Bucket=versioned_bucket_name,
        VersioningConfiguration={"Status": "Enabled"},
    )

    # initialize the secure bucket: deny any upload that is not AWS-KMS encrypted
    client.create_bucket(Bucket=secure_bucket_name, ACL="public-read")
    deny_unencrypted_uploads = {
        "Sid": "DenyUnEncryptedObjectUploads",
        "Effect": "Deny",
        "Principal": "*",
        "Action": "s3:PutObject",
        "Resource": f"arn:aws:s3:::{secure_bucket_name}/*",
        "Condition": {
            "StringNotEquals": {
                "s3:x-amz-server-side-encryption": "aws:kms"
            }
        },
    }
    policy = json.dumps(
        {
            "Version": "2012-10-17",
            "Id": "PutObjPolicy",
            "Statement": [deny_unencrypted_uploads],
        }
    )
    client.put_bucket_policy(Bucket=secure_bucket_name, Policy=policy)

    # hand back a cache-free s3fs filesystem pointed at the moto endpoint
    s3fs.S3FileSystem.clear_instance_cache()
    s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri})
    s3.invalidate_cache()

    yield s3

tests/unit/test_mock_s3.py

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import os
import pathlib
from tempfile import NamedTemporaryFile

import h5netcdf
import moto  # needed by the empty_bucket fixture (was missing -> NameError)
import pytest
import s3fs

from activestorage.active import load_from_s3
9+
10+
11+
# Parameters needed by the spoofed s3 filesystem (must match tests/conftest.py).
port = 5555
endpoint_uri = f"http://127.0.0.1:{port}/"
14+
15+
16+
def test_s3fs_s3(s3fs_s3):
17+
"""Test mock S3 filesystem constructor."""
18+
# this is an entire mock S3 FS
19+
mock_s3_filesystem = s3fs_s3
20+
21+
# explore its attributes and methods
22+
print(dir(mock_s3_filesystem))
23+
24+
assert not mock_s3_filesystem.anon
25+
assert not mock_s3_filesystem.version_aware
26+
assert mock_s3_filesystem.client_kwargs == {'endpoint_url': 'http://127.0.0.1:5555/'}
27+
28+
29+
def spoof_boto3_s3(bucket, file_name, file_path):
    """Upload *file_path* to a mocked S3 *bucket* with pure boto3, then re-read it.

    This is a pure boto3 implementation; I am leaving it here just in case
    we'll ever need it in the future.
    NOTE: we are NOT including boto3 as dependency yet, until we ever need it,
    so this helper raises NameError on ``boto3`` unless boto3 happens to be
    importable in the caller's environment.

    Returns the 'ResponseMetadata' dict of the S3 put response.
    Raises FileNotFoundError if *file_path* does not exist. (The original
    code only guarded the upload, then crashed later with UnboundLocalError
    on ``object``/``res`` when the file was missing.)
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(file_path)

    # "put" file
    with open(file_path, "rb") as file_contents:
        conn = boto3.session.Session()
        s3 = conn.resource('s3')
        # renamed from "object": don't shadow the builtin
        s3_object = s3.Object(bucket, file_name)
        result = s3_object.put(Body=file_contents)
        res = result.get('ResponseMetadata')
        if res.get('HTTPStatusCode') == 200:
            print('File Uploaded Successfully')
        else:
            print('File Not Uploaded Successfully')

    # "download" file
    s3 = boto3.resource('s3')
    # arg0: file in bucket; arg1: file to download to
    target_file = "test.nc"
    s3file = s3.Bucket(bucket).download_file(file_name, target_file)
    print(os.path.isfile(target_file))

    # "access" file "remotely" with s3fs
    fs = s3fs.S3FileSystem(anon=True)
    with open('testobj.nc', 'wb') as ncdata:
        s3_object.download_fileobj(ncdata)
    with open('testobj.nc', 'rb') as ncdata:
        ncfile = h5netcdf.File(ncdata, 'r', invalid_netcdf=True)
        print(ncfile)

    return res
63+
64+
65+
@pytest.fixture(scope='session')
def aws_credentials():
    """
    Mocked AWS Credentials for moto.
    NOTE: Used ONLY by the pure boto3 test method spoof_boto3_s3.
    """
    os.environ['AWS_ACCESS_KEY_ID'] = 'testing'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing'
    os.environ['AWS_SECURITY_TOKEN'] = 'testing'
    os.environ['AWS_SESSION_TOKEN'] = 'testing'
    os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'

    # Fix: bind ``tmp`` BEFORE the try block. In the original, a failure in
    # NamedTemporaryFile() reached the finally clause with ``tmp`` unbound and
    # raised NameError, masking the real error.
    tmp = NamedTemporaryFile(delete=False)
    try:
        tmp.write(b"""[wild weasel]
aws_access_key_id = testing
aws_secret_access_key = testing""")
        tmp.close()
        os.environ['AWS_SHARED_CREDENTIALS_FILE'] = str(tmp.name)
        yield
    finally:
        # always remove the temporary credentials file
        os.unlink(tmp.name)
88+
89+
90+
@pytest.fixture(scope='function')
def empty_bucket(aws_credentials):
    """Create an empty bucket inside a mocked (moto) AWS account.

    NOTE: Used ONLY by the pure boto3 test method spoof_boto3_s3.
    NOTE(review): ``boto3`` is not imported anywhere in this module (it is
    deliberately not a dependency), so this fixture raises NameError if it is
    ever activated; its only consumer test is currently skipped. Confirm
    before un-skipping.
    """
    moto_fake = moto.mock_aws()
    try:
        moto_fake.start()
        conn = boto3.resource('s3')
        conn.create_bucket(Bucket="MY_BUCKET")
        yield conn
    finally:
        # stop the moto mock even if bucket creation or the test raised
        moto_fake.stop()
102+
103+
104+
@pytest.mark.skip(reason="This test uses the pure boto3 implement which we don't need at the moment.")
def test_s3file_with_pure_boto3(empty_bucket):
    """Upload a real netCDF4 file with pure boto3+moto and re-open it.

    Fixes two NameErrors in the original (never caught because the test is
    skipped): it called ``spoof_s3`` although the helper is named
    ``spoof_boto3_s3``, and it used an undefined filesystem ``s3``.
    """
    ncfile = "./tests/test_data/daily_data.nc"
    file_path = pathlib.Path(ncfile)
    file_name = pathlib.Path(ncfile).name
    # partial spoofing with only boto3+moto
    result = spoof_boto3_s3("MY_BUCKET", file_name, file_path)
    # anonymous s3fs filesystem, mirroring what spoof_boto3_s3 itself uses
    s3 = s3fs.S3FileSystem(anon=True)
    with s3.open(os.path.join("MY_BUCKET", file_name), "rb") as f:
        ncfile = h5netcdf.File(f, 'r', invalid_netcdf=True)
    assert result.get('HTTPStatusCode') == 200
114+
115+
116+
def test_s3file_with_s3fs(s3fs_s3):
    """
    This test spoofs a complete s3fs FileSystem via s3fs_s3,
    creates a mock bucket inside it, then puts a REAL netCDF4 file in it,
    then it loads it as if it was an S3 file. This is proper
    Wild Weasel stuff right here.
    """
    # set up physical file and Path properties
    source = "./tests/test_data/daily_data.nc"
    local_path = pathlib.Path(source)
    file_name = local_path.name

    # use mocked s3fs: make a bucket and drop the real file into it
    bucket = "MY_BUCKET"
    s3fs_s3.mkdir(bucket)
    s3fs_s3.put(local_path, bucket)
    s3 = s3fs.S3FileSystem(
        anon=False, version_aware=True, client_kwargs={"endpoint_url": endpoint_uri}
    )
    remote_path = os.path.join("MY_BUCKET", file_name)

    # test load by h5netcdf
    with s3.open(remote_path, "rb") as f:
        print("File path", f.path)
        dataset = h5netcdf.File(f, 'r', invalid_netcdf=True)
        print("File loaded from spoof S3 with h5netcdf:", dataset)
        print(dataset["ta"])
    assert "ta" in dataset

    # test Active
    storage_options = dict(anon=False, version_aware=True,
                           client_kwargs={"endpoint_url": endpoint_uri})
    with load_from_s3(remote_path, storage_options) as ac_file:
        print(ac_file)
    assert "ta" in ac_file

0 commit comments

Comments
 (0)