1
1
import os
2
2
import s3fs
3
3
import pathlib
4
- import boto3
4
+ import json
5
5
import moto
6
6
import pyfive
7
7
import pytest
8
8
import h5netcdf
9
9
10
10
from tempfile import NamedTemporaryFile
11
- from test_s3fs import s3 as tests3
12
11
from moto .moto_server .threaded_moto_server import ThreadedMotoServer
13
12
13
+
14
14
# some spoofy server parameters: a ThreadedMotoServer is started on this
# host/port and all S3 traffic in these tests is pointed at it
port = 5555
# f-string instead of dated %-formatting; same resulting URL
endpoint_uri = f"http://127.0.0.1:{port}/"
# bucket names created by the s3fs_s3 fixture
test_bucket_name = "test"
versioned_bucket_name = "test-versioned"
secure_bucket_name = "test-secure"
20
+
21
def get_boto3_client():
    """Build a synchronous botocore S3 client talking to the local moto endpoint."""
    # NB: we use the sync botocore client for setup
    from botocore.session import Session

    return Session().create_client("s3", endpoint_url=endpoint_uri)
17
27
18
28
@pytest .fixture (scope = "module" )
19
29
def s3_base ():
20
30
# writable local S3 system
21
31
22
32
# This fixture is module-scoped, meaning that we can re-use the MotoServer across all tests
33
+ #####
34
+ # lifted from https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py
35
+ #####
23
36
server = ThreadedMotoServer (ip_address = "127.0.0.1" , port = port )
24
37
server .start ()
25
38
if "AWS_SECRET_ACCESS_KEY" not in os .environ :
@@ -34,7 +47,58 @@ def s3_base():
34
47
server .stop ()
35
48
36
49
37
- def spoof_s3 (bucket , file_name , file_path ):
50
@pytest.fixture()
def s3fs_s3(s3_base):
    """
    Create a fully functional "virtual" S3 FileSystem compatible with fsspec/s3fs.
    Method inspired by https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py
    """
    client = get_boto3_client()

    # plain public bucket
    client.create_bucket(Bucket=test_bucket_name, ACL="public-read")

    # bucket with object versioning switched on
    client.create_bucket(Bucket=versioned_bucket_name, ACL="public-read")
    client.put_bucket_versioning(
        Bucket=versioned_bucket_name,
        VersioningConfiguration={"Status": "Enabled"},
    )

    # initialize secure bucket: a policy denies any upload that is not
    # server-side encrypted with KMS
    client.create_bucket(Bucket=secure_bucket_name, ACL="public-read")
    deny_unencrypted_uploads = {
        "Sid": "DenyUnEncryptedObjectUploads",
        "Effect": "Deny",
        "Principal": "*",
        "Action": "s3:PutObject",
        "Resource": f"arn:aws:s3:::{secure_bucket_name}/*",
        "Condition": {
            "StringNotEquals": {
                "s3:x-amz-server-side-encryption": "aws:kms"
            }
        },
    }
    policy = json.dumps(
        {
            "Version": "2012-10-17",
            "Id": "PutObjPolicy",
            "Statement": [deny_unencrypted_uploads],
        }
    )
    client.put_bucket_policy(Bucket=secure_bucket_name, Policy=policy)

    # hand out a cache-clean s3fs filesystem pointed at the moto endpoint
    s3fs.S3FileSystem.clear_instance_cache()
    s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri})
    s3.invalidate_cache()

    yield s3
95
+
96
+
97
+ def spoof_boto3_s3 (bucket , file_name , file_path ):
98
+ # this is a pure boto3 implementation
99
+ # I am leaving it here just in case we'll ever need it in the future
100
+ # NOTE: we are NOT including boto3 as dependency yet, until we ever need it
101
+
38
102
# "put" file
39
103
if os .path .exists (file_path ):
40
104
with open (file_path , "rb" ) as file_contents :
@@ -61,17 +125,18 @@ def spoof_s3(bucket, file_name, file_path):
61
125
object .download_fileobj (ncdata )
62
126
with open ('testobj.nc' , 'rb' ) as ncdata :
63
127
ncfile = h5netcdf .File (ncdata , 'r' , invalid_netcdf = True )
64
- print (ncfile ) # it works but...
65
- # correct coupling between boto3 and s3fs requires yielding
66
- # an s3fs Filesystem,
67
- # see https://stackoverflow.com/questions/75902766/how-to-access-my-own-fake-bucket-with-s3filesystem-pytest-and-moto
128
+ print (ncfile )
68
129
69
130
return res
70
131
71
132
72
133
@pytest .fixture (scope = 'session' )
73
134
def aws_credentials ():
74
- """Mocked AWS Credentials for moto."""
135
+ """
136
+ Mocked AWS Credentials for moto.
137
+ NOTE: Used ONLY by the pure boto3 test method spoof_boto3_s3.
138
+ """
139
+ # NOTE: Used ONLY by the pure boto3 test method spoof_boto3_s3
75
140
os .environ ['AWS_ACCESS_KEY_ID' ] = 'testing'
76
141
os .environ ['AWS_SECRET_ACCESS_KEY' ] = 'testing'
77
142
os .environ ['AWS_SECURITY_TOKEN' ] = 'testing'
@@ -81,8 +146,8 @@ def aws_credentials():
81
146
try :
82
147
tmp = NamedTemporaryFile (delete = False )
83
148
tmp .write (b"""[wild weasel]
84
- aws_access_key_id = testing
85
- aws_secret_access_key = testing""" )
149
+ aws_access_key_id = testing
150
+ aws_secret_access_key = testing""" )
86
151
tmp .close ()
87
152
os .environ ['AWS_SHARED_CREDENTIALS_FILE' ] = str (tmp .name )
88
153
yield
@@ -92,6 +157,8 @@ def aws_credentials():
92
157
93
158
@pytest .fixture (scope = 'function' )
94
159
def empty_bucket (aws_credentials ):
160
+ """Create an empty bucket."""
161
+ # NOTE: Used ONLY by the pure boto3 test method spoof_boto3_s3
95
162
moto_fake = moto .mock_aws ()
96
163
try :
97
164
moto_fake .start ()
@@ -101,38 +168,41 @@ def empty_bucket(aws_credentials):
101
168
finally :
102
169
moto_fake .stop ()
103
170
104
- @pytest .mark .skip (reason = "This test is now obsolete" )
105
- def test_s3file_spoofing (empty_bucket ):
171
+
172
@pytest.mark.skip(reason="This test uses the pure boto3 implement which we don't need at the moment.")
def test_s3file_with_pure_boto3(empty_bucket):
    """
    Put a real netCDF file into a moto-mocked bucket via the pure
    boto3 helper (spoof_boto3_s3), then read it back with h5netcdf.
    Kept skipped: the boto3-only route is not needed at the moment.
    """
    ncfile = "./tests/test_data/daily_data.nc"
    file_path = pathlib.Path(ncfile)
    file_name = file_path.name
    # partial spoofing with only boto3+moto
    # FIX: call the helper by its current name — it was renamed from
    # spoof_s3, so the old call raised NameError once the skip was lifted
    result = spoof_boto3_s3("MY_BUCKET", file_name, file_path)
    # NOTE(review): `s3` is not defined in this scope; an s3fs filesystem
    # is needed here (see test_s3file_with_s3fs) — resolve before un-skipping.
    with s3.open(os.path.join("MY_BUCKET", file_name), "rb") as f:
        ncfile = h5netcdf.File(f, 'r', invalid_netcdf=True)
    assert result.get('HTTPStatusCode') == 200
115
182
116
183
117
- def test_s3file_spoofing_2 ( tests3 ):
184
def test_s3file_with_s3fs(s3fs_s3):
    """
    This test spoofs a complete s3fs FileSystem via s3fs_s3,
    creates a mock bucket inside it, then puts a REAL netCDF4 file in it,
    then it loads it as if it was an S3 file. This is proper
    Wild Weasel stuff right here.
    """
    # set up physical file and Path properties
    ncfile = "./tests/test_data/daily_data.nc"
    file_path = pathlib.Path(ncfile)
    file_name = file_path.name

    # use mocked s3fs
    bucket = "MY_BUCKET"
    s3fs_s3.mkdir(bucket)
    s3fs_s3.put(file_path, bucket)

    # FIX: S3 object keys always use "/" as separator — os.path.join would
    # emit "\\" on Windows; also reuse `bucket` instead of repeating the literal
    object_key = f"{bucket}/{file_name}"

    s3 = s3fs.S3FileSystem(
        anon=False, version_aware=True, client_kwargs={"endpoint_url": endpoint_uri}
    )
    with s3.open(object_key, "rb") as f:
        # separate name so the `ncfile` path string above is not shadowed
        dataset = h5netcdf.File(f, 'r', invalid_netcdf=True)
        print("File loaded from spoof S3 with h5netcdf:", dataset)
        print(dataset["ta"])

        assert "ta" in dataset
0 commit comments