1
+ from os import lseek
1
2
from netCDF4 import Dataset
2
3
import numpy as np
3
4
import zarr
@@ -18,44 +19,147 @@ def _make_data(n=10):
18
19
return dd , data
19
20
20
21
21
- def make_test_ncdata (filename = 'test_bizarre .nc' , chunksize = (3 ,3 ,1 ), compression = None , n = 10 ):
22
def make_missing_ncdata(filename='test_missing.nc', chunksize=(3, 3, 1), n=10):
    """
    Make a test dataset based on the default vanilla dataset, but which
    includes some missing values.

    Delegates to ``make_ncdata`` with ``missing=-999.`` and no compression,
    and returns whatever ``make_ncdata`` returns.

    The default file name follows the same ``test_<flavour>.nc`` pattern as
    the sibling ``make_*_ncdata`` helpers (no space before the extension).
    """
    return make_ncdata(filename, chunksize, n, compression=None, missing=-999.)
28
+
29
def make_fillvalue_ncdata(filename='test_fillvalue.nc', chunksize=(3, 3, 1), n=10):
    """
    Make the fill-value flavour of the test dataset: the vanilla dataset,
    but with some non-default fill values.

    Delegates to ``make_ncdata`` with ``fillvalue=-999.`` and no
    compression, and returns whatever ``make_ncdata`` returns.
    """
    options = {'compression': None, 'fillvalue': -999.}
    return make_ncdata(filename, chunksize, n, **options)
35
+
36
def make_validmin_ncdata(filename='test_validmin.nc', chunksize=(3, 3, 1), n=10):
    """
    Make the valid-min flavour of the test dataset: the vanilla dataset,
    but with some values placed below the declared minimum.

    Delegates to ``make_ncdata`` with ``valid_min=-1.`` and no compression,
    and returns whatever ``make_ncdata`` returns.
    """
    lower_bound = -1.
    return make_ncdata(filename, chunksize, n, compression=None, valid_min=lower_bound)
42
+
43
def make_validmax_ncdata(filename='test_validmax.nc', chunksize=(3, 3, 1), n=10):
    """
    Make the valid-max flavour of the test dataset: the vanilla dataset,
    but with some values placed above the declared maximum.

    Delegates to ``make_ncdata`` with ``valid_max=1.2 * n ** 3`` and no
    compression, and returns whatever ``make_ncdata`` returns.
    """
    upper_bound = 1.2 * n ** 3
    return make_ncdata(filename, chunksize, n, compression=None, valid_max=upper_bound)
49
+
50
+
51
def make_validrange_ncdata(filename='test_validrange.nc', chunksize=(3, 3, 1), n=10):
    """
    Make the valid-range flavour of the test dataset: the vanilla dataset,
    but with some values placed outside the declared range.

    Delegates to ``make_ncdata`` with ``valid_range=[-1.0, 1.2 * n ** 3]``
    and no compression, and returns whatever ``make_ncdata`` returns.
    """
    lower_bound = -1.0
    upper_bound = 1.2 * n ** 3
    return make_ncdata(filename, chunksize, n, compression=None,
                       valid_range=[lower_bound, upper_bound])
57
+
58
def make_vanilla_ncdata(filename='test_vanilla.nc', chunksize=(3, 3, 1), n=10):
    """
    Make a vanilla test dataset which is three dimensional with indices and
    values that aid in testing data extraction.

    Delegates to ``make_ncdata`` with no compression and none of the
    validity options, and returns whatever ``make_ncdata`` returns so that
    this helper behaves like its ``make_*_ncdata`` siblings (previously the
    result was computed and then discarded).
    """
    return make_ncdata(filename, chunksize, n, compression=None)
65
+
66
def make_ncdata(filename, chunksize, n, compression=None,
                missing=None,
                fillvalue=None,
                valid_range=None,
                valid_min=None,
                valid_max=None):
    """
    Write a three dimensional netCDF4 test file and report which points
    were deliberately made invalid.

    The file holds three "i4" coordinate variables ("x", "y", "z") and an
    "f8" variable called "data" on dimensions ("xdim", "ydim", "zdim"),
    each of length ``n``, filled from ``_make_data(n)``.

    If compression is required, it can be passed in via keyword and is
    applied to all variables.

    For each validity option that is selected (missing etc), four values
    of "data" are modified and made invalid. A different set of indices is
    used for each option.

    For the purposes of test data, bounds (valid_min, range etc) need to be
    non-zero, although that won't hold in real life.

    Parameters
    ----------
    filename : path of the file to create (opened in 'w' mode).
    chunksize : chunk sizes passed to ``createVariable`` for "data".
    n : length of each dimension; must be greater than 4.
    compression : passed through to every ``createVariable`` call.
    missing : float written at four points and recorded as the (deprecated)
        ``missing_value`` attribute. A falsy value disables the option.
    fillvalue : fill value used when creating the variables and written at
        four points of "data". A falsy value disables the option.
    valid_range : two element sequence (first element a float) recorded as
        ``valid_range``; two points are written below it and two above it.
    valid_min : float recorded as ``valid_min``; four points are written
        below it.
    valid_max : float recorded as ``valid_max``; four points are written
        at ten times its value.

    Returns
    -------
    tuple ``(mindices, findices, vrindices, vm1indices, vm2indices)`` where
    each entry is ``None`` if the option was not selected, otherwise the
    list of four ``(i, j, k)`` integer indices that were overwritten.

    Raises
    ------
    ValueError : if ``n`` is not greater than 4, if ``valid_range`` is
        combined with ``valid_min``/``valid_max``, if a bound is zero, or
        if ``valid_range`` is malformed.
    TypeError : if an attribute value handed to the hole maker is not a
        float.
    """

    def make_holes(var, indices, attribute, value, dummy):
        """Optionally set ``attribute`` on ``var``, then write ``dummy`` at each index."""
        if value is not None:
            if not isinstance(value, float):
                raise TypeError(f'{attribute} must be a float')
            setattr(var, attribute, value)
        for i, j, k in indices:
            var[i, j, k] = dummy

    # Validate everything before touching the file system, so that a bad
    # argument cannot leave a half written, unclosed file behind.
    if n <= 4:
        raise ValueError('n must be greater than 4')

    # Zero bounds are checked explicitly, because the truthiness tests used
    # below would otherwise silently skip a bound of 0.0.
    if valid_min is not None and valid_min == 0.0:
        raise ValueError('Dummy data needs a non-zero valid min')
    if valid_max is not None and valid_max == 0.0:
        raise ValueError('Dummy data needs a non-zero valid max')

    if valid_range and (valid_min or valid_max):
        raise ValueError("Can't mix and match validity options")

    if valid_range:
        if len(valid_range) != 2 or not isinstance(valid_range[0], float):
            raise ValueError('valid_range needs two elements, the first a float')
        if valid_range[0] == 0.0 or valid_range[1] == 0.0:
            raise ValueError('Dummy data needs non-zero range bounds')

    # Integer division keeps every index an int; true division would
    # produce floats in the index tuples handed back to the caller.
    half, nm1, nm2 = n // 2, n - 1, n - 2

    # we use a different set of indices for all the values to be masked
    mindices = findices = vrindices = vm1indices = vm2indices = None

    with Dataset(filename, 'w', format="NETCDF4") as ds:
        dd, data = _make_data(n)

        for dimension in ("xdim", "ydim", "zdim"):
            ds.createDimension(dimension, n)

        x = ds.createVariable("x", "i4", ("xdim",), fill_value=fillvalue, compression=compression)
        y = ds.createVariable("y", "i4", ("ydim",), fill_value=fillvalue, compression=compression)
        z = ds.createVariable("z", "i4", ("zdim",), fill_value=fillvalue, compression=compression)

        for a, s in zip([x, y, z], [1, n, n * n]):
            a[:] = dd * s

        # The fill value is supplied at creation time so that the holes
        # punched into "data" below carry the variable's own fill value.
        dvar = ds.createVariable("data", "f8", ("xdim", "ydim", "zdim"),
                                 chunksizes=chunksize, compression=compression,
                                 fill_value=fillvalue)
        dvar[:] = data

        if missing:
            # we use the deprecated missing_value option
            mindices = [(1, 1, 1), (half, 1, 1), (1, nm1, 1), (nm1, 1, half)]
            make_holes(dvar, mindices, 'missing_value', missing, missing)

        if fillvalue:
            # No attribute is set here (value=None): only the dummy values
            # are written, at their own set of indices.
            findices = [(nm1, nm1, nm1), (half, half, 1), (1, 1, half), (nm1, nm1, half)]
            make_holes(dvar, findices, '_FillValue', None, fillvalue)

        if valid_min:
            vm1indices = [(2, 2, 2), (half, 2, 2), (2, nm1, 2), (nm1, 2, nm1 // 2)]
            make_holes(dvar, vm1indices, 'valid_min', valid_min,
                       valid_min - abs(0.1 * valid_min))

        if valid_max:
            # NOTE(review): valid_max * 10 only exceeds valid_max when
            # valid_max is positive - confirm callers never pass a negative.
            vm2indices = [(2, nm1, 2), (2, 2, nm1), (nm2, nm2, nm1), (nm1, nm2, half)]
            make_holes(dvar, vm2indices, 'valid_max', valid_max, valid_max * 10)

        if valid_range:
            vrindices = [(2, nm1, nm2), (2, nm2, nm1), (nm1, nm2, nm1), (half, half + 1, half)]
            dvar.valid_range = valid_range
            below = valid_range[0] - abs(0.1 * valid_range[0])
            # NOTE(review): as for valid_max, this assumes a positive upper bound.
            above = valid_range[1] * 10
            # The first two points go below the range, the last two above it.
            for i, j, k in vrindices[0:2]:
                dvar[i, j, k] = below
            for i, j, k in vrindices[2:]:
                dvar[i, j, k] = above

    # Re-open read-only to report what was written; the context manager
    # makes sure this second handle is closed as well.
    with Dataset(filename, 'r') as ds:
        var = ds.variables['data']
        print(f'\n Created file "{filename} " with a variable called "data" with shape {var.shape} and chunking, compression {var.chunking()} ,{compression} \n ')

    return mindices, findices, vrindices, vm1indices, vm2indices
57
156
58
157
59
158
if __name__ == "__main__":
    # Generate every flavour of test file, one after the other, each with
    # its default arguments.
    builders = (
        make_vanilla_ncdata,
        make_validmin_ncdata,
        make_validmax_ncdata,
        make_missing_ncdata,
        make_fillvalue_ncdata,
        make_validrange_ncdata,
    )
    for build in builders:
        build()
61
165
0 commit comments