Commit c039589

bootstrapping changes

1 parent 7e2d5b1  commit c039589
File tree

8 files changed: +193 -31 lines changed

.coveragerc  (+3)

@@ -3,3 +3,6 @@ exclude_lines =
     pragma: no cover
     def __repr__
     if __name__ == .__main__.:
+omit =
+    */__init__.py
+    woudc_data_registry/tests/run_tests.py

Makefile  (+8 -1)

@@ -59,6 +59,10 @@ clean:
 	rm -f debian/woudc-data-registry.substvars
 	rm -fr debian/woudc-data-registry
 
+coverage:
+	coverage run --source=woudc_data_registry -m unittest woudc_data_registry.tests.run_tests
+	coverage report -m
+
 createdb:
 	createdb $(PG_FLAGS) -E UTF8
 	psql $(PG_FLAGS) -c "create extension postgis;"
@@ -69,4 +73,7 @@ dropdb:
 package:
 	python setup.py sdist bdist_wheel
 
-.PHONY: clean createdb dropdb package
+test:
+	python setup.py test
+
+.PHONY: clean coverage createdb dropdb package test
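The new coverage target can also be reproduced programmatically. A minimal sketch, assuming only the test module path taken from the Makefile target above; the coverage.py API calls are standard, and the runner/report options are illustrative.

# Sketch of what 'make coverage' runs, using coverage.py's API directly.
# The test module path comes from the Makefile target above; everything
# else (runner verbosity, report options) is illustrative.
import unittest

import coverage

cov = coverage.Coverage(source=['woudc_data_registry'])
cov.start()

suite = unittest.defaultTestLoader.loadTestsFromName(
    'woudc_data_registry.tests.run_tests')
unittest.TextTestRunner(verbosity=2).run(suite)

cov.stop()
cov.report(show_missing=True)  # equivalent to 'coverage report -m'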

woudc_data_registry/models.py  (+55 -8)

@@ -47,8 +47,8 @@
 import click
 
 import geoalchemy2
-from sqlalchemy import (Column, create_engine, Date, DateTime, Integer, String,
-                        Time, UnicodeText)
+from sqlalchemy import (Boolean, Column, create_engine, Date, DateTime, Enum,
+                        Integer, String, Time, UnicodeText)
 from sqlalchemy.exc import OperationalError, ProgrammingError
 from sqlalchemy.ext.declarative import declarative_base
 
@@ -69,6 +69,42 @@ def get_col_spec(self):
         return '%s(%s,%d)' % (self.name, self.geometry_type, self.srid)
 
 
+class Contributor(base):
+    """Data Registry Contributor"""
+
+    __tablename__ = 'contributor'
+
+    wmo_region_enum = Enum('I', 'II', 'III', 'IV', 'V', 'VI',
+                           name='wmo_region')
+
+    identifier = Column(Integer, primary_key=True, autoincrement=True)
+    acronym = Column(String, nullable=False)
+    name = Column(String, nullable=False)
+    country = Column(String, nullable=False)
+    wmo_region = Column(wmo_region_enum, nullable=False)
+    url = Column(String, nullable=False)
+    email = Column(String, nullable=False)
+    active = Column(Boolean, nullable=False, default=True)
+
+    last_validated_datetime = Column(DateTime, nullable=False,
+                                     default=datetime.utcnow())
+
+    location = Column(Geometry('POINT', srid=4326), nullable=False)
+
+    def __init__(self, dict_):
+        """serializer"""
+
+        self.acronym = dict_['acronym']
+        self.name = dict_['name']
+        self.country = dict_['country']
+        self.wmo_region = dict_['wmo_region']
+        self.url = dict_['url']
+        self.email = dict_['email']
+
+        self.location = util.point2ewkt(dict_['location']['longitude'],
+                                        dict_['location']['latitude'])
+
+
 class DataRecord(base):
     """Data Registry Data Record"""
 
@@ -86,30 +122,40 @@ class DataRecord(base):
     data_generation_date = Column(Date, nullable=False)
     data_generation_agency = Column(String, nullable=False)
     data_generation_version = Column(String, nullable=False)
-    data_generation_scientific_authority = Column(String)
+    data_generation_scientific_authority = Column(String, nullable=True)
 
     platform_type = Column(String, default='STN', nullable=False)
     platform_id = Column(String, nullable=False)
     platform_name = Column(String, nullable=False)
     platform_country = Column(String, nullable=False)
-    platform_gaw_id = Column(String)
+    platform_gaw_id = Column(String, nullable=True)
 
     instrument_name = Column(String, nullable=False)
     instrument_model = Column(String, nullable=False)
     instrument_number = Column(String, nullable=False)
 
-    location = Column(Geometry(srid=0))
+    location = Column(Geometry(srid=0), nullable=False)
 
     timestamp_utcoffset = Column(String, nullable=False)
     timestamp_date = Column(Date, nullable=False)
-    timestamp_time = Column(Time)
+    timestamp_time = Column(Time, nullable=True)
 
     # data management fields
 
-    insert_datetime = Column(DateTime, nullable=False,
-                             default=datetime.utcnow())
+    published = Column(Boolean, nullable=False, default=False)
+
+    received_datetime = Column(DateTime, nullable=False,
+                               default=datetime.utcnow())
+
+    inserted_datetime = Column(DateTime, nullable=False,
+                               default=datetime.utcnow())
+
     processed_datetime = Column(DateTime, nullable=False,
                                 default=datetime.utcnow())
+
+    published_datetime = Column(DateTime, nullable=False,
+                                default=datetime.utcnow())
+
     raw = Column(UnicodeText, nullable=False)
     url = Column(String, nullable=False)
 
@@ -151,6 +197,7 @@ def __init__(self, ecsv):
         self.location = util.point2ewkt(ecsv.extcsv['LOCATION']['Longitude'],
                                         ecsv.extcsv['LOCATION']['Latitude'],
                                         ecsv.extcsv['LOCATION']['Height'])
+
         self.extcsv = ecsv.extcsv
         self.raw = ecsv._raw
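A minimal usage sketch for the new Contributor model, building an instance from a dict via the serializer shown above and persisting it. The connection URL and all field values are illustrative, not taken from the registry; a PostGIS-enabled PostgreSQL database is assumed.

# Illustrative only: build a Contributor from a dict and persist it.
# The connection URL and field values are examples, not project defaults.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from woudc_data_registry.models import Contributor

contributor = Contributor({
    'acronym': 'MSC',                                  # example values
    'name': 'Meteorological Service of Canada',
    'country': 'Canada',
    'wmo_region': 'IV',                                # must be one of the wmo_region enum values
    'url': 'https://example.org/msc',
    'email': 'woudc@example.org',
    'location': {'longitude': -75.0, 'latitude': 45.0}
})

engine = create_engine('postgresql://user:pass@localhost/woudc-data-registry')
session = sessionmaker(bind=engine)()
session.add(contributor)   # active and the datetime columns use their defaults
session.commit()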

woudc_data_registry/parser.py  (+38 -14)

@@ -58,6 +58,29 @@
     'invalid_data': 12000,
 }
 
+DOMAINS = {
+    'datasets': {
+        'Broad-band',
+        'Lidar',
+        'Multi-band',
+        'OzoneSonde',
+        'RocketSonde',
+        'Spectral',
+        'SurfaceOzone',
+        'TotalOzoneObs',
+        'TotalOzone',
+        'UmkehrN14',
+    },
+    'metadata_tables': {
+        'CONTENT': ['Class', 'Category', 'Level', 'Form'],
+        'DATA_GENERATION': ['Date', 'Agency', 'Version'],
+        'PLATFORM': ['Type', 'ID', 'Name', 'Country'],
+        'INSTRUMENT': ['Name', 'Model', 'Number'],
+        'LOCATION': ['Latitude', 'Longitude', 'Height'],
+        'TIMESTAMP': ['UTCOffset', 'Date']
+    }
+}
+
 
 def _get_value_type(field, value):
     """derive true type from data value"""
@@ -101,15 +124,6 @@ def __init__(self, content):
         self.extcsv = {}
         self._raw = None
 
-        self.metadata_tables = {
-            'CONTENT': ['Class', 'Category', 'Level', 'Form'],
-            'DATA_GENERATION': ['Date', 'Agency', 'Version'],
-            'PLATFORM': ['Type', 'ID', 'Name', 'Country'],
-            'INSTRUMENT': ['Name', 'Model', 'Number'],
-            'LOCATION': ['Latitude', 'Longitude', 'Height'],
-            'TIMESTAMP': ['UTCOffset', 'Date']
-        }
-
         LOGGER.debug('Reading into csv')
         self._raw = content
         reader = csv.reader(StringIO(self._raw))
@@ -121,7 +135,7 @@ def __init__(self, content):
         for row in reader:
             if len(row) == 1 and row[0].startswith('#'):  # table name
                 table_name = row[0].replace('#', '')
-                if table_name in self.metadata_tables.keys():
+                if table_name in DOMAINS['metadata_tables'].keys():
                     found_table = True
                     LOGGER.debug('Found new table %s', table_name)
                     self.extcsv[table_name] = {}
@@ -136,7 +150,7 @@ def __init__(self, content):
                 LOGGER.debug('Found blank line')
                 continue
             else:  # process row data
-                if table_name in self.metadata_tables.keys():
+                if table_name in DOMAINS['metadata_tables'].keys():
                     self.extcsv[table_name]['_line_num'] = \
                         int(reader.line_num + 1)
                     for idx, val in enumerate(row):
@@ -153,11 +167,11 @@ def validate_metadata(self):
 
         errors = []
 
-        missing_tables = list(set(self.metadata_tables) -
+        missing_tables = list(set(DOMAINS['metadata_tables']) -
                               set(self.extcsv.keys()))
 
         if missing_tables:
-            if not list(set(self.metadata_tables) - set(missing_tables)):
+            if not list(set(DOMAINS['metadata_tables']) - set(missing_tables)):
                 msg = 'No core metadata tables found. Not an Extended CSV file'
                 raise NonStandardDataError(msg)
 
@@ -172,7 +186,7 @@ def validate_metadata(self):
                                          errors)
 
         for key, value in self.extcsv.items():
-            missing_datas = list(set(self.metadata_tables[key]) -
+            missing_datas = list(set(DOMAINS['metadata_tables'][key]) -
                                  set(value.keys()))
 
             if missing_datas:
@@ -205,6 +219,16 @@ def validate_metadata(self):
                     self.extcsv['LOCATION']['_line_num'])
             })
 
+        if self.extcsv['CONTENT']['Category'] not in DOMAINS['datasets']:
+            errors.append({
+                'code': 'invalid_data',
+                'locator': 'CONTENT.Category',
+                'text': 'ERROR: {}: {} (line number: {})'.format(
+                    ERROR_CODES['invalid_data'],
+                    self.extcsv['CONTENT']['Category'],
+                    self.extcsv['LOCATION']['_line_num'])
+            })
+
         if errors:
             raise MetadataValidationError('Invalid metadata', errors)
 
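With the new DOMAINS check, an unrecognized #CONTENT Category is reported as an invalid_data error. A minimal sketch, assuming the parser class that owns __init__(content) and validate_metadata() is importable as ExtendedCSV (the class name is not visible in the hunks above) alongside MetadataValidationError, and using a hypothetical fixture path.

# Sketch only: the ExtendedCSV class name and fixture path are assumptions;
# __init__(content), validate_metadata() and err.errors are shown above.
from woudc_data_registry.parser import ExtendedCSV, MetadataValidationError

with open('totalozone-invalid-category.csv') as fh:  # hypothetical fixture path
    ecsv = ExtendedCSV(fh.read())

try:
    ecsv.validate_metadata()
except MetadataValidationError as err:
    # a Category such as 'TotalOzoneFOO' is not in DOMAINS['datasets'],
    # so an 'invalid_data' error is reported against CONTENT.Category
    for error in err.errors:
        print(error['code'], error['locator'], error['text'])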

woudc_data_registry/processing.py  (+2)

@@ -82,6 +82,7 @@ def process_data(self, infile, verify=False):
 
         data_record = None
 
+        LOGGER.info('Detecting file')
         if not is_text_file(infile):
             self.status = 'failed'
             self.code = 'NonStandardDataError'
@@ -118,6 +119,7 @@ def process_data(self, infile, verify=False):
             LOGGER.error('Invalid Extended CSV: {}'.format(err.errors))
             return False
 
+        LOGGER.info('Verifying data record against registry')
         # verify:
         # - Extended CSV core fields against registry
         # - taxonomy/URI check
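A minimal sketch of calling the instrumented process_data() path with logging enabled so the two new LOGGER.info() messages are visible. The owning class name (Process) and its constructor are assumptions; only the method body appears in this hunk, and the input file name is hypothetical.

# Sketch only: the owning class and its constructor are assumed; only
# process_data(infile, verify=False) is visible in the hunk above.
import logging

from woudc_data_registry.processing import Process  # assumed class name

logging.basicConfig(level=logging.INFO)  # surfaces the new LOGGER.info() messages

processor = Process()
result = processor.process_data('totalozone-valid.csv', verify=True)  # hypothetical file

if not result:
    print('processing failed: {} ({})'.format(processor.code, processor.status))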
New file  (+65)

@@ -0,0 +1,65 @@
+
+#CONTENT
+Class,Category,Level,Form
+WOUDC,TotalOzoneFOO,1.0,1
+
+#DATA_GENERATION
+Date,Agency,Version,ScientificAuthority
+2012-01-04,RMDA,0.0
+
+#PLATFORM
+Type,ID,Name,Country,GAW_ID
+STN,002,Tamanrasset,DZA
+
+#INSTRUMENT
+Name,Model,Number
+Brewer,MKIII,201
+
+#LOCATION
+Latitude,Longitude,Height
+22.780,95.520,1384
+
+#TIMESTAMP
+UTCOffset,Date,Time
+00:00:00,2011-11-01
+
+#DAILY
+Date,WLCode,ObsCode,ColumnO3,StdDevO3,UTC_Begin,UTC_End,UTC_Mean,NObs,mMu,ColumnSO2
+2011-11-01,9,DS,265.8,2.4,6.37,16.32,11.15,91,1.785,-7.6
+2011-11-02,9,DS,266.6,2.2,6.37,16.20,11.27,99,1.754,-7.9
+2011-11-03,9,DS,273.2,3.4,6.38,16.38,11.47,99,1.782,-7.8
+2011-11-04,9,DS,269.7,2.5,6.53,16.30,11.15,93,1.747,-7.8
+2011-11-05,9,DS,266.4,2.6,6.40,16.28,11.18,86,1.750,-7.6
+2011-11-06,9,DS,262.5,2.8,6.73,15.87,11.29,92,1.746,-7.8
+2011-11-07,9,DS,262.6,2.6,6.57,16.13,11.27,89,1.814,-7.1
+2011-11-08,9,DS,259.2,2.0,6.55,16.25,11.48,97,1.791,-7.2
+2011-11-09,9,DS,260.9,2.9,6.58,16.23,11.38,92,1.823,-7.0
+2011-11-10,9,DS,262.4,3.0,6.45,16.17,11.34,95,1.843,-7.0
+2011-11-11,9,DS,265.6,2.3,6.63,15.77,11.27,91,1.810,-7.1
+2011-11-12,9,DS,268.9,3.9,6.47,16.23,10.92,89,1.805,-7.3
+2011-11-13,9,DS,261.6,2.7,6.65,16.18,12.12,67,2.018,-6.1
+2011-11-14,9,DS,256.7,2.4,7.32,15.95,11.90,80,1.822,-7.5
+2011-11-15,9,DS,269.9,4.4,6.90,16.17,11.71,89,1.807,-7.5
+2011-11-16,9,DS,274.5,3.1,6.65,15.97,11.40,95,1.834,-7.2
+2011-11-17,9,DS,274.6,3.3,7.55,15.37,11.10,67,1.632,-8.5
+2011-11-18,9,DS,270.3,1.6,8.35,15.83,13.01,52,1.728,-7.6
+2011-11-19,9,DS,266.2,2.5,6.68,15.93,11.34,90,1.917,-6.5
+2011-11-20,9,DS,263.1,2.9,6.57,16.12,11.29,90,1.921,-6.7
+2011-11-21,9,DS,263.5,3.4,6.57,16.12,11.53,85,1.892,-7.2
+2011-11-22,9,DS,260.5,2.5,6.58,16.17,11.33,88,1.993,-6.5
+2011-11-23,9,DS,259.4,2.9,6.77,16.05,11.56,90,1.932,-6.9
+2011-11-24,9,DS,256.2,2.4,6.60,15.98,11.47,91,1.938,-6.9
+2011-11-25,9,DS,258.2,2.5,6.82,16.08,11.40,90,1.902,-7.1
+2011-11-26,9,DS,256.8,2.4,6.77,16.17,11.33,81,1.962,-7.0
+2011-11-27,9,DS,254.8,2.3,6.98,15.80,11.03,74,1.921,-7.2
+2011-11-28,9,DS,255.2,3.0,6.97,15.98,11.52,84,1.889,-7.2
+2011-11-29,9,DS,256.3,1.8,6.80,15.77,11.37,66,1.934,-7.1
+2011-11-30,9,DS,262.0,3.1,6.98,15.77,12.52,49,2.103,-5.7
+
+#TIMESTAMP
+UTCOffset,Date,Time
+00:00:00,2011-11-30
+
+#MONTHLY
+Date,ColumnO3,StdDevO3,Npts
+2011-11-01,263.5,5.7,30
New file  (+1)

@@ -0,0 +1 @@
+これはEUC-JPでコーディングされた日本語のファイルです。
