Skip to content

Commit 0ddc743

Browse files
committed
scaffolding changes
1 parent 4dd8ad6 commit 0ddc743

23 files changed

+5960
-96
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,6 @@ ENV/
9797

9898
# mypy
9999
.mypy_cache/
100+
101+
# debian artifacts
102+
.pybuild

Makefile

+8
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ include $(ENVFILE)
4747

4848
PG_FLAGS=-h $(DB_HOST) -p $(DB_PORT) $(DB_NAME) -U $(DB_USERNAME)
4949

50+
# `clean` names a command, not a file: declare it phony so that a stray file
# called "clean" in the working tree can never make the target look up to date.
# (.PHONY declarations are cumulative, so this is safe next to any other one.)
.PHONY: clean

# Remove the artifacts left behind by a Debian package build (pybuild scratch
# directory, debhelper bookkeeping files and the staged package tree).
clean:
	rm -fr .pybuild
	rm -f debian/files
	rm -f debian/woudc-data-registry.postinst.debhelper
	rm -f debian/woudc-data-registry.prerm.debhelper
	rm -f debian/woudc-data-registry.substvars
	rm -fr debian/woudc-data-registry
57+
5058
createdb:
5159
createdb $(PG_FLAGS) -E UTF8
5260
psql $(PG_FLAGS) -c "create extension postgis;"

README.md

+18
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ python setup.py install
3636
# set system environment variables
3737
cp default.env foo.env
3838
vi foo.env # edit database connection parameters, etc.
39+
. foo.env
3940

4041
# create database
4142
make ENVFILE=foo.env createdb
@@ -57,6 +58,23 @@ make ENVFILE=foo.env dropdb
5758

5859
```
5960

61+
### Running woudc-data-registry
62+
63+
```bash
64+
# ingest directory of files (walks directory recursively)
65+
woudc-data-registry ingest -d /path/to/dir
66+
67+
# ingest single file
68+
woudc-data-registry ingest -f foo.dat
69+
70+
# verify directory of files (walks directory recursively)
71+
woudc-data-registry ingest -d /path/to/dir --verify
72+
73+
# verify single file
74+
woudc-data-registry ingest -f foo.dat --verify
75+
76+
```
77+
6078
### Running Tests
6179
TODO
6280

debian/changelog

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
woudc-data-registry (0.1.0) trusty; urgency=medium
2+
3+
* Initial release.
4+
5+
-- Tom Kralidis <[email protected]> Fri, 28 Jul 2017 00:00:37 +0000

debian/compat

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
9

debian/control

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Source: woudc-data-registry
2+
Section: python
3+
Priority: optional
4+
Maintainer: WOUDC <[email protected]>
5+
Build-Depends: debhelper (>= 9), python3, python3-setuptools
6+
Standards-Version: 3.9.5
7+
X-Python-Version: >= 3.4
8+
Vcs-Git: https://github.com/woudc/woudc-data-registry.git
9+
10+
Package: woudc-data-registry
11+
Architecture: all
12+
Depends: python3-click, python3-geoalchemy2, python3-psycopg2, python3-requests, python3-six, python3-sqlalchemy
13+
Homepage: http://woudc.org
14+
Description: WOUDC Data Registry is a platform that manages Ozone and
15+
Ultraviolet Radiation data in support of the World Ozone and Ultraviolet
16+
Radiation Data Centre (WOUDC), one of six World Data Centres as part of the
17+
Global Atmosphere Watch programme of the WMO.

debian/copyright

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
2+
Source: https://github.com/woudc/woudc-data-registry
3+
4+
Files: *
5+
Copyright: Copyright (c) 2017 Government of Canada
6+
License: Expat
7+
Permission is hereby granted, free of charge, to any person
8+
obtaining a copy of this software and associated documentation
9+
files (the "Software"), to deal in the Software without
10+
restriction, including without limitation the rights to use,
11+
copy, modify, merge, publish, distribute, sublicense, and/or
12+
sell copies of the Software, and to permit persons to whom
13+
the Software is furnished to do so, subject to the following
14+
conditions:
15+
.
16+
The above copyright notice and this permission notice shall be
17+
included in all copies or substantial portions of the Software.
18+
.
19+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26+
OTHER DEALINGS IN THE SOFTWARE.

debian/rules

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/usr/bin/make -f
2+
# -*- makefile -*-
3+
4+
# Catch-all pattern rule: whatever target is requested is handed straight to
# dh ($@ is the target name), with the python3 addon enabled and pybuild
# selected as the build system.
%:
5+
dh $@ --with python3 --buildsystem=pybuild

debian/source/format

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.0 (quilt)

requirements-dev.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
alembic
2+
coverage
23
docutils
34
flake8
45
pypandoc

setup.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import re
5050
import sys
5151

52+
5253
class PyTest(Command):
5354
user_options = []
5455

@@ -60,9 +61,11 @@ def finalize_options(self):
6061

6162
def run(self):
6263
import subprocess
63-
errno = subprocess.call([sys.executable, 'tests/run_tests.py'])
64+
errno = subprocess.call([sys.executable,
65+
'woudc_data_registry/tests/run_tests.py'])
6466
raise SystemExit(errno)
6567

68+
6669
def read(filename, encoding='utf-8'):
6770
"""read file contents"""
6871
full_path = os.path.join(os.path.dirname(__file__), filename)

woudc_data_registry/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
import click
4747

4848
from woudc_data_registry.models import setup_models, teardown_models
49-
from woudc_data_registry.processing import process
49+
from woudc_data_registry.controller import ingest
5050

5151
__version__ = '0.1.dev0'
5252

@@ -59,4 +59,4 @@ def cli():
5959

6060
cli.add_command(setup_models)
6161
cli.add_command(teardown_models)
62-
cli.add_command(process)
62+
cli.add_command(ingest)

woudc_data_registry/controller.py

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# =================================================================
2+
#
3+
# Terms and Conditions of Use
4+
#
5+
# Unless otherwise noted, computer program source code of this
6+
# distribution is covered under Crown Copyright, Government of
7+
# Canada, and is distributed under the MIT License.
8+
#
9+
# The Canada wordmark and related graphics associated with this
10+
# distribution are protected under trademark law and copyright law.
11+
# No permission is granted to use them outside the parameters of
12+
# the Government of Canada's corporate identity program. For
13+
# more information, see
14+
# http://www.tbs-sct.gc.ca/fip-pcim/index-eng.asp
15+
#
16+
# Copyright title to all 3rd party software distributed with this
17+
# software is held by the respective copyright holders as noted in
18+
# those files. Users are asked to read the 3rd Party Licenses
19+
# referenced with those assets.
20+
#
21+
# Copyright (c) 2017 Government of Canada
22+
#
23+
# Permission is hereby granted, free of charge, to any person
24+
# obtaining a copy of this software and associated documentation
25+
# files (the "Software"), to deal in the Software without
26+
# restriction, including without limitation the rights to use,
27+
# copy, modify, merge, publish, distribute, sublicense, and/or sell
28+
# copies of the Software, and to permit persons to whom the
29+
# Software is furnished to do so, subject to the following
30+
# conditions:
31+
#
32+
# The above copyright notice and this permission notice shall be
33+
# included in all copies or substantial portions of the Software.
34+
#
35+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
37+
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
38+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
39+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
40+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
41+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
42+
# OTHER DEALINGS IN THE SOFTWARE.
43+
#
44+
# =================================================================
45+
46+
import logging
47+
import os
48+
49+
import click
50+
51+
from woudc_data_registry.processing import Process
52+
53+
LOGGER = logging.getLogger(__name__)
54+
55+
56+
def orchestrate(file_, directory, verify=False):
    """
    Core workflow: collect the input files, then process them one by one

    :param file_: path to a single data record, or `None`
    :param directory: path to a directory that is walked recursively, or
                      `None`; only consulted when `file_` is `None`
    :param verify: forwarded to `Process.process_data`; also selects which
                   success message is logged

    :returns: `None`
    """

    if file_ is not None:
        targets = [file_]
    elif directory is not None:
        # flatten the recursive walk into one list of full paths
        targets = [os.path.join(dirpath, name)
                   for dirpath, _, names in os.walk(directory)
                   for name in names]
    else:
        targets = []

    for target in targets:
        click.echo('Processing filename: {}'.format(target))
        # a fresh Process instance is used for every file
        outcome = Process().process_data(target, verify=verify)

        if not outcome:
            LOGGER.info('Not ingested')
        elif verify:
            LOGGER.info('Verified but not ingested')
        else:  # processed
            LOGGER.info('Ingested successfully')
80+
81+
82+
@click.command()
@click.pass_context
@click.option('--file', '-f', 'file_',
              type=click.Path(exists=True, resolve_path=True),
              help='Path to data record')
@click.option('--directory', '-d', 'directory',
              type=click.Path(exists=True, resolve_path=True,
                              dir_okay=True, file_okay=False),
              help='Path to directory of data records')
@click.option('--verify', is_flag=True)
def ingest(ctx, file_, directory, verify):
    """ingest a single data submission or directory of files"""

    # NOTE: the docstring above doubles as the command's help text.
    # Exactly one of --file / --directory must be supplied.
    has_file = file_ is not None
    has_directory = directory is not None

    if has_file and has_directory:
        raise click.ClickException(
            '--file and --directory are mutually exclusive')

    if not (has_file or has_directory):
        raise click.ClickException(
            'One of --file or --directory is required')

    # ctx is accepted because of @click.pass_context but is not used here
    orchestrate(file_, directory, verify)

woudc_data_registry/models.py

+32-12
Original file line numberDiff line numberDiff line change
@@ -45,20 +45,30 @@
4545

4646
from datetime import datetime
4747
import click
48-
import logging
4948

50-
from geoalchemy2 import Geometry
49+
import geoalchemy2
5150
from sqlalchemy import (Column, create_engine, Date, DateTime, Integer, String,
5251
Time, UnicodeText)
5352
from sqlalchemy.exc import OperationalError, ProgrammingError
5453
from sqlalchemy.ext.declarative import declarative_base
5554

5655
from woudc_data_registry import util
5756

58-
LOGGER = logging.getLogger(__name__)
5957
base = declarative_base()
6058

6159

60+
class Geometry(geoalchemy2.types.Geometry):
    """
    Workaround so a column can hold multiple geometry types

    TODO: remove when https://github.com/geoalchemy/geoalchemy2/issues/158
    is fixed
    """

    def get_col_spec(self):
        """
        Build the column type specification

        :returns: the bare type name for a generic `GEOMETRY` with srid 0,
                  otherwise `name(geometry_type,srid)`
        """

        is_generic = self.geometry_type == 'GEOMETRY' and self.srid == 0

        if not is_generic:
            return '%s(%s,%d)' % (self.name, self.geometry_type, self.srid)

        return self.name
70+
71+
6272
class DataRecord(base):
6373
"""Data Registry Data Record"""
6474

@@ -72,19 +82,24 @@ class DataRecord(base):
7282
content_category = Column(String, nullable=False)
7383
content_level = Column(String, nullable=False)
7484
content_form = Column(String, nullable=False)
85+
7586
data_generation_date = Column(Date, nullable=False)
7687
data_generation_agency = Column(String, nullable=False)
7788
data_generation_version = Column(String, nullable=False)
7889
data_generation_scientific_authority = Column(String)
90+
7991
platform_type = Column(String, default='STN', nullable=False)
8092
platform_id = Column(String, nullable=False)
8193
platform_name = Column(String, nullable=False)
8294
platform_country = Column(String, nullable=False)
8395
platform_gaw_id = Column(String)
96+
8497
instrument_name = Column(String, nullable=False)
8598
instrument_model = Column(String, nullable=False)
8699
instrument_number = Column(String, nullable=False)
87-
location = Column(Geometry(management=True, use_typemod=False, srid=4326))
100+
101+
location = Column(Geometry(srid=0))
102+
88103
timestamp_utcoffset = Column(String, nullable=False)
89104
timestamp_date = Column(Date, nullable=False)
90105
timestamp_time = Column(Time)
@@ -101,7 +116,6 @@ class DataRecord(base):
101116
def __init__(self, ecsv):
102117
"""serializer"""
103118

104-
LOGGER.debug('Serializing model')
105119
self.content_class = ecsv.extcsv['CONTENT']['Class']
106120
self.content_category = ecsv.extcsv['CONTENT']['Category']
107121
self.content_level = ecsv.extcsv['CONTENT']['Level']
@@ -111,26 +125,32 @@ def __init__(self, ecsv):
111125
self.data_generation_agency = ecsv.extcsv['DATA_GENERATION']['Agency']
112126
self.data_generation_version = \
113127
ecsv.extcsv['DATA_GENERATION']['Version']
114-
self.data_generation_scientific_authority = \
115-
ecsv.extcsv['DATA_GENERATION']['ScientificAuthority']
128+
129+
if 'ScientificAuthority' in ecsv.extcsv['DATA_GENERATION']:
130+
self.data_generation_scientific_authority = \
131+
ecsv.extcsv['DATA_GENERATION']['ScientificAuthority']
116132

117133
self.platform_type = ecsv.extcsv['PLATFORM']['Type']
118134
self.platform_id = ecsv.extcsv['PLATFORM']['ID']
119135
self.platform_name = ecsv.extcsv['PLATFORM']['Name']
120136
self.platform_country = ecsv.extcsv['PLATFORM']['Country']
121-
self.platform_gaw_id = ecsv.extcsv['PLATFORM']['GAW_ID']
137+
138+
if 'GAW_ID' in ecsv.extcsv['PLATFORM']:
139+
self.platform_gaw_id = ecsv.extcsv['PLATFORM']['GAW_ID']
122140

123141
self.instrument_name = ecsv.extcsv['INSTRUMENT']['Name']
124142
self.instrument_model = ecsv.extcsv['INSTRUMENT']['Model']
125143
self.instrument_number = ecsv.extcsv['INSTRUMENT']['Number']
126144

127145
self.timestamp_utcoffset = ecsv.extcsv['TIMESTAMP']['UTCOffset']
128146
self.timestamp_date = ecsv.extcsv['TIMESTAMP']['Date']
129-
self.timestamp_time = ecsv.extcsv['TIMESTAMP']['Time']
130147

131-
self.location = util.point2wkt(ecsv.extcsv['LOCATION']['Longitude'],
132-
ecsv.extcsv['LOCATION']['Latitude'],
133-
ecsv.extcsv['LOCATION']['Height'])
148+
if 'Time' in ecsv.extcsv['TIMESTAMP']:
149+
self.timestamp_time = ecsv.extcsv['TIMESTAMP']['Time']
150+
151+
self.location = util.point2ewkt(ecsv.extcsv['LOCATION']['Longitude'],
152+
ecsv.extcsv['LOCATION']['Latitude'],
153+
ecsv.extcsv['LOCATION']['Height'])
134154
self.extcsv = ecsv.extcsv
135155
self.raw = ecsv._raw
136156

0 commit comments

Comments
 (0)