Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update automatic metadata creation #393

Draft
wants to merge 12 commits into
base: develop
Choose a base branch
from
64 changes: 23 additions & 41 deletions open_mastr/soap_api/metadata/create.py
Original file line number Diff line number Diff line change
@@ -69,8 +69,9 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"id": str(uuid.uuid4()),
"description": f"Raw data download Marktstammdatenregister (MaStR) data using the webservice.\n\n{description_extra}",
"language": ["en-GB", "de-DE"],
"subject": [{"name": None, "path": None}],
"keywords": ["powerplants", "renewables"],
"created": publication_date,
"publicationDate": publication_date,
"version": data_version,
"context": {
"homepage": "https://www.marktstammdatenregister.de/MaStR/",
@@ -85,13 +86,15 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"spatial": {"location": None, "extent": "Germany", "resolution": "vector"},
"temporal": {
"referenceDate": reference_date.strftime("%Y-%m-%d %H:%M:%S"),
"timeseries": {
"timeseries": [
{
"start": None,
"end": None,
"resolution": None,
"alignment": None,
"aggregationType": None,
},
}
]
},
"sources": [
{
@@ -106,7 +109,7 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"instruction": "You are free: To Share, To Create, To Adapt; As long as you: Attribute",
"attribution": f"© Marktstammdatenregister {datetime.date.today().year} | dl-de/by-2-0",
}
],
]
},
{
"title": "RLI - open_MaStR",
@@ -120,8 +123,8 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"instruction": "You are free: To Share, To Create, To Adapt; As long as you: Attribute, Share-Alike, Keep open!",
"attribution": "open_MaStR © Reiner Lemoine Institut | AGPL-3.0",
}
],
},
]
}
],
"licenses": [
{
@@ -134,30 +137,16 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
],
"contributors": [
{
"title": "Ludee",
"title": None,
"email": None,
"path": "https://github.com/ludee",
"role": "maintainer",
"organization": "Reiner Lemoine Institut gGmbH",
},
{
"title": "Guido Pleßmann",
"email": None,
"path": "https://gplssm.de",
"role": "maintainer",
"organization": "Reiner Lemoine Institut gGmbH",
},
{
"title": "oakca",
"email": None,
"path": "https://github.com/oakca",
"role": "contributor",
"organization": "Reiner Lemoine Institut gGmbH",
},
"date": None,
"object": None,
"comment": None
}
],
"review": {"path": None, "badge": None},
"metaMetadata": {
"metadataVersion": "OEP-1.4.0",
"metadataVersion": "OEP-1.5.2",
"metadataLicense": {
"name": "CC0-1.0",
"title": "Creative Commons Zero v1.0 Universal",
@@ -172,6 +161,7 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"licenses": "License name must follow the SPDX License List (https://spdx.org/licenses/)",
"review": "Following the OEP Data Review (https://github.com/OpenEnergyPlatform/data-preprocessing/wiki)",
"null": "If not applicable use (null)",
"todo": "If a value ist not yet available, use: todo"
},
}

@@ -253,50 +243,44 @@ def create_datapackage_meta_json(
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}_raw",
"title": f"open-MaStR {tech} units (raw)",
"path": filenames["raw"][tech]["joined"],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields,
"primaryKey": ["EinheitMastrNummer"],
},
"dialect": {"delimiter": ","},
}

resources_meta["resources"].append(resource)
if "cleaned" in data:
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}_cleaned",
"title": f"open-MaStR {tech} units (cleaned)",
"path": filenames["cleaned"][tech],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields,
"primaryKey": ["EinheitMastrNummer"],
},
"dialect": {"delimiter": ","},
}

resources_meta["resources"].append(resource)
if "postprocessed" in data:
processed_fields = [
{
"name": "geom",
"unit": None,
"type": "str",
"desciption": "Standort der Anlage als Punktgeometrie im WKB Format",
"description": "Standort der Anlage als Punktgeometrie im WKB Format",
"examples": "0101000020e610000071fbe59315131c40a2b437f8c20e4a40",
},
{
"name": "comment",
"unit": None,
"type": "str",
"desciption": "Information about data post-processing",
"description": "Information about data post-processing",
"examples": "has_geom; outside_vg250",
},
]
@@ -306,7 +290,7 @@ def create_datapackage_meta_json(
"name": "tags",
"unit": None,
"type": "json",
"desciption": "Data insights and report about post-processing steps",
"description": "Data insights and report about post-processing steps",
"examples": {
"plz_check": False,
"processed": True,
@@ -319,18 +303,16 @@ def create_datapackage_meta_json(
"name": "geom",
"unit": None,
"type": "str",
"desciption": "Standort der Anlage als Punktgeometrie im WKB Format (EPSG 3035)",
"description": "Standort der Anlage als Punktgeometrie im WKB Format (EPSG 3035)",
"examples": "0101000020e610000071fbe59315131c40a2b437f8c20e4a40",
}
)
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}",
"title": f"open-MaStR {tech} units",
"path": filenames["postprocessed"][tech],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields + processed_fields,
"primaryKey": ["EinheitMastrNummer"],
70 changes: 46 additions & 24 deletions open_mastr/soap_api/metadata/description.py
Original file line number Diff line number Diff line change
@@ -33,19 +33,19 @@ def __init__(self, xml=None):
self.xml = fh.read()
else:
# If no XML file is given, the file is read from a URL
zipurl = 'https://www.marktstammdatenregister.de/MaStRHilfe/files/' \
'webdienst/Dienstbeschreibung_1_2_39_Produktion.zip'
zipurl = "https://www.marktstammdatenregister.de/MaStRHilfe/files/webdienst/" \
"Dienstbeschreibung_Produktion_Version" \
"1.2.87" \ # update version here
".zip"

with urlopen(zipurl) as zipresp:
with ZipFile(BytesIO(zipresp.read())) as zfile:
self.xml = zfile.read('xsd/mastrbasetypes.xsd')


self.xml = zfile.read("xsd/mastrbasetypes.xsd")

# Parse XML and extract relevant data
parsed = xmltodict.parse(self.xml, process_namespaces=False)
self.complex_types = parsed['schema']["complexType"]
self.simple_types = parsed['schema']["simpleType"]
self.complex_types = parsed["schema"]["complexType"]
self.simple_types = parsed["schema"]["simpleType"]

# Prepare parsed data for documentational purposes
abstract_types, parameters, responses, types = self._filter_type_descriptions()
@@ -78,13 +78,17 @@ def _filter_type_descriptions(self):
raise ValueError("Ohh...")
else:
# Filter all functions
if item["@name"].startswith(("Get", "Set", "Erneute", "Verschiebe", "Delete")):
if item["@name"].startswith(
("Get", "Set", "Erneute", "Verschiebe", "Delete")
):
functions.append(item)

# Further split the list of functions into parameters and responses
if item["@name"].endswith("Parameter"):
if "complexContent" in item.keys():
parameters[item["@name"]] = item["complexContent"]["extension"]
parameters[item["@name"]] = item["complexContent"][
"extension"
]
else:
parameters[item["@name"]] = item
elif item["@name"].endswith("Antwort"):
@@ -111,12 +115,14 @@ def prepare_simple_type(self):

for simple_type in self.simple_types:
if "enumeration" in simple_type["restriction"]:
possible_values = [_["@value"] for _ in simple_type["restriction"]["enumeration"]]
possible_values = [
_["@value"] for _ in simple_type["restriction"]["enumeration"]
]
else:
possible_values = []
simple_types_doc[simple_type["@name"]] = {
"type": simple_type["restriction"]["@base"],
"values": possible_values
"values": possible_values,
}
return simple_types_doc

@@ -140,49 +146,61 @@ def functions_data_documentation(self):
if "annotation" in fcn["sequence"]["element"]:
fcn_data = [fcn["sequence"]["element"]]
else:
fcn_data = self.types[fcn["sequence"]["element"]["@type"].split(":")[1]]["sequence"]["element"]
fcn_data = self.types[
fcn["sequence"]["element"]["@type"].split(":")[1]
]["sequence"]["element"]
else:
print(type(fcn["sequence"]))
print(fcn["sequence"])
raise ValueError

# Add data for inherited columns from base types
if "@base" in fcn:
if not fcn["@base"] == 'mastr:AntwortBasis':
fcn_data = _collect_columns_of_base_type(self.types, fcn["@base"].split(":")[1], fcn_data)
if not fcn["@base"] == "mastr:AntwortBasis":
fcn_data = _collect_columns_of_base_type(
self.types, fcn["@base"].split(":")[1], fcn_data
)
function_docs[fcn_name] = {}
for column in fcn_data:
# Replace MaStR internal types with more general ones
if column["@type"].startswith("mastr:"):
try:
column_type = self.simple_types_prepared[column["@type"].split(":")[1]]["type"]
column_type = self.simple_types_prepared[
column["@type"].split(":")[1]
]["type"]
except KeyError:
column_type = column["@type"]
else:
column_type = column["@type"]

if "annotation" in column.keys():
description = column["annotation"]["documentation"].get("#text", None)
description = column["annotation"]["documentation"].get(
"#text", None
)
if description:
description = re.sub(" +", " ", description.replace("\n", ""))
description = re.sub(
" +", " ", description.replace("\n", "")
)
function_docs[fcn_name][column["@name"]] = {
"type": column_type,
"description": description,
"example": column["annotation"]["documentation"].get("m-ex", None)
"type": column_type,
"description": description,
"example": column["annotation"]["documentation"].get(
"m-ex", None
),
}
else:
function_docs[fcn_name][column["@name"]] = {
"type": column_type,
# TODO: insert information from simple type here
"description": None,
"example": None
"example": None,
}

# Hack in a description for a column that gets created after download while flattening data
function_docs["GetEinheitWind"]["HerstellerId"] = {
"type": "str",
"description": "Id des Herstellers der Einheit",
"example": 923
"example": 923,
}

return function_docs
@@ -193,7 +211,11 @@ def _collect_columns_of_base_type(base_types, base_type_name, fcn_data):
fcn_data += type_description["extension"]["sequence"]["element"]

if "@base" in type_description["extension"]:
if not type_description["extension"]["@base"] == 'mastr:AntwortBasis':
fcn_data = _collect_columns_of_base_type(base_types, type_description["extension"]["@base"].split(":")[1], fcn_data)
if not type_description["extension"]["@base"] == "mastr:AntwortBasis":
fcn_data = _collect_columns_of_base_type(
base_types,
type_description["extension"]["@base"].split(":")[1],
fcn_data,
)

return fcn_data
Loading