Update automatic metadata creation #393

Draft
wants to merge 12 commits into develop
64 changes: 23 additions & 41 deletions open_mastr/soap_api/metadata/create.py
@@ -69,8 +69,9 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"id": str(uuid.uuid4()),
"description": f"Raw data download Marktstammdatenregister (MaStR) data using the webservice.\n\n{description_extra}",
"language": ["en-GB", "de-DE"],
"subject": [{"name": None, "path": None}],
"keywords": ["powerplants", "renewables"],
"created": publication_date,
"publicationDate": publication_date,
"version": data_version,
"context": {
"homepage": "https://www.marktstammdatenregister.de/MaStR/",
@@ -85,13 +86,15 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"spatial": {"location": None, "extent": "Germany", "resolution": "vector"},
"temporal": {
"referenceDate": reference_date.strftime("%Y-%m-%d %H:%M:%S"),
"timeseries": {
"timeseries": [
{
"start": None,
"end": None,
"resolution": None,
"alignment": None,
"aggregationType": None,
},
}
]
},
"sources": [
{
@@ -106,7 +109,7 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"instruction": "You are free: To Share, To Create, To Adapt; As long as you: Attribute",
"attribution": f"© Marktstammdatenregister {datetime.date.today().year} | dl-de/by-2-0",
}
],
]
},
{
"title": "RLI - open_MaStR",
@@ -120,8 +123,8 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"instruction": "You are free: To Share, To Create, To Adapt; As long as you: Attribute, Share-Alike, Keep open!",
"attribution": "open_MaStR © Reiner Lemoine Institut | AGPL-3.0",
}
],
},
]
}
],
"licenses": [
{
@@ -134,30 +137,16 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
],
"contributors": [
{
"title": "Ludee",
"title": None,
"email": None,
"path": "https://github.com/ludee",
"role": "maintainer",
"organization": "Reiner Lemoine Institut gGmbH",
},
{
"title": "Guido Pleßmann",
"email": None,
"path": "https://gplssm.de",
"role": "maintainer",
"organization": "Reiner Lemoine Institut gGmbH",
},
{
"title": "oakca",
"email": None,
"path": "https://github.com/oakca",
"role": "contributor",
"organization": "Reiner Lemoine Institut gGmbH",
},
"date": None,
"object": None,
"comment": None
}
],
"review": {"path": None, "badge": None},
"metaMetadata": {
"metadataVersion": "OEP-1.4.0",
"metadataVersion": "OEP-1.5.2",
"metadataLicense": {
"name": "CC0-1.0",
"title": "Creative Commons Zero v1.0 Universal",
@@ -172,6 +161,7 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"licenses": "License name must follow the SPDX License List (https://spdx.org/licenses/)",
"review": "Following the OEP Data Review (https://github.com/OpenEnergyPlatform/data-preprocessing/wiki)",
"null": "If not applicable use (null)",
"todo": "If a value ist not yet available, use: todo"
},
}

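For orientation, a hedged usage sketch of the updated datapackag_base (illustrative arguments; it assumes the module-level names the function references, such as data_version and description_extra, are available alongside it):

import datetime
import json

from open_mastr.soap_api.metadata.create import datapackag_base

# reference_date must be a datetime, since the function formats it with
# strftime("%Y-%m-%d %H:%M:%S"); publication_date is stored as given.
meta = datapackag_base(
    reference_date=datetime.datetime(2022, 12, 1),
    publication_date="2022-12-01",
)

with open("datapackage.json", "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2, ensure_ascii=False)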
@@ -253,50 +243,44 @@ def create_datapackage_meta_json(
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}_raw",
"title": f"open-MaStR {tech} units (raw)",
"path": filenames["raw"][tech]["joined"],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields,
"primaryKey": ["EinheitMastrNummer"],
},
"dialect": {"delimiter": ","},
}

resources_meta["resources"].append(resource)
if "cleaned" in data:
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}_cleaned",
"title": f"open-MaStR {tech} units (cleaned)",
"path": filenames["cleaned"][tech],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields,
"primaryKey": ["EinheitMastrNummer"],
},
"dialect": {"delimiter": ","},
}

resources_meta["resources"].append(resource)
if "postprocessed" in data:
processed_fields = [
{
"name": "geom",
"unit": None,
"type": "str",
"desciption": "Standort der Anlage als Punktgeometrie im WKB Format",
"description": "Standort der Anlage als Punktgeometrie im WKB Format",
"examples": "0101000020e610000071fbe59315131c40a2b437f8c20e4a40",
},
{
"name": "comment",
"unit": None,
"type": "str",
"desciption": "Information about data post-processing",
"description": "Information about data post-processing",
"examples": "has_geom; outside_vg250",
},
]
@@ -306,7 +290,7 @@ def create_datapackage_meta_json(
"name": "tags",
"unit": None,
"type": "json",
"desciption": "Data insights and report about post-processing steps",
"description": "Data insights and report about post-processing steps",
"examples": {
"plz_check": False,
"processed": True,
@@ -319,18 +303,16 @@ def create_datapackage_meta_json(
"name": "geom",
"unit": None,
"type": "str",
"desciption": "Standort der Anlage als Punktgeometrie im WKB Format (EPSG 3035)",
"description": "Standort der Anlage als Punktgeometrie im WKB Format (EPSG 3035)",
"examples": "0101000020e610000071fbe59315131c40a2b437f8c20e4a40",
}
)
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}",
"title": f"open-MaStR {tech} units",
"path": filenames["postprocessed"][tech],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields + processed_fields,
"primaryKey": ["EinheitMastrNummer"],
70 changes: 46 additions & 24 deletions open_mastr/soap_api/metadata/description.py
@@ -33,19 +33,19 @@ def __init__(self, xml=None):
self.xml = fh.read()
else:
# If no XML file is given, the file is read from a URL
zipurl = 'https://www.marktstammdatenregister.de/MaStRHilfe/files/' \
'webdienst/Dienstbeschreibung_1_2_39_Produktion.zip'
zipurl = "https://www.marktstammdatenregister.de/MaStRHilfe/files/webdienst/" \
"Dienstbeschreibung_Produktion_Version" \
"1.2.87" \ # update version here
".zip"

with urlopen(zipurl) as zipresp:
with ZipFile(BytesIO(zipresp.read())) as zfile:
self.xml = zfile.read('xsd/mastrbasetypes.xsd')


self.xml = zfile.read("xsd/mastrbasetypes.xsd")

# Parse XML and extract relevant data
parsed = xmltodict.parse(self.xml, process_namespaces=False)
self.complex_types = parsed['schema']["complexType"]
self.simple_types = parsed['schema']["simpleType"]
self.complex_types = parsed["schema"]["complexType"]
self.simple_types = parsed["schema"]["simpleType"]

# Prepare parsed data for documentation purposes
abstract_types, parameters, responses, types = self._filter_type_descriptions()
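Because the Dienstbeschreibung version is bumped by hand in the URL above, a quick pre-flight check (an assumed sketch, not code from this PR) can confirm the expected XSD is still shipped in the archive:

from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

zipurl = (
    "https://www.marktstammdatenregister.de/MaStRHilfe/files/webdienst/"
    "Dienstbeschreibung_Produktion_Version1.2.87.zip"
)

# Fail early if a version bump changed the archive layout.
with urlopen(zipurl) as zipresp:
    with ZipFile(BytesIO(zipresp.read())) as zfile:
        assert "xsd/mastrbasetypes.xsd" in zfile.namelist()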
@@ -78,13 +78,17 @@ def _filter_type_descriptions(self):
raise ValueError("Ohh...")
else:
# Filter all functions
if item["@name"].startswith(("Get", "Set", "Erneute", "Verschiebe", "Delete")):
if item["@name"].startswith(
("Get", "Set", "Erneute", "Verschiebe", "Delete")
):
functions.append(item)

# Further split the list of functions into parameters and responses
if item["@name"].endswith("Parameter"):
if "complexContent" in item.keys():
parameters[item["@name"]] = item["complexContent"]["extension"]
parameters[item["@name"]] = item["complexContent"][
"extension"
]
else:
parameters[item["@name"]] = item
elif item["@name"].endswith("Antwort"):
@@ -111,12 +115,14 @@ def prepare_simple_type(self):

for simple_type in self.simple_types:
if "enumeration" in simple_type["restriction"]:
possible_values = [_["@value"] for _ in simple_type["restriction"]["enumeration"]]
possible_values = [
_["@value"] for _ in simple_type["restriction"]["enumeration"]
]
else:
possible_values = []
simple_types_doc[simple_type["@name"]] = {
"type": simple_type["restriction"]["@base"],
"values": possible_values
"values": possible_values,
}
return simple_types_doc

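To make the enumeration handling concrete, a minimal self-contained example of what xmltodict produces for an XSD enumeration (the type name and values are invented for illustration):

import xmltodict

xsd = """
<schema>
  <simpleType name="Beispieltyp">
    <restriction base="string">
      <enumeration value="Wind"/>
      <enumeration value="Solar"/>
    </restriction>
  </simpleType>
</schema>
"""

simple_type = xmltodict.parse(xsd)["schema"]["simpleType"]
values = [e["@value"] for e in simple_type["restriction"]["enumeration"]]
# values == ["Wind", "Solar"]; the base type is simple_type["restriction"]["@base"]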
@@ -140,49 +146,61 @@ def functions_data_documentation(self):
if "annotation" in fcn["sequence"]["element"]:
fcn_data = [fcn["sequence"]["element"]]
else:
fcn_data = self.types[fcn["sequence"]["element"]["@type"].split(":")[1]]["sequence"]["element"]
fcn_data = self.types[
fcn["sequence"]["element"]["@type"].split(":")[1]
]["sequence"]["element"]
else:
print(type(fcn["sequence"]))
print(fcn["sequence"])
raise ValueError

# Add data for inherited columns from base types
if "@base" in fcn:
if not fcn["@base"] == 'mastr:AntwortBasis':
fcn_data = _collect_columns_of_base_type(self.types, fcn["@base"].split(":")[1], fcn_data)
if not fcn["@base"] == "mastr:AntwortBasis":
fcn_data = _collect_columns_of_base_type(
self.types, fcn["@base"].split(":")[1], fcn_data
)
function_docs[fcn_name] = {}
for column in fcn_data:
# Replace MaStR internal types with more general ones
if column["@type"].startswith("mastr:"):
try:
column_type = self.simple_types_prepared[column["@type"].split(":")[1]]["type"]
column_type = self.simple_types_prepared[
column["@type"].split(":")[1]
]["type"]
except KeyError:
column_type = column["@type"]
else:
column_type = column["@type"]

if "annotation" in column.keys():
description = column["annotation"]["documentation"].get("#text", None)
description = column["annotation"]["documentation"].get(
"#text", None
)
if description:
description = re.sub(" +", " ", description.replace("\n", ""))
description = re.sub(
" +", " ", description.replace("\n", "")
)
function_docs[fcn_name][column["@name"]] = {
"type": column_type,
"description": description,
"example": column["annotation"]["documentation"].get("m-ex", None)
"type": column_type,
"description": description,
"example": column["annotation"]["documentation"].get(
"m-ex", None
),
}
else:
function_docs[fcn_name][column["@name"]] = {
"type": column_type,
# TODO: insert information from simple type here
"description": None,
"example": None
"example": None,
}

# Hack in a description for a column that gets created after download while flattening data
function_docs["GetEinheitWind"]["HerstellerId"] = {
"type": "str",
"description": "Id des Herstellers der Einheit",
"example": 923
"example": 923,
}

return function_docs
@@ -193,7 +211,11 @@ def _collect_columns_of_base_type(base_types, base_type_name, fcn_data):
fcn_data += type_description["extension"]["sequence"]["element"]

if "@base" in type_description["extension"]:
if not type_description["extension"]["@base"] == 'mastr:AntwortBasis':
fcn_data = _collect_columns_of_base_type(base_types, type_description["extension"]["@base"].split(":")[1], fcn_data)
if not type_description["extension"]["@base"] == "mastr:AntwortBasis":
fcn_data = _collect_columns_of_base_type(
base_types,
type_description["extension"]["@base"].split(":")[1],
fcn_data,
)

return fcn_data
65 changes: 43 additions & 22 deletions open_mastr/soap_api/metadata/mastr_datapackage.json
@@ -9,15 +9,29 @@
],
"subject": [
{
"name": null,
"path": null
"name": "power plant",
"path": "http://openenergy-platform.org/ontology/oeo/OEO_00000031"
Contributor:
What is the function of ontology references?

chrwm (Member, Author) replied on Dec 8, 2022:
The purpose is that the @context field makes some fields in the oemetadata searchable via SPARQL queries, among others subject, isAbout, and valueReference. It turns the metadata JSON into JSON-LD.
The idea is that in the future, datasets are annotated ontologically for two use cases:

  1. A common vocabulary to search for data in the energy domain in a search engine built on linked open data.
    Here is the prototype of a search engine from the LOD-GEOSS project: http://moss.tools.dbpedia.org/search
    The usability and visualisation are going to be improved.
    When you search for hub height, select the concept from the suggestions, and search for it on the energy databus, you'll find wind power plant datasets that contain information about hub height you might use for your calculations. The datasets are registered in the databus, which is a metadata catalog that points to decentrally hosted databases. So the data is not actually hosted and maintained on the databus but at individual institutions, the idea being to improve data findability in the domain (this only works if sufficiently many datasets with good metadata participate).
  2. Inference of knowledge from annotated datasets.
    A simplistic example: I annotate the MaStR dataset with power plant; you know generally what the concept power plant is, but not what types of power plants exist in the energy domain. You could gain that knowledge from the hierarchical relations stored in the ontology.
    See https://openenergy-platform.org/viewer/oeo/ and search for power plant there.
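A minimal sketch of that idea, assuming rdflib >= 6.0 (which bundles a JSON-LD parser) and that the referenced @context resolves; the file name is illustrative:

from rdflib import Graph

# Parse the annotated oemetadata as JSON-LD; the @context turns fields like
# "subject" into IRIs that can then be queried, e.g. with SPARQL.
g = Graph().parse("mastr_datapackage.json", format="json-ld")
for s, p, o in g:
    print(s, p, o)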

},
{
"name": "renewable",
"path": "http://openenergy-platform.org/ontology/oeo/OEO_00030004"
},
{
"name": "conventional",
"path": "http://openenergy-platform.org/ontology/oeo/OEO_00020147"
},
{
"name": "energy storage object",
"path": "http://openenergy-platform.org/ontology/oeo/OEO_00000159"
}
],
"keywords": [
"powerplants",
"renewables"
"renewables",
"coventional",
"storage"
],
"publicationDate": "2022-05-16",
"publicationDate": "2022-12-01",
"context": {
"homepage": "https://www.marktstammdatenregister.de/MaStR/",
"documentation": "https://www.marktstammdatenregister.de/MaStRHilfe/index.html",
@@ -34,7 +48,7 @@
"resolution": "vector"
},
"temporal": {
"referenceDate": "2022-05-16",
"referenceDate": "2022-12-01",
"timeseries": [
{
"start": null,
@@ -154,13 +168,20 @@
"date": "2022-05-16",
"object": "metadata and data",
"comment": "Update metadata and run download"
},
{
"title": "chrwm",
"email": null,
"date": "2022-12-01",
"object": "metadata and data",
"comment": "Update metadata and run bulk download with open-MaStR v0.12.2"
}
],
"resources": [
{
"profile": "tabular-data-resource",
"name": "bnetza_open_mastr_wind",
"path": "bnetza_open_mastr_wind.csv",
"path": "https://doi.org/10.5281/zenodo.7387843",
"format": "csv",
"encoding": "UTF-8",
"schema": {
@@ -3130,14 +3151,14 @@
]
},
"dialect": {
"delimiter": ";",
"delimiter": ",",
"decimalSeparator": "."
}
},
{
"profile": "tabular-data-resource",
"name": "bnetza_open_mastr_hydro",
"path": "bnetza_open_mastr_hydro.csv",
"path": "https://doi.org/10.5281/zenodo.7387843",
"format": "csv",
"encoding": "UTF-8",
"schema": {
@@ -5651,14 +5672,14 @@
]
},
"dialect": {
"delimiter": ";",
"delimiter": ",",
"decimalSeparator": "."
}
},
{
"profile": "tabular-data-resource",
"name": "bnetza_open_mastr_biomass",
"path": "bnetza_open_mastr_biomass.csv",
"path": "https://doi.org/10.5281/zenodo.7387843",
"format": "csv",
"encoding": "UTF-8",
"schema": {
@@ -8381,14 +8402,14 @@
]
},
"dialect": {
"delimiter": ";",
"delimiter": ",",
"decimalSeparator": "."
}
},
{
"profile": "tabular-data-resource",
"name": "bnetza_open_mastr_solar",
"path": "bnetza_open_mastr_solar.csv",
"path": "https://doi.org/10.5281/zenodo.7387843",
"format": "csv",
"encoding": "UTF-8",
"schema": {
@@ -11187,14 +11208,14 @@
]
},
"dialect": {
"delimiter": ";",
"delimiter": ",",
"decimalSeparator": "."
}
},
{
"profile": "tabular-data-resource",
"name": "bnetza_open_mastr_storage",
"path": "bnetza_open_mastr_storage.csv",
"path": "https://doi.org/10.5281/zenodo.7387843",
"format": "csv",
"encoding": "UTF-8",
"schema": {
@@ -13556,14 +13577,14 @@
]
},
"dialect": {
"delimiter": ";",
"delimiter": ",",
"decimalSeparator": "."
}
},
{
"profile": "tabular-data-resource",
"name": "bnetza_open_mastr_combustion",
"path": "bnetza_open_mastr_combustion.csv",
"path": "https://doi.org/10.5281/zenodo.7387843",
"format": "csv",
"encoding": "UTF-8",
"schema": {
@@ -16034,14 +16055,14 @@
]
},
"dialect": {
"delimiter": ";",
"delimiter": ",",
"decimalSeparator": "."
}
},
{
"profile": "tabular-data-resource",
"name": "bnetza_open_mastr_nuclear",
"path": "bnetza_open_mastr_nuclear.csv",
"path": "https://doi.org/10.5281/zenodo.7387843",
"format": "csv",
"encoding": "UTF-8",
"schema": {
@@ -17961,14 +17982,14 @@
]
},
"dialect": {
"delimiter": ";",
"delimiter": ",",
"decimalSeparator": "."
}
},
{
"profile": "tabular-data-resource",
"name": "bnetza_open_mastr_gsgk",
"path": "bnetza_open_mastr_gsgk.csv",
"path": "https://doi.org/10.5281/zenodo.7387843",
"format": "csv",
"encoding": "UTF-8",
"schema": {
@@ -20268,13 +20289,13 @@
]
},
"dialect": {
"delimiter": ";",
"delimiter": ",",
"decimalSeparator": "."
}
}
],
"@id": null,
"@context": null,
"@context": "https://raw.githubusercontent.com/OpenEnergyPlatform/oemetadata/master/metadata/latest/context.json",
"review": {
"path": null,
"badge": null
6 changes: 5 additions & 1 deletion open_mastr/xml_download/utils_cleansing_bulk.py
@@ -5,10 +5,14 @@
columns_replace_list,
)
from zipfile import ZipFile
from open_mastr.utils.config import setup_logger

# setup logger
log = setup_logger()


def cleanse_bulk_data(df: pd.DataFrame, zipped_xml_file_path: str) -> pd.DataFrame:
print("Data is cleansed.")
log.info("Data is cleansed.")
df = replace_ids_with_names(df, system_catalog)
# Katalogeintraege: int -> string value
df = replace_mastr_katalogeintraege(
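setup_logger lives in open_mastr.utils.config; as a rough mental model only (the real helper may configure handlers and log files differently), it behaves like:

import logging

def setup_logger() -> logging.Logger:
    # Hypothetical stand-in for open_mastr.utils.config.setup_logger, shown
    # only to illustrate the module-level logger pattern this PR adopts.
    logger = logging.getLogger("open-MaStR")
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        )
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger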
15 changes: 8 additions & 7 deletions open_mastr/xml_download/utils_write_to_database.py
@@ -13,6 +13,7 @@
from open_mastr.xml_download.utils_cleansing_bulk import cleanse_bulk_data
from open_mastr.utils.config import setup_logger

log = setup_logger()

def write_mastr_xml_to_database(
engine: sqlalchemy.engine.Engine,
@@ -38,11 +39,11 @@ def write_mastr_xml_to_database(

if is_first_file(file_name):
create_database_table(engine=engine, xml_tablename=xml_tablename)
print(
log.info(
f"Table '{sql_tablename}' is filled with data '{xml_tablename}' "
"from the bulk download."
)
print(f"File '{file_name}' is parsed.")
log.info(f"File '{file_name}' is parsed.")

df = preprocess_table_for_writing_to_database(
f=f,
@@ -64,7 +65,7 @@ def write_mastr_xml_to_database(
if_exists="append",
engine=engine,
)
print("Bulk download and data cleansing were successful.")
log.info("Bulk download and data cleansing were successful.")


def is_table_relevant(xml_tablename: str, include_tables: list) -> bool:
@@ -289,7 +290,7 @@ def write_single_entries_until_not_unique_comes_up(
len_df_before = len(df)
df = df.drop(labels=key_list, errors="ignore")
df = df.reset_index()
print(f"{len_df_before-len(df)} entries already existed in the database.")
log.info(f"{len_df_before-len(df)} entries already existed in the database.")

return df

@@ -311,7 +312,7 @@ def add_missing_column_to_table(
-------
"""
log = setup_logger()


if engine.name == "postgresql":
missing_column = err.args[0].split("»")[1].split("«")[0]
@@ -337,7 +338,7 @@ def add_missing_column_to_table(

def delete_wrong_xml_entry(err: Error, df: pd.DataFrame) -> None:
delete_entry = str(err).split("«")[0].split("»")[1]
print(f"The entry {delete_entry} was deleted due to its false data type.")
log.info(f"The entry {delete_entry} was deleted due to its false data type.")
df = df.replace(delete_entry, np.nan)


@@ -376,7 +377,7 @@ def handle_xml_syntax_error(data: bytes, err: Error) -> pd.DataFrame:
else:
decoded_data = decoded_data[:start_char] + decoded_data[start_char + 1 :]
df = pd.read_xml(decoded_data)
print("One invalid xml expression was deleted.")
log.info("One invalid xml expression was deleted.")
return df
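The recovery loop above drops one offending character per error and retries pd.read_xml; a tiny self-contained illustration of the idea (contrived input, not the PR's code):

from io import StringIO

import pandas as pd

# A control character makes the XML invalid; dropping it lets the parse succeed.
broken = "<rows><row><EinheitMastrNummer>SEE\x12123</EinheitMastrNummer></row></rows>"
cleaned = broken.replace("\x12", "")  # the real code cuts by the reported position
df = pd.read_xml(StringIO(cleaned))
print(df)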