Add POST /metadata endpoint and optional ?columns filter

* New setting MAX_POST_METADATA_KEYS caps the number of datasets that a
  single POST /metadata request may ask for.
* GET /metadata/<keys> and POST /metadata accept ?columns=<JSON list> to
  return only the selected metadata entries (plus "keys").
* An empty column list or an unknown column name raises
  InvalidArgumentsError instead of AssertionError / KeyError.

diff --git a/terracotta/config.py b/terracotta/config.py
index 4dbcfc3e..791f6f33 100644
--- a/terracotta/config.py
+++ b/terracotta/config.py
@@ -92,6 +92,9 @@ class TerracottaSettings(NamedTuple):
     #: Use a process pool for band retrieval in parallel
     USE_MULTIPROCESSING: bool = True
 
+    #: Maximum number of metadata keys per POST /metadata request
+    MAX_POST_METADATA_KEYS: int = 100
+
 
 AVAILABLE_SETTINGS: Tuple[str, ...] = TerracottaSettings._fields
 
@@ -158,6 +161,8 @@ class SettingSchema(Schema):
 
     USE_MULTIPROCESSING = fields.Boolean()
 
+    MAX_POST_METADATA_KEYS = fields.Integer(validate=validate.Range(min=1))
+
     @pre_load
     def decode_lists(self, data: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]:
         for var in (
diff --git a/terracotta/handlers/metadata.py b/terracotta/handlers/metadata.py
index 325f4b5e..15b22e3b 100644
--- a/terracotta/handlers/metadata.py
+++ b/terracotta/handlers/metadata.py
@@ -3,18 +3,72 @@
 Handle /metadata API endpoint.
 """
 
-from typing import Mapping, Sequence, Dict, Any, Union
+from typing import Mapping, Sequence, Dict, Any, Union, List, Optional
 from collections import OrderedDict
 
 from terracotta import get_settings, get_driver
+from terracotta.exceptions import InvalidArgumentsError
 from terracotta.profile import trace
 
 
+def filter_metadata(
+    metadata: Dict[str, Any], columns: Optional[List[str]]
+) -> Dict[str, Any]:
+    """Restrict metadata to the given columns, if any.
+
+    Returns metadata unchanged if columns is None, else a new dict holding
+    only the requested entries.
+
+    Raises InvalidArgumentsError if columns is empty or names an entry that
+    is not present in metadata.
+    """
+    if columns is None:
+        return metadata
+
+    # explicit checks instead of assert / bare KeyError: asserts are stripped
+    # under -O, and bad user input should surface as invalid arguments
+    if not columns:
+        raise InvalidArgumentsError("columns must be a non-empty list")
+
+    unknown = [c for c in columns if c not in metadata]
+    if unknown:
+        raise InvalidArgumentsError(f"Invalid metadata columns: {unknown}")
+
+    return {c: metadata[c] for c in columns}
+
+
 @trace("metadata_handler")
-def metadata(keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[str, Any]:
+def metadata(
+    columns: Optional[List[str]], keys: Union[Sequence[str], Mapping[str, str]]
+) -> Dict[str, Any]:
     """Returns all metadata for a single dataset"""
     settings = get_settings()
     driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)
-    metadata = driver.get_metadata(keys)
+    metadata = filter_metadata(driver.get_metadata(keys), columns)
     metadata["keys"] = OrderedDict(zip(driver.key_names, keys))
     return metadata
+
+
+@trace("multiple_metadata_handler")
+def multiple_metadata(
+    columns: Optional[List[str]], datasets: List[List[str]]
+) -> List[Dict[str, Any]]:
+    """Returns all metadata for multiple datasets"""
+    settings = get_settings()
+    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)
+    key_names = driver.key_names
+
+    if len(datasets) > settings.MAX_POST_METADATA_KEYS:
+        raise InvalidArgumentsError(
+            f"Maximum number of keys exceeded ({settings.MAX_POST_METADATA_KEYS}). "
+            f"This limit can be configured in the server settings."
+        )
+
+    out = []
+    with driver.connect():
+        for dataset in datasets:
+            metadata = filter_metadata(driver.get_metadata(dataset), columns)
+            metadata["keys"] = OrderedDict(zip(key_names, dataset))
+            out.append(metadata)
+
+    return out
diff --git a/terracotta/scripts/click_types.py b/terracotta/scripts/click_types.py
index 341c19ad..cb282b4c 100644
--- a/terracotta/scripts/click_types.py
+++ b/terracotta/scripts/click_types.py
@@ -26,7 +26,7 @@ class PathlibPath(click.Path):
     """Converts a string to a pathlib.Path object"""
 
     def convert(self, *args: Any) -> pathlib.Path:
-        return pathlib.Path(super().convert(*args))
+        return pathlib.Path(str(super().convert(*args)))
 
 
 RasterPatternType = Tuple[List[str], Dict[Tuple[str, ...], str]]
diff --git a/terracotta/server/metadata.py b/terracotta/server/metadata.py
index 8a547162..573926fd 100644
--- a/terracotta/server/metadata.py
+++ b/terracotta/server/metadata.py
@@ -3,10 +3,14 @@
 Flask route to handle /metadata calls.
 """
 
-from marshmallow import Schema, fields, validate
-from flask import jsonify, Response
+from typing import Any, Mapping, Dict
+import json
+
+from marshmallow import Schema, fields, validate, pre_load, ValidationError
+from flask import jsonify, Response, request
 
 from terracotta.server.flask_api import METADATA_API
+from terracotta.exceptions import InvalidArgumentsError
 
 
 class MetadataSchema(Schema):
@@ -50,6 +54,42 @@ class Meta:
     )
 
 
+class MetadataColumnsSchema(Schema):
+    columns = fields.List(
+        fields.String(),
+        description="List of columns to return",
+        required=False,
+    )
+
+    @pre_load
+    def validate_columns(
+        self, data: Mapping[str, Any], **kwargs: Any
+    ) -> Dict[str, Any]:
+        data = dict(data.items())
+        var = "columns"
+        val = data.get(var)
+        if val:
+            try:
+                data[var] = json.loads(val)
+            except json.decoder.JSONDecodeError as exc:
+                raise ValidationError(
+                    f"Could not decode value for {var} as JSON"
+                ) from exc
+        return data
+
+
+class MultipleMetadataDatasetsSchema(Schema):
+    keys = fields.List(
+        fields.List(
+            fields.String(),
+            description="Keys identifying dataset",
+            required=True,
+        ),
+        required=True,
+        description="Array containing all available key combinations",
+    )
+
+
 @METADATA_API.route("/metadata/<path:keys>", methods=["GET"])
 def get_metadata(keys: str) -> Response:
     """Get metadata for given dataset
@@ -63,6 +103,8 @@ def get_metadata(keys: str) -> Response:
             description: Keys of dataset to retrieve metadata for (e.g. 'value1/value2')
             type: path
             required: true
+          - in: query
+            schema: MetadataColumnsSchema
         responses:
             200:
                 description: All metadata for given dataset
@@ -72,7 +114,53 @@ def get_metadata(keys: str) -> Response:
     """
     from terracotta.handlers.metadata import metadata
 
+    columns_schema = MetadataColumnsSchema()
+    columns = columns_schema.load(request.args).get("columns")
+
     parsed_keys = [key for key in keys.split("/") if key]
-    payload = metadata(parsed_keys)
-    schema = MetadataSchema()
+
+    payload = metadata(columns, parsed_keys)
+    schema = MetadataSchema(partial=columns is not None)
+    return jsonify(schema.load(payload))
+
+
+@METADATA_API.route("/metadata", methods=["POST"])
+def get_multiple_metadata() -> Response:
+    """Get metadata for multiple datasets
+    ---
+    post:
+        summary: /metadata
+        description:
+            Retrieve metadata for multiple datasets, identified by the
+            body payload. Desired columns can be filtered using the ?columns
+            query.
+        parameters:
+          - in: query
+            schema: MetadataColumnsSchema
+          - in: body
+            schema: MultipleMetadataDatasetsSchema
+        responses:
+            200:
+                description: All metadata for given dataset
+                schema: MetadataSchema
+            400:
+                description:
+                    If the maximum number of requested datasets is exceeded
+            404:
+                description: No dataset found for given key combination
+    """
+    from terracotta.handlers.metadata import multiple_metadata
+
+    request_body = request.json
+    if not isinstance(request_body, dict):
+        raise InvalidArgumentsError("Request body must be a JSON object")
+
+    datasets_schema = MultipleMetadataDatasetsSchema()
+    datasets = datasets_schema.load(request_body).get("keys")
+
+    columns_schema = MetadataColumnsSchema()
+    columns = columns_schema.load(request.args).get("columns")
+
+    payload = multiple_metadata(columns, datasets)
+    schema = MetadataSchema(many=True, partial=columns is not None)
     return jsonify(schema.load(payload))
diff --git a/tests/handlers/test_metadata.py b/tests/handlers/test_metadata.py
index 3d06ba07..9a6395ec 100644
--- a/tests/handlers/test_metadata.py
+++ b/tests/handlers/test_metadata.py
@@ -2,6 +2,30 @@ def test_metadata_handler(use_testdb):
     from terracotta.handlers import metadata, datasets
 
     ds = datasets.datasets()[0]
-    md = metadata.metadata(ds)
+    md = metadata.metadata(None, ds)
     assert md
     assert md["metadata"] == ["extra_data"]
+
+    md = metadata.metadata(["metadata", "bounds"], ds)
+    assert md
+    assert len(md.keys()) == 3
+    assert all(k in md.keys() for k in ("metadata", "bounds", "keys"))
+
+
+def test_multiple_metadata_handler(use_testdb):
+    from terracotta.handlers import metadata, datasets
+
+    ds = datasets.datasets()
+    ds1 = list(ds[0].values())
+    ds2 = list(ds[1].values())
+
+    md = metadata.multiple_metadata(None, [ds1, ds2])
+
+    assert md
+    assert md[0]["metadata"] == ["extra_data"]
+    assert len(md) == 2
+
+    md = metadata.multiple_metadata(["metadata", "bounds"], [ds1, ds2])
+    assert md
+    assert len(md[0].keys()) == 3
+    assert all(k in md[0].keys() for k in ("metadata", "bounds", "keys"))
diff --git a/tests/server/test_flask_api.py b/tests/server/test_flask_api.py
index 6a216ad2..f7e5aff8 100644
--- a/tests/server/test_flask_api.py
+++ b/tests/server/test_flask_api.py
@@ -90,6 +90,56 @@ def test_get_metadata_nonexisting(client, use_testdb):
     assert rv.status_code == 404
 
 
+def test_post_metadata(client, use_testdb):
+    rv = client.post(
+        "/metadata",
+        json={"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]},
+    )
+
+    assert rv.status_code == 200
+    assert len(json.loads(rv.data)) == 2
+
+
+def test_post_metadata_specific_columns(client, use_testdb):
+    rv = client.post(
+        '/metadata?columns=["bounds", "range"]',
+        json={"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]},
+    )
+
+    assert rv.status_code == 200
+    assert len(json.loads(rv.data)) == 2
+    assert set(json.loads(rv.data)[0].keys()) == {"bounds", "range", "keys"}
+
+
+def test_post_metadata_errors(debug_client, use_non_writable_testdb):
+    from terracotta import exceptions
+    import marshmallow
+
+    with pytest.raises(marshmallow.ValidationError):
+        debug_client.post(
+            '/metadata?columns=["range]',
+            json={"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]},
+        )
+
+    with pytest.raises(exceptions.InvalidArgumentsError):
+        debug_client.post(
+            '/metadata?columns=["range"]',
+            json={"keys": [["val11", "x", "val12"] for _ in range(101)]},
+        )
+
+    with pytest.raises(exceptions.InvalidArgumentsError):
+        debug_client.post(
+            '/metadata?columns=["range"]',
+            json="Invalid JSON",
+        )
+
+    with pytest.raises(exceptions.InvalidArgumentsError):
+        debug_client.post(
+            '/metadata?columns=["invalid"]',
+            json={"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]},
+        )
+
+
 def test_get_datasets(client, use_testdb):
     rv = client.get("/datasets")
     assert rv.status_code == 200