Skip to content

Commit

Permalink
feat: add endpoint for querying multiple metadata (#311)
Browse files Browse the repository at this point in the history
This introduces support for `POST` requests to `/metadata` to retrieve multiple datasets at once.
  • Loading branch information
DenizYil authored Aug 20, 2023
1 parent 3b28c08 commit e6f1952
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 9 deletions.
5 changes: 5 additions & 0 deletions terracotta/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ class TerracottaSettings(NamedTuple):
#: Use a process pool for band retrieval in parallel
USE_MULTIPROCESSING: bool = True

#: Maximum number of metadata keys per POST /metadata request
MAX_POST_METADATA_KEYS: int = 100


AVAILABLE_SETTINGS: Tuple[str, ...] = TerracottaSettings._fields

Expand Down Expand Up @@ -158,6 +161,8 @@ class SettingSchema(Schema):

USE_MULTIPROCESSING = fields.Boolean()

MAX_POST_METADATA_KEYS = fields.Integer(validate=validate.Range(min=1))

@pre_load
def decode_lists(self, data: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]:
for var in (
Expand Down
48 changes: 45 additions & 3 deletions terracotta/handlers/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,60 @@
Handle /metadata API endpoint.
"""

from typing import Mapping, Sequence, Dict, Any, Union
from typing import Mapping, Sequence, Dict, Any, Union, List, Optional
from collections import OrderedDict

from terracotta import get_settings, get_driver
from terracotta.exceptions import InvalidArgumentsError
from terracotta.profile import trace


def filter_metadata(
    metadata: Dict[str, Any], columns: Optional[List[str]]
) -> Dict[str, Any]:
    """Restrict a metadata dict to the requested columns, if any are given.

    Args:
        metadata: Full metadata of a dataset.
        columns: Names of the entries to keep, in the order they should
            appear in the result. ``None`` disables filtering.

    Returns:
        ``metadata`` itself when ``columns`` is ``None``; otherwise a new
        dict holding only the requested entries.

    Raises:
        ValueError: If ``columns`` is an empty list.
        KeyError: If a requested column is not present in ``metadata``.
    """
    if columns is None:
        return metadata

    # An explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would turn an empty column list into
    # "return everything" without any error.
    if len(columns) == 0:
        raise ValueError("columns must either be a non-empty list or None")

    return {column: metadata[column] for column in columns}


@trace("metadata_handler")
def metadata(
    columns: Optional[List[str]], keys: Union[Sequence[str], Mapping[str, str]]
) -> Dict[str, Any]:
    """Return the metadata of a single dataset, optionally filtered by column.

    Args:
        columns: Names of the metadata entries to keep, or ``None`` to keep
            all of them.
        keys: Identifies the dataset, either as a sequence of key values or
            as a mapping from key name to key value.

    Returns:
        The (possibly filtered) metadata with an additional ``"keys"`` entry
        that maps the driver's key names to the given key values.
    """
    settings = get_settings()
    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)
    dataset_metadata = filter_metadata(driver.get_metadata(keys), columns)

    key_names = driver.key_names
    if isinstance(keys, Mapping):
        # Iterating a mapping yields its key *names*, so zipping it against
        # key_names would store names where values belong. Look the values
        # up by name instead.
        dataset_keys = OrderedDict(
            (name, keys[name]) for name in key_names if name in keys
        )
    else:
        # NOTE(review): assumes a sequence lists its values in the same
        # order as driver.key_names - confirm against the driver.
        dataset_keys = OrderedDict(zip(key_names, keys))

    dataset_metadata["keys"] = dataset_keys
    return dataset_metadata


@trace("multiple_metadata_handler")
def multiple_metadata(
    columns: Optional[List[str]], datasets: List[List[str]]
) -> List[Dict[str, Any]]:
    """Collect the metadata of several datasets in a single call.

    Args:
        columns: Names of the metadata entries to keep for every dataset,
            or ``None`` to keep all of them.
        datasets: One list of key values per requested dataset.

    Returns:
        One metadata dict per requested dataset, in request order, each
        carrying a ``"keys"`` entry built from the driver's key names.

    Raises:
        InvalidArgumentsError: If more datasets are requested than the
            ``MAX_POST_METADATA_KEYS`` setting allows.
    """
    settings = get_settings()
    driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)
    key_names = driver.key_names

    limit = settings.MAX_POST_METADATA_KEYS
    if len(datasets) > limit:
        raise InvalidArgumentsError(
            f"Maximum number of keys exceeded ({limit}). "
            "This limit can be configured in the server settings."
        )

    def describe(dataset: List[str]) -> Dict[str, Any]:
        entry = filter_metadata(driver.get_metadata(dataset), columns)
        entry["keys"] = OrderedDict(zip(key_names, dataset))
        return entry

    # Every lookup of the batch runs inside one driver.connect() context.
    with driver.connect():
        return [describe(dataset) for dataset in datasets]
2 changes: 1 addition & 1 deletion terracotta/scripts/click_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class PathlibPath(click.Path):
"""Converts a string to a pathlib.Path object"""

def convert(self, *args: Any) -> pathlib.Path:
    """Run the parent conversion and return the result as a pathlib.Path."""
    converted = super().convert(*args)
    # NOTE(review): str() is applied before building the Path, presumably
    # because the parent's return type is wider than plain str - confirm
    # against the installed click version.
    return pathlib.Path(str(converted))


RasterPatternType = Tuple[List[str], Dict[Tuple[str, ...], str]]
Expand Down
96 changes: 92 additions & 4 deletions terracotta/server/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
Flask route to handle /metadata calls.
"""

from marshmallow import Schema, fields, validate
from flask import jsonify, Response
from typing import Any, Mapping, Dict
import json

from marshmallow import Schema, fields, validate, pre_load, ValidationError
from flask import jsonify, Response, request

from terracotta.server.flask_api import METADATA_API
from terracotta.exceptions import InvalidArgumentsError


class MetadataSchema(Schema):
Expand Down Expand Up @@ -50,6 +54,42 @@ class Meta:
)


class MetadataColumnsSchema(Schema):
    """Schema for the optional ``columns`` filter of the metadata routes."""

    columns = fields.List(
        fields.String(),
        description="List of columns to return",
        required=False,
    )

    @pre_load
    def validate_columns(
        self, data: Mapping[str, Any], **kwargs: Any
    ) -> Dict[str, Any]:
        """Decode the raw ``columns`` value from JSON before field validation.

        Works on a copy of ``data``. A missing or empty value is passed
        through unchanged; a value that is not valid JSON raises
        ``ValidationError``.
        """
        decoded = dict(data.items())
        var = "columns"
        raw_value = decoded.get(var)

        if not raw_value:
            # Nothing to decode; the field definition decides what happens.
            return decoded

        try:
            decoded[var] = json.loads(raw_value)
        except json.decoder.JSONDecodeError as exc:
            raise ValidationError(
                f"Could not decode value for {var} as JSON"
            ) from exc

        return decoded


class MultipleMetadataDatasetsSchema(Schema):
    """Schema of the request body accepted by ``POST /metadata``."""

    # Outer list: one entry per requested dataset.
    keys = fields.List(
        # Inner list: the key values that identify a single dataset.
        fields.List(
            fields.String(),
            description="Keys identifying dataset",
            required=True,
        ),
        required=True,
        description="Array containing all available key combinations",
    )


@METADATA_API.route("/metadata/<path:keys>", methods=["GET"])
def get_metadata(keys: str) -> Response:
"""Get metadata for given dataset
Expand All @@ -63,6 +103,8 @@ def get_metadata(keys: str) -> Response:
description: Keys of dataset to retrieve metadata for (e.g. 'value1/value2')
type: path
required: true
- in: query
schema: MetadataColumnsSchema
responses:
200:
description: All metadata for given dataset
Expand All @@ -72,7 +114,53 @@ def get_metadata(keys: str) -> Response:
"""
from terracotta.handlers.metadata import metadata

columns_schema = MetadataColumnsSchema()
columns = columns_schema.load(request.args).get("columns")

parsed_keys = [key for key in keys.split("/") if key]
payload = metadata(parsed_keys)
schema = MetadataSchema()

payload = metadata(columns, parsed_keys)
schema = MetadataSchema(partial=columns is not None)
return jsonify(schema.load(payload))


@METADATA_API.route("/metadata", methods=["POST"])
def get_multiple_metadata() -> Response:
    """Get metadata for multiple datasets
    ---
    post:
        summary: /metadata
        description:
            Retrieve metadata for multiple datasets, identified by the
            body payload. Desired columns can be filtered using the ?columns
            query.
        parameters:
          - in: query
            schema: MetadataColumnsSchema
          - in: body
            schema: MultipleMetadataDatasetsSchema
        responses:
            200:
                description: All metadata for given dataset
                schema: MetadataSchema
            400:
                description:
                    If the maximum number of requested datasets is exceeded
            404:
                description: No dataset found for given key combination
    """
    from terracotta.handlers.metadata import multiple_metadata

    body = request.json
    if not isinstance(body, dict):
        # Lists, strings and other JSON values are rejected before validation.
        raise InvalidArgumentsError("Request body must be a JSON object")

    # The body is validated first, then the query string.
    datasets = MultipleMetadataDatasetsSchema().load(body).get("keys")
    columns = MetadataColumnsSchema().load(request.args).get("columns")

    records = multiple_metadata(columns, datasets)

    # With a column filter the records are incomplete, so the response
    # schema is loaded in partial mode.
    response_schema = MetadataSchema(many=True, partial=columns is not None)
    return jsonify(response_schema.load(records))
26 changes: 25 additions & 1 deletion tests/handlers/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,30 @@ def test_metadata_handler(use_testdb):
from terracotta.handlers import metadata, datasets

ds = datasets.datasets()[0]
md = metadata.metadata(ds)
md = metadata.metadata(None, ds)
assert md
assert md["metadata"] == ["extra_data"]

md = metadata.metadata(["metadata", "bounds"], ds)
assert md
assert len(md.keys()) == 3
assert all(k in md.keys() for k in ("metadata", "bounds", "keys"))


def test_multiple_metadata_handler(use_testdb):
    """multiple_metadata returns one entry per dataset and honours the column filter."""
    from terracotta.handlers import metadata, datasets

    available = datasets.datasets()
    first_keys = list(available[0].values())
    second_keys = list(available[1].values())

    # Without a column filter the full metadata is returned.
    unfiltered = metadata.multiple_metadata(None, [first_keys, second_keys])
    assert unfiltered
    assert unfiltered[0]["metadata"] == ["extra_data"]
    assert len(unfiltered) == 2

    # With a filter only the requested columns plus "keys" remain.
    filtered = metadata.multiple_metadata(
        ["metadata", "bounds"], [first_keys, second_keys]
    )
    assert filtered
    first_entry = filtered[0]
    assert len(first_entry.keys()) == 3
    for expected in ("metadata", "bounds", "keys"):
        assert expected in first_entry.keys()
50 changes: 50 additions & 0 deletions tests/server/test_flask_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,56 @@ def test_get_metadata_nonexisting(client, use_testdb):
assert rv.status_code == 404


def test_post_metadata(client, use_testdb):
    """POST /metadata answers with one entry per requested dataset."""
    body = {"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]}
    response = client.post("/metadata", json=body)

    assert response.status_code == 200

    entries = json.loads(response.data)
    assert len(entries) == 2


def test_post_metadata_specific_columns(client, use_testdb):
    """The ?columns query limits every entry to those columns plus "keys"."""
    body = {"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]}
    response = client.post('/metadata?columns=["bounds", "range"]', json=body)

    assert response.status_code == 200

    assert len(json.loads(response.data)) == 2
    first_entry = json.loads(response.data)[0]
    assert set(first_entry.keys()) == {"bounds", "range", "keys"}


def test_post_metadata_errors(debug_client, use_non_writable_testdb):
    """Invalid POST /metadata requests surface the expected exception types."""
    from terracotta import exceptions
    import marshmallow

    two_datasets = {"keys": [["val11", "x", "val12"], ["val21", "x", "val22"]]}
    # 101 datasets: one more than the default MAX_POST_METADATA_KEYS of 100.
    too_many_datasets = {"keys": [["val11", "x", "val12"] for _ in range(101)]}

    # (expected exception, request URL, JSON body), checked in this order.
    cases = [
        # columns is not valid JSON
        (marshmallow.ValidationError, '/metadata?columns=["range]', two_datasets),
        # too many datasets requested
        (
            exceptions.InvalidArgumentsError,
            '/metadata?columns=["range"]',
            too_many_datasets,
        ),
        # body is a JSON string rather than an object
        (
            exceptions.InvalidArgumentsError,
            '/metadata?columns=["range"]',
            "Invalid JSON",
        ),
        # unknown column name
        (KeyError, '/metadata?columns=["invalid"]', two_datasets),
    ]

    for expected_error, url, body in cases:
        with pytest.raises(expected_error):
            debug_client.post(url, json=body)


def test_get_datasets(client, use_testdb):
rv = client.get("/datasets")
assert rv.status_code == 200
Expand Down

0 comments on commit e6f1952

Please sign in to comment.