Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add endpoint for querying multiple metadata #311

Merged
merged 16 commits into from
Aug 20, 2023
40 changes: 37 additions & 3 deletions terracotta/handlers/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,52 @@
Handle /metadata API endpoint.
"""

from typing import Mapping, Sequence, Dict, Any, Union
from typing import Mapping, Sequence, Dict, Any, Union, List, Optional
from collections import OrderedDict

from terracotta import get_settings, get_driver
from terracotta.profile import trace


def filter_metadata(
    metadata: Dict[str, Any], columns: Optional[List[str]]
) -> Dict[str, Any]:
    """Filter metadata by columns, if given.

    Args:
        metadata: Full metadata mapping of a dataset.
        columns: Names of the entries to keep, or ``None`` to keep everything.

    Returns:
        ``metadata`` itself when ``columns`` is ``None``; otherwise a new dict
        holding only the requested entries, in the order they were requested.

    Raises:
        ValueError: If ``columns`` is an empty list.
        KeyError: If a requested column is not present in ``metadata``.
    """
    if columns is None:
        return metadata

    # Explicit check instead of ``assert`` so that validation is not stripped
    # when Python runs with -O (which would silently return all metadata).
    if len(columns) == 0:
        raise ValueError("columns must either be a non-empty list or None")

    return {column: metadata[column] for column in columns}


@trace("metadata_handler")
def metadata(keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[str, Any]:
def metadata(
columns: Optional[List[str]], keys: Union[Sequence[str], Mapping[str, str]]
) -> Dict[str, Any]:
"""Returns all metadata for a single dataset

The driver's metadata for `keys` is passed through `filter_metadata` together
with `columns`, and a "keys" entry built from the driver's key names is added
to the result before it is returned.
"""
settings = get_settings()
driver = get_driver(settings.DRIVER_PATH, provider=settings.DRIVER_PROVIDER)
metadata = driver.get_metadata(keys)
metadata = filter_metadata(driver.get_metadata(keys), columns)
# NOTE(review): zip() iterates a Mapping's keys, not its values, so a Mapping
# argument pairs key names with key names here - confirm this is intended.
metadata["keys"] = OrderedDict(zip(driver.key_names, keys))
return metadata


@trace("multiple_metadata_handler")
def multiple_metadata(
    columns: Optional[List[str]], datasets: List[List[str]]
) -> List[Dict[str, Any]]:
    """Collect the metadata of several datasets in one call.

    Each entry of ``datasets`` is looked up through the configured driver,
    passed through ``filter_metadata`` with ``columns``, and extended with a
    "keys" entry pairing the driver's key names with that dataset's values.
    Results are returned in the same order as ``datasets``.
    """
    config = get_settings()
    driver = get_driver(config.DRIVER_PATH, provider=config.DRIVER_PROVIDER)
    key_names = driver.key_names

    def describe(dataset: List[str]) -> Dict[str, Any]:
        # Build the (optionally filtered) record for a single dataset.
        record = filter_metadata(driver.get_metadata(dataset), columns)
        record["keys"] = OrderedDict(zip(key_names, dataset))
        return record

    return [describe(dataset) for dataset in datasets]
77 changes: 73 additions & 4 deletions terracotta/server/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
Flask route to handle /metadata calls.
"""

from marshmallow import Schema, fields, validate
from flask import jsonify, Response
from marshmallow import Schema, fields, validate, ValidationError
from flask import jsonify, Response, request

from terracotta.server.flask_api import METADATA_API

Expand Down Expand Up @@ -50,6 +50,35 @@ class Meta:
)


class CommaSeparatedListField(fields.Field):
    """Field parsing a bracketed, comma-separated string such as ``[a, b]``.

    ``"[]"`` deserializes to an empty list; any other bracketed value is
    split on ``", "`` into a list of strings.
    """

    def _deserialize(self, value, attr, data, **kwargs):
        """Turn the raw string into a list of strings.

        Raises:
            ValidationError: If ``value`` is not a string wrapped in square
                brackets (this includes the empty string).
        """
        # Checked explicitly rather than with ``assert``: an AssertionError
        # (or the IndexError from indexing an empty string) is not a
        # ValueError, so it would escape as an unhandled error instead of
        # being reported as a validation failure.
        is_bracketed = (
            isinstance(value, str)
            and value.startswith("[")
            and value.endswith("]")
        )
        if not is_bracketed:
            raise ValidationError("Invalid input for a list of values.")

        if value == "[]":
            return []

        return value[1:-1].split(", ")


class MetadataColumnsSchema(Schema):
    """Query-string schema carrying the optional ``columns`` filter."""

    columns = CommaSeparatedListField(
        description="Columns of dataset to be returned",
    )
DenizYil marked this conversation as resolved.
Show resolved Hide resolved


class MultipleMetadataDatasetsSchema(Schema):
    """Request-body schema: a required list of datasets under ``keys``.

    Each dataset is itself a list of strings.
    """

    keys = fields.List(
        fields.List(
            fields.String(),
            description="Keys identifying dataset",
            required=True,
        ),
        required=True,
        description="Array containing all available key combinations",
    )


@METADATA_API.route("/metadata/<path:keys>", methods=["GET"])
def get_metadata(keys: str) -> Response:
"""Get metadata for given dataset
Expand All @@ -63,6 +92,8 @@ def get_metadata(keys: str) -> Response:
description: Keys of dataset to retrieve metadata for (e.g. 'value1/value2')
type: path
required: true
- in: query
schema: MetadataColumnsSchema
responses:
200:
description: All metadata for given dataset
Expand All @@ -72,7 +103,45 @@ def get_metadata(keys: str) -> Response:
"""
from terracotta.handlers.metadata import metadata

columns_schema = MetadataColumnsSchema()
columns = columns_schema.load(request.args).get("columns")

parsed_keys = [key for key in keys.split("/") if key]
payload = metadata(parsed_keys)
schema = MetadataSchema()

payload = metadata(columns, parsed_keys)
schema = MetadataSchema(partial=columns is not None)
return jsonify(schema.load(payload))


@METADATA_API.route("/metadata", methods=["POST"])
def get_multiple_metadata() -> Response:
    """Get metadata for multiple datasets
    ---
    post:
        summary: /metadata
        description:
            Retrieve metadata for multiple datasets, identified by the
            body payload. Desired columns can be filtered using the ?columns
            query.
        parameters:
            - in: query
              schema: MetadataColumnsSchema
            - in: body
              schema: MultipleMetadataDatasetsSchema
        responses:
            200:
                description: All metadata for given dataset
                schema: MetadataSchema
            404:
                description: No dataset found for given key combination
    """
    from terracotta.handlers.metadata import multiple_metadata

    # The body is validated before the query string, so a malformed body is
    # reported first when both are invalid.
    body = MultipleMetadataDatasetsSchema().load(request.json)
    datasets = body.get("keys")

    query = MetadataColumnsSchema().load(request.args)
    columns = query.get("columns")

    # With a column filter only a subset of fields is present, hence partial.
    response_schema = MetadataSchema(many=True, partial=columns is not None)
    payload = multiple_metadata(columns, datasets)
    return jsonify(response_schema.load(payload))
26 changes: 25 additions & 1 deletion tests/handlers/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,30 @@ def test_metadata_handler(use_testdb):
from terracotta.handlers import metadata, datasets

ds = datasets.datasets()[0]
md = metadata.metadata(ds)
md = metadata.metadata(None, ds)
assert md
assert md["metadata"] == ["extra_data"]

md = metadata.metadata(["metadata", "bounds"], ds)
assert md
assert len(md.keys()) == 3
assert all(k in md.keys() for k in ("metadata", "bounds", "keys"))


def test_multiple_metadata_handler(use_testdb):
    """Exercise multiple_metadata with and without a column filter."""
    from terracotta.handlers import metadata, datasets

    available = datasets.datasets()
    requested = [list(available[index].values()) for index in (0, 1)]

    # Unfiltered: one complete record per requested dataset.
    unfiltered = metadata.multiple_metadata(None, requested)

    assert unfiltered
    assert unfiltered[0]["metadata"] == ["extra_data"]
    assert len(unfiltered) == 2

    # Filtered: only the requested columns plus the added "keys" entry.
    filtered = metadata.multiple_metadata(["metadata", "bounds"], requested)
    assert filtered
    first = filtered[0]
    assert len(first) == 3
    for expected in ("metadata", "bounds", "keys"):
        assert expected in first
Loading