Skip to content

Commit

Permalink
Add Collection.from_items (#1522)
Browse files Browse the repository at this point in the history
* Add Collection.from_items

* Fix datetime for python 3.10

* More tests, fix docs

* Use `Collection` rather than TypeVar

* Update changelog

* objections -> objects
  • Loading branch information
jsignell authored Feb 10, 2025
1 parent 542b9fb commit b3dfa27
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 3 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Added

- `Collection.from_items` for creating a `pystac.Collection` from an `ItemCollection` or an iterable of `Item`s ([#1522](https://github.com/stac-utils/pystac/pull/1522))

### Fixed

- Make sure that `VersionRange` has `VersionID`s rather than strings ([#1512](https://github.com/stac-utils/pystac/pull/1512))
Expand Down
1 change: 1 addition & 0 deletions docs/api/pystac.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ ItemCollection
.. autoclass:: pystac.ItemCollection
:members:
:inherited-members:
:undoc-members:

Link
----
Expand Down
2 changes: 1 addition & 1 deletion pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ class Catalog(STACObject):
catalog_type : Optional catalog type for this catalog. Must
be one of the values in :class:`~pystac.CatalogType`.
strategy : The layout strategy to use for setting the
HREFs of the catalog child objections and items.
HREFs of the catalog child objects and items.
If not provided, it will default to the strategy of the root and fallback to
:class:`~pystac.layout.BestPracticesLayoutStrategy`.
"""
Expand Down
73 changes: 72 additions & 1 deletion pystac/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ class Collection(Catalog, Assets):
:class:`~pystac.Asset` values in the dictionary will have their
:attr:`~pystac.Asset.owner` attribute set to the created Collection.
strategy : The layout strategy to use for setting the
HREFs of the catalog child objections and items.
HREFs of the catalog child objects and items.
If not provided, it will default to strategy of the parent and fallback to
:class:`~pystac.layout.BestPracticesLayoutStrategy`.
"""
Expand Down Expand Up @@ -710,6 +710,77 @@ def from_dict(

return collection

@classmethod
def from_items(
    cls: type[Collection],
    items: Iterable[Item] | pystac.ItemCollection,
    *,
    id: str | None = None,
    strategy: HrefLayoutStrategy | None = None,
) -> Collection:
    """Create a :class:`Collection` from an iterable of items or an
    :class:`~pystac.ItemCollection`.

    Will try to pull collection attributes from
    :attr:`~pystac.ItemCollection.extra_fields` and items when possible.

    Args:
        items : Iterable of :class:`~pystac.Item` instances to include in the
            :class:`Collection`. This can be a :class:`~pystac.ItemCollection`.
            One-shot iterables (e.g. generators) are supported; they are
            consumed exactly once.
        id : Identifier for the collection. If not set, must be available on the
            items and they must all match.
        strategy : The layout strategy to use for setting the
            HREFs of the catalog child objects and items.
            If not provided, it will default to strategy of the parent and fallback
            to :class:`~pystac.layout.BestPracticesLayoutStrategy`.

    Returns:
        Collection: A new collection containing ``items``, with its extent
        computed by :meth:`Extent.from_items`. ``title`` and ``description``
        are filled in only when every item agrees on the value.

    Raises:
        ValueError: If ``id`` is not given and the items do not all share one
            ``collection_id``.
    """
    if isinstance(items, pystac.ItemCollection):
        # Work on a copy so popping keys does not alter the caller's object.
        extra_fields = deepcopy(items.extra_fields)
        links = extra_fields.pop("links", [])
        providers = extra_fields.pop("providers", None)
        if providers is not None:
            providers = [pystac.Provider.from_dict(p) for p in providers]
    else:
        # ``items`` is iterated several times below (attribute extraction,
        # extent computation, adding the items). Materialize it once so that
        # a generator or other one-shot iterable is not exhausted after the
        # first pass.
        items = list(items)
        extra_fields = {}
        links = []
        providers = []

    def extract(attr: str) -> Any:
        """Extract attrs from items or item.properties as long as they all match"""
        value = None
        values = {getattr(item, attr, None) for item in items}
        if len(values) == 1:
            value = next(iter(values))
        if value is None:
            # Nothing usable as an attribute; look in the item properties.
            values = {item.properties.get(attr, None) for item in items}
            if len(values) == 1:
                value = next(iter(values))
        return value

    id = id or extract("collection_id")
    if id is None:
        raise ValueError(
            "Collection id must be defined. Either by specifying collection_id "
            "on every item, or as a keyword argument to this function."
        )

    collection = cls(
        id=id,
        description=extract("description"),
        extent=Extent.from_items(items),
        title=extract("title"),
        providers=providers,
        extra_fields=extra_fields,
        strategy=strategy,
    )
    collection.add_items(items)

    # Carry over any links that were stored on the ItemCollection.
    for link in links:
        collection.add_link(Link.from_dict(link))

    return collection

def get_item(self, id: str, recursive: bool = False) -> Item | None:
"""Returns an item with a given ID.
Expand Down
14 changes: 13 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import pytest

from pystac import Asset, Catalog, Collection, Item, Link
from pystac import Asset, Catalog, Collection, Item, ItemCollection, Link

from .utils import ARBITRARY_BBOX, ARBITRARY_EXTENT, ARBITRARY_GEOM, TestCases

Expand Down Expand Up @@ -76,6 +76,18 @@ def sample_item() -> Item:
return Item.from_file(TestCases.get_path("data-files/item/sample-item.json"))


@pytest.fixture
def sample_item_collection() -> ItemCollection:
    """Load the sample item collection from the test data files."""
    fixture_path = TestCases.get_path(
        "data-files/item-collection/sample-item-collection.json"
    )
    return ItemCollection.from_file(fixture_path)


@pytest.fixture
def sample_items(sample_item_collection: ItemCollection) -> list[Item]:
    """Return the items of the sample item collection as a plain list."""
    return [*sample_item_collection]


@pytest.fixture(scope="function")
def tmp_asset(tmp_path: Path) -> Asset:
"""Copy the entirety of test-case-2 to tmp and"""
Expand Down
109 changes: 109 additions & 0 deletions tests/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Collection,
Extent,
Item,
ItemCollection,
Provider,
SpatialExtent,
TemporalExtent,
Expand Down Expand Up @@ -711,3 +712,111 @@ def test_permissive_temporal_extent_deserialization(collection: Collection) -> N
]["interval"][0]
with pytest.warns(UserWarning):
Collection.from_dict(collection_dict)


@pytest.mark.parametrize("fixture_name", ("sample_item_collection", "sample_items"))
def test_from_items(fixture_name: str, request: pytest.FixtureRequest) -> None:
    """The built collection takes its id from the items and its extent covers them.

    Runs once with an ``ItemCollection`` and once with a plain list of items.
    """
    items = request.getfixturevalue(fixture_name)
    collection = Collection.from_items(items)

    for item in items:
        assert collection.id == item.collection_id

        # The collection bbox must enclose each item's bbox.
        bounds = collection.extent.spatial.bboxes[0]
        assert bounds[0] <= item.bbox[0]
        assert bounds[1] <= item.bbox[1]
        assert bounds[2] >= item.bbox[2]
        assert bounds[3] >= item.bbox[3]

        # The temporal interval must enclose each item's datetime range.
        interval = collection.extent.temporal.intervals[0]
        start = interval[0]
        end = interval[1]
        item_start = str_to_datetime(item.properties["start_datetime"])
        assert start and start <= item_start
        item_end = str_to_datetime(item.properties["end_datetime"])
        assert end and end >= item_end

    if isinstance(items, ItemCollection):
        # Links stored on the ItemCollection must be carried over.
        expected = {
            (raw_link["rel"], raw_link["href"])
            for raw_link in items.extra_fields["links"]
        }
        actual = {(link.rel, link.href) for link in collection.links}
        assert expected <= actual


def test_from_items_pulls_from_properties() -> None:
    """Title and description are taken from the item's properties."""
    item_properties = {"title": "Test Item", "description": "Extra words describing"}
    only_item = Item(
        id="test-item-1",
        geometry=ARBITRARY_GEOM,
        bbox=[-10, -20, 0, -10],
        datetime=datetime(2000, 2, 1, 12, tzinfo=tz.UTC),
        collection="test-collection-1",
        properties=item_properties,
    )

    built = Collection.from_items([only_item])

    assert built.id == only_item.collection_id
    assert built.title == only_item.properties["title"]
    assert built.description == only_item.properties["description"]


def test_from_items_without_collection_id() -> None:
    """Without a collection id on the item, an explicit ``id`` is required."""
    orphan = Item(
        id="test-item-1",
        geometry=ARBITRARY_GEOM,
        bbox=[-10, -20, 0, -10],
        datetime=datetime(2000, 2, 1, 12, tzinfo=tz.UTC),
        properties={},
    )

    # No id anywhere: construction must fail.
    with pytest.raises(ValueError, match="Collection id must be defined."):
        Collection.from_items([orphan])

    # Supplying the id explicitly makes it work.
    named = Collection.from_items([orphan], id="test-collection")
    assert named.id == "test-collection"


def test_from_items_with_collection_ids() -> None:
    """Items with conflicting collection ids require an explicit ``id``."""
    # (item id, bbox, hour of day, collection id)
    specs = [
        ("test-item-1", [-10, -20, 0, -10], 12, "test-collection-1"),
        ("test-item-2", [-15, -20, 0, -10], 13, "test-collection-2"),
    ]
    first, second = (
        Item(
            id=item_id,
            geometry=ARBITRARY_GEOM,
            bbox=bbox,
            datetime=datetime(2000, 2, 1, hour, tzinfo=tz.UTC),
            collection=collection_id,
            properties={},
        )
        for item_id, bbox, hour, collection_id in specs
    )

    # The two items disagree on the collection id, so none can be inferred.
    with pytest.raises(ValueError, match="Collection id must be defined."):
        Collection.from_items([first, second])

    built = Collection.from_items([first, second], id="test-collection")
    assert built.id == "test-collection"


def test_from_items_with_different_values() -> None:
    """A title that differs between items is not copied onto the collection."""
    # (item id, bbox, hour of day, title)
    specs = [
        ("test-item-1", [-10, -20, 0, -10], 12, "Test Item 1"),
        ("test-item-2", [-15, -20, 0, -10], 13, "Test Item 2"),
    ]
    first, second = (
        Item(
            id=item_id,
            geometry=ARBITRARY_GEOM,
            bbox=bbox,
            datetime=datetime(2000, 2, 1, hour, tzinfo=tz.UTC),
            properties={"title": title},
        )
        for item_id, bbox, hour, title in specs
    )

    built = Collection.from_items([first, second], id="test_collection")
    assert built.title is None


def test_from_items_with_providers(sample_item_collection: ItemCollection) -> None:
    """Providers stored in ``extra_fields`` become ``Provider`` objects."""
    sample_item_collection.extra_fields.update(providers=[{"name": "pystac"}])

    built = Collection.from_items(sample_item_collection)
    assert built.providers and len(built.providers) == 1

    first_provider = built.providers[0]
    assert first_provider and first_provider.name == "pystac"

0 comments on commit b3dfa27

Please sign in to comment.