def get_doc_info(self, limit: int = -1) -> list:
    """
    Returns the documents produced by ``self.take(limit)`` with each document's
    element list replaced by a short placeholder string.

    For every document, ``data["elements"]`` is overwritten in place with
    ``"<N elements>"``, where N is ``len(document.elements)`` measured before
    the overwrite. The documents themselves are mutated; no copies are made.

    Args:
        limit: Forwarded unchanged to ``self.take``.
            NOTE(review): how ``take`` interprets the default of -1 is not
            visible from here -- confirm it means "no limit".

    Returns:
        A new list holding the same (now summarized) document objects, in the
        order ``take`` yielded them.
    """

    def _summarize(doc):
        # The element count is evaluated before the assignment replaces the list.
        doc.data["elements"] = f"<{len(doc.elements)} elements>"
        return doc

    return [_summarize(doc) for doc in self.take(limit)]