-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Ensure parent docs are collected during doc reconstruct
* mock using patch (#1160)
* Add a client for Aryn, use the new client to read docs
* Use list_docs and get_doc for reading from Aryn
* Mark Aryn classes experimental
* Fix lint

---------

Co-authored-by: Dhruv Kaliraman <[email protected]>
- Loading branch information
1 parent
7f008bf
commit 2227815
Showing
7 changed files
with
225 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import logging | ||
from typing import Any | ||
|
||
import requests | ||
|
||
from sycamore.decorators import experimental | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@experimental
class ArynClient:
    """
    Small HTTP client for listing/fetching docs and creating docsets.

    Every request carries an ``Authorization: Bearer <api_key>`` header and is
    sent to a path underneath ``aryn_url``. Any failure (transport error,
    HTTP error status, unexpected payload) is reported as a ``ValueError``
    chained to the underlying exception where there is one.

    Args:
        aryn_url: Base URL that the endpoint paths are appended to.
        api_key: Key sent as the bearer token.
    """

    def __init__(self, aryn_url: str, api_key: str):
        self.aryn_url = aryn_url
        self.api_key = api_key

    def _auth_headers(self) -> dict[str, str]:
        """Return the authorization header shared by all requests."""
        return {"Authorization": f"Bearer {self.api_key}"}

    def list_docs(self, docset_id: str) -> list[str]:
        """
        Return the ``doc_id`` of every item listed for a docset.

        Args:
            docset_id: Identifier of the docset to list.

        Returns:
            The ``doc_id`` values found under the response's ``items`` key.

        Raises:
            ValueError: If the request fails, the server answers with an error
                status, or the payload does not have the expected shape.
        """
        try:
            response = requests.get(f"{self.aryn_url}/docsets/{docset_id}/docs", headers=self._auth_headers())
        except Exception as e:
            raise ValueError(f"Error listing docs: {e}") from e

        # Check the status before parsing: an error body has no "items" key,
        # which would otherwise surface as an uninformative KeyError message.
        if not response.ok:
            raise ValueError(f"Error listing docs, received {response.status_code} {response.text} {response.reason}")

        try:
            return [item["doc_id"] for item in response.json()["items"]]
        except Exception as e:
            raise ValueError(f"Error listing docs: {e}") from e

    def get_doc(self, docset_id: str, doc_id: str) -> dict[str, Any]:
        """
        Fetch a single document as parsed JSON.

        Args:
            docset_id: Identifier of the docset that holds the document.
            doc_id: Identifier of the document to fetch.

        Returns:
            The decoded JSON body of the response.

        Raises:
            ValueError: If the request fails, the status code is not 200, the
                body is not valid JSON, or the body decodes to ``None``.
        """
        try:
            response = requests.get(
                f"{self.aryn_url}/docsets/{docset_id}/docs/{doc_id}",
                headers=self._auth_headers(),
            )
        except Exception as e:
            raise ValueError(f"Error getting doc {doc_id}: {e}") from e

        # Raised outside the try blocks so the message is not wrapped a second
        # time with the same "Error getting doc" prefix.
        if response.status_code != 200:
            raise ValueError(
                f"Error getting doc {doc_id}, received {response.status_code} {response.text} {response.reason}"
            )

        try:
            doc = response.json()
        except Exception as e:
            raise ValueError(f"Error getting doc {doc_id}: {e}") from e

        if doc is None:
            raise ValueError(f"Received None for doc {doc_id}")
        logger.debug("Got doc %s", doc)
        return doc

    def create_docset(self, name: str) -> str:
        """
        Create a docset and return its id.

        Args:
            name: Name sent as the ``name`` field of the JSON request body.

        Returns:
            The ``docset_id`` value from the response.

        Raises:
            ValueError: If the request fails, the server answers with an error
                status, or the payload has no ``docset_id``.
        """
        try:
            response = requests.post(f"{self.aryn_url}/docsets", json={"name": name}, headers=self._auth_headers())
        except Exception as e:
            raise ValueError(f"Error creating docset: {e}") from e

        # Same reasoning as list_docs: report the HTTP failure itself rather
        # than the KeyError caused by parsing an error body.
        if not response.ok:
            raise ValueError(f"Error creating docset, received {response.status_code} {response.text} {response.reason}")

        try:
            return response.json()["docset_id"]
        except Exception as e:
            raise ValueError(f"Error creating docset: {e}") from e
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import warnings | ||
|
||
|
||
def experimental(cls):
    """
    Decorator to mark a class as experimental.

    Instantiating the decorated class emits a ``FutureWarning`` attributed to
    the code that created the instance. The class itself is returned (with its
    ``__init__`` wrapped) rather than a stand-in function, so ``isinstance``
    checks, subclassing and class attribute access keep working on the
    decorated name.

    Args:
        cls: The class to mark as experimental.

    Returns:
        The same class object, with a warning-emitting ``__init__``.
    """
    import functools

    original_init = cls.__init__

    @functools.wraps(original_init)
    def __init__(self, *args, **kwargs):
        # stacklevel=2 points the warning at the caller that instantiated the
        # class instead of at this wrapper.
        warnings.warn(
            f"Class {cls.__name__} is experimental and may change in the future.", FutureWarning, stacklevel=2
        )
        original_init(self, *args, **kwargs)

    cls.__init__ = __init__
    return cls
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
30 changes: 30 additions & 0 deletions
30
lib/sycamore/sycamore/tests/integration/connectors/aryn/test_client.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import os | ||
|
||
import pytest | ||
|
||
from sycamore.connectors.aryn.client import ArynClient | ||
|
||
|
||
aryn_endpoint = os.getenv("ARYN_ENDPOINT") | ||
|
||
|
||
@pytest.mark.skip(reason="For manual testing only")
def test_list_docs():
    """Manual check: print every doc id that the client lists for a docset."""
    # Put a real docset id here before running this by hand.
    target_docset = ""
    api_key = os.getenv("ARYN_TEST_API_KEY")
    client = ArynClient(aryn_url=f"{aryn_endpoint}", api_key=api_key)
    for doc_id in client.list_docs(target_docset):
        print(doc_id)
|
||
|
||
@pytest.mark.skip(reason="For manual testing only")
def test_get_doc():
    """Manual check: print each listed doc id followed by the fetched doc."""
    # Put a real docset id here before running this by hand.
    target_docset = ""
    api_key = os.getenv("ARYN_TEST_API_KEY")
    client = ArynClient(aryn_url=f"{aryn_endpoint}", api_key=api_key)
    for doc_id in client.list_docs(target_docset):
        print(doc_id)
        fetched = client.get_doc(target_docset, doc_id)
        print(fetched)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters