diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 34010525..50468762 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -6,6 +6,6 @@ version: 2
 updates:
   - package-ecosystem: "pip"
-    directory: "/"
+    directory: "/backend"
     schedule:
       interval: "weekly"
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 1f40de7f..3c5b8939 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -55,7 +55,7 @@ jobs:
       - name: Run pytests
         working-directory: ${{ github.workspace }}/backend
         run: |
-          pytest --cov=src --junitxml=test-results.xml tests/
+          pytest --cov=graphrag_app --junitxml=test-results.xml tests/

       - name: Upload test results
         uses: actions/upload-artifact@v4
diff --git a/backend/.coveragerc b/backend/.coveragerc
index ff2039b4..8d93775e 100644
--- a/backend/.coveragerc
+++ b/backend/.coveragerc
@@ -1,4 +1,3 @@
 [run]
 omit =
     **/__init__.py
-    src/models.py
diff --git a/backend/README.md b/backend/README.md
new file mode 100644
index 00000000..6144de09
--- /dev/null
+++ b/backend/README.md
@@ -0,0 +1,22 @@
+# Web App
+This directory contains the source code for a FastAPI application that implements a REST API wrapper around the graphrag library. The app has been packaged as a Python package for a cleaner install/deployment experience.
+
+## Package Layout
+The code has the following structure:
+```shell
+backend
+├── README.md
+├── graphrag_app # contains the main application files
+│   ├── __init__.py
+│   ├── api # endpoint definitions
+│   ├── logger # custom loggers designed for graphrag use
+│   ├── main.py # initializes the FastAPI application
+│   ├── typing # data validation models
+│   └── utils # utility/helper functions
+├── manifests # k8s manifest files
+├── poetry.lock
+├── pyproject.toml
+├── pytest.ini
+├── scripts # miscellaneous scripts that get executed in k8s
+└── tests # pytests (integration tests + unit tests)
+```
diff --git a/backend/src/__init__.py b/backend/graphrag_app/__init__.py
similarity index 100%
rename from backend/src/__init__.py
rename to backend/graphrag_app/__init__.py
diff --git a/backend/src/api/__init__.py b/backend/graphrag_app/api/__init__.py
similarity index 100%
rename from backend/src/api/__init__.py
rename to backend/graphrag_app/api/__init__.py
diff --git a/backend/src/api/data.py b/backend/graphrag_app/api/data.py
similarity index 56%
rename from backend/src/api/data.py
rename to backend/graphrag_app/api/data.py
index a3051cb1..4d2ce553 100644
--- a/backend/src/api/data.py
+++ b/backend/graphrag_app/api/data.py
@@ -3,28 +3,30 @@

 import asyncio
 import re
+import traceback
 from math import ceil
 from typing import List

-from azure.storage.blob import ContainerClient
+from azure.storage.blob.aio import ContainerClient
 from fastapi import (
     APIRouter,
+    Depends,
     HTTPException,
     UploadFile,
 )

-from src.api.azure_clients import AzureClientManager
-from src.api.common import (
-    delete_blob_container,
-    delete_cosmos_container_item,
-    sanitize_name,
-    validate_blob_container_name,
-)
-from src.logger import LoggerSingleton
-from src.models import (
+from graphrag_app.logger.load_logger import load_pipeline_logger
+from graphrag_app.typing.models import (
     BaseResponse,
     StorageNameList,
 )
+from graphrag_app.utils.common import (
+    delete_cosmos_container_item_if_exist,
+    delete_storage_container_if_exist,
+    get_blob_container_client,
+    get_cosmos_container_store_client,
+    sanitize_name,
+)

 data_route = APIRouter(
     prefix="/data",
@@ -34,26 +36,27 @@

 @data_route.get(
     "",
-    summary="Get all 
data storage containers.", + summary="Get list of data containers.", response_model=StorageNameList, responses={200: {"model": StorageNameList}}, ) -async def get_all_data_storage_containers(): +async def get_all_data_containers(): """ - Retrieve a list of all data storage containers. + Retrieve a list of all data containers. """ - azure_client_manager = AzureClientManager() items = [] try: - container_store_client = azure_client_manager.get_cosmos_container_client( - database="graphrag", container="container-store" - ) + container_store_client = get_cosmos_container_store_client() for item in container_store_client.read_all_items(): if item["type"] == "data": items.append(item["human_readable_name"]) - except Exception: - reporter = LoggerSingleton().get_instance() - reporter.on_error("Error getting list of blob containers.") + except Exception as e: + reporter = load_pipeline_logger() + reporter.error( + message="Error getting list of blob containers.", + cause=e, + stack=traceback.format_exc(), + ) raise HTTPException( status_code=500, detail="Error getting list of blob containers." ) @@ -112,10 +115,13 @@ def __exit__(self, *args): responses={200: {"model": BaseResponse}}, ) async def upload_files( - files: List[UploadFile], storage_name: str, overwrite: bool = True + files: List[UploadFile], + container_name: str, + sanitized_container_name: str = Depends(sanitize_name), + overwrite: bool = True, ): """ - Create a data storage container in Azure and upload files to it. + Create a Azure Storage container and upload files to it. Args: files (List[UploadFile]): A list of files to be uploaded. @@ -128,80 +134,73 @@ async def upload_files( Raises: HTTPException: If the container name is invalid or if any error occurs during the upload process. """ - sanitized_storage_name = sanitize_name(storage_name) - # ensure container name follows Azure Blob Storage naming conventions - try: - validate_blob_container_name(sanitized_storage_name) - except ValueError: - raise HTTPException( - status_code=500, - detail=f"Invalid blob container name: '{storage_name}'. 
Please try a different name.", - ) try: - azure_client_manager = AzureClientManager() - blob_service_client = azure_client_manager.get_blob_service_client_async() - container_client = blob_service_client.get_container_client( - sanitized_storage_name - ) - if not await container_client.exists(): - await container_client.create_container() - # clean files - remove illegal XML characters files = [UploadFile(Cleaner(f.file), filename=f.filename) for f in files] # upload files in batches of 1000 to avoid exceeding Azure Storage API limits + blob_container_client = await get_blob_container_client( + sanitized_container_name + ) batch_size = 1000 - batches = ceil(len(files) / batch_size) - for i in range(batches): + num_batches = ceil(len(files) / batch_size) + for i in range(num_batches): batch_files = files[i * batch_size : (i + 1) * batch_size] tasks = [ - upload_file_async(file, container_client, overwrite) + upload_file_async(file, blob_container_client, overwrite) for file in batch_files ] await asyncio.gather(*tasks) - # update container-store in cosmosDB since upload process was successful - container_store_client = azure_client_manager.get_cosmos_container_client( - database="graphrag", container="container-store" - ) - container_store_client.upsert_item({ - "id": sanitized_storage_name, - "human_readable_name": storage_name, + + # update container-store entry in cosmosDB once upload process is successful + cosmos_container_store_client = get_cosmos_container_store_client() + cosmos_container_store_client.upsert_item({ + "id": sanitized_container_name, + "human_readable_name": container_name, "type": "data", }) return BaseResponse(status="File upload successful.") - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error("Error uploading files.", details={"files": files}) + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Error uploading files.", + cause=e, + stack=traceback.format_exc(), + details={"files": [f.filename for f in files]}, + ) raise HTTPException( status_code=500, - detail=f"Error uploading files to container '{storage_name}'.", + detail=f"Error uploading files to container '{container_name}'.", ) @data_route.delete( - "/{storage_name}", + "/{container_name}", summary="Delete a data storage container", response_model=BaseResponse, responses={200: {"model": BaseResponse}}, ) -async def delete_files(storage_name: str): +async def delete_files( + container_name: str, sanitized_container_name: str = Depends(sanitize_name) +): """ Delete a specified data storage container. """ - # azure_client_manager = AzureClientManager() - sanitized_storage_name = sanitize_name(storage_name) try: - # delete container in Azure Storage - delete_blob_container(sanitized_storage_name) - # delete entry from container-store in cosmosDB - delete_cosmos_container_item("container-store", sanitized_storage_name) - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error( - f"Error deleting container {storage_name}.", - details={"Container": storage_name}, + delete_storage_container_if_exist(sanitized_container_name) + delete_cosmos_container_item_if_exist( + "container-store", sanitized_container_name + ) + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message=f"Error deleting container {container_name}.", + cause=e, + stack=traceback.format_exc(), + details={"Container": container_name}, ) raise HTTPException( - status_code=500, detail=f"Error deleting container '{storage_name}'." 
+ status_code=500, + detail=f"Error deleting container '{container_name}'.", ) return BaseResponse(status="Success") diff --git a/backend/src/api/graph.py b/backend/graphrag_app/api/graph.py similarity index 55% rename from backend/src/api/graph.py rename to backend/graphrag_app/api/graph.py index 3569b8a3..a60f597a 100644 --- a/backend/src/api/graph.py +++ b/backend/graphrag_app/api/graph.py @@ -1,18 +1,21 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +import traceback + from fastapi import ( APIRouter, + Depends, HTTPException, ) from fastapi.responses import StreamingResponse -from src.api.azure_clients import AzureClientManager -from src.api.common import ( +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.utils.azure_clients import AzureClientManager +from graphrag_app.utils.common import ( sanitize_name, validate_index_file_exist, ) -from src.logger import LoggerSingleton graph_route = APIRouter( prefix="/graph", @@ -21,20 +24,21 @@ @graph_route.get( - "/graphml/{index_name}", + "/graphml/{container_name}", summary="Retrieve a GraphML file of the knowledge graph", response_description="GraphML file successfully downloaded", ) -async def get_graphml_file(index_name: str): - # validate index_name and graphml file existence +async def get_graphml_file( + container_name, sanitized_container_name: str = Depends(sanitize_name) +): + # validate graphml file existence azure_client_manager = AzureClientManager() - sanitized_index_name = sanitize_name(index_name) - graphml_filename = "summarized_graph.graphml" + graphml_filename = "graph.graphml" blob_filepath = f"output/{graphml_filename}" # expected file location of the graph based on the workflow - validate_index_file_exist(sanitized_index_name, blob_filepath) + validate_index_file_exist(sanitized_container_name, blob_filepath) try: blob_client = azure_client_manager.get_blob_service_client().get_blob_client( - container=sanitized_index_name, blob=blob_filepath + container=sanitized_container_name, blob=blob_filepath ) blob_stream = blob_client.download_blob().chunks() return StreamingResponse( @@ -42,10 +46,14 @@ async def get_graphml_file(index_name: str): media_type="application/octet-stream", headers={"Content-Disposition": f"attachment; filename={graphml_filename}"}, ) - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error("Could not retrieve graphml file") + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Could not fetch graphml file", + cause=e, + stack=traceback.format_exc(), + ) raise HTTPException( status_code=500, - detail=f"Could not retrieve graphml file for index '{index_name}'.", + detail=f"Could not fetch graphml file for '{container_name}'.", ) diff --git a/backend/graphrag_app/api/index.py b/backend/graphrag_app/api/index.py new file mode 100644 index 00000000..0203e0c7 --- /dev/null +++ b/backend/graphrag_app/api/index.py @@ -0,0 +1,285 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +import os +import traceback +from time import time + +from azure.identity import DefaultAzureCredential +from azure.search.documents.indexes import SearchIndexClient +from fastapi import ( + APIRouter, + Depends, + HTTPException, + UploadFile, +) +from kubernetes import ( + client as kubernetes_client, +) +from kubernetes import ( + config as kubernetes_config, +) + +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.typing.models import ( + BaseResponse, + IndexNameList, + IndexStatusResponse, +) +from graphrag_app.typing.pipeline import PipelineJobState +from graphrag_app.utils.azure_clients import AzureClientManager +from graphrag_app.utils.common import ( + delete_cosmos_container_item_if_exist, + delete_storage_container_if_exist, + get_cosmos_container_store_client, + sanitize_name, +) +from graphrag_app.utils.pipeline import PipelineJob + +index_route = APIRouter( + prefix="/index", + tags=["Index Operations"], +) + + +@index_route.post( + "", + summary="Build an index", + response_model=BaseResponse, + responses={200: {"model": BaseResponse}}, +) +async def schedule_index_job( + storage_container_name: str, + index_container_name: str, + entity_extraction_prompt: UploadFile | None = None, + entity_summarization_prompt: UploadFile | None = None, + community_summarization_prompt: UploadFile | None = None, +): + azure_client_manager = AzureClientManager() + blob_service_client = azure_client_manager.get_blob_service_client() + pipelinejob = PipelineJob() + + # validate index name against blob container naming rules + sanitized_index_container_name = sanitize_name(index_container_name) + + # check for data container existence + sanitized_storage_container_name = sanitize_name(storage_container_name) + if not blob_service_client.get_container_client( + sanitized_storage_container_name + ).exists(): + raise HTTPException( + status_code=500, + detail=f"Storage container '{storage_container_name}' does not exist", + ) + + # check for prompts + entity_extraction_prompt_content = ( + entity_extraction_prompt.file.read().decode("utf-8") + if entity_extraction_prompt + else None + ) + entity_summarization_prompt_content = ( + entity_summarization_prompt.file.read().decode("utf-8") + if entity_summarization_prompt + else None + ) + community_summarization_prompt_content = ( + community_summarization_prompt.file.read().decode("utf-8") + if community_summarization_prompt + else None + ) + + # check for existing index job + # it is okay if job doesn't exist, but if it does, + # it must not be scheduled or running + if pipelinejob.item_exist(sanitized_index_container_name): + existing_job = pipelinejob.load_item(sanitized_index_container_name) + if (PipelineJobState(existing_job.status) == PipelineJobState.SCHEDULED) or ( + PipelineJobState(existing_job.status) == PipelineJobState.RUNNING + ): + raise HTTPException( + status_code=202, # request has been accepted for processing but is not complete. 
+ detail=f"Index '{index_container_name}' already exists and has not finished building.", + ) + # if indexing job is in a failed state, delete the associated K8s job and pod to allow for a new job to be scheduled + if PipelineJobState(existing_job.status) == PipelineJobState.FAILED: + _delete_k8s_job( + f"indexing-job-{sanitized_index_container_name}", + os.environ["AKS_NAMESPACE"], + ) + # reset the pipeline job details + existing_job._status = PipelineJobState.SCHEDULED + existing_job._percent_complete = 0 + existing_job._progress = "" + existing_job._all_workflows = existing_job._completed_workflows = ( + existing_job._failed_workflows + ) = [] + existing_job._entity_extraction_prompt = entity_extraction_prompt_content + existing_job._entity_summarization_prompt = entity_summarization_prompt_content + existing_job._community_summarization_prompt = ( + community_summarization_prompt_content + ) + existing_job._epoch_request_time = int(time()) + existing_job.update_db() + else: + pipelinejob.create_item( + id=sanitized_index_container_name, + human_readable_index_name=index_container_name, + human_readable_storage_name=storage_container_name, + entity_extraction_prompt=entity_extraction_prompt_content, + entity_summarization_prompt=entity_summarization_prompt_content, + community_summarization_prompt=community_summarization_prompt_content, + status=PipelineJobState.SCHEDULED, + ) + + return BaseResponse(status="Indexing job scheduled") + + +@index_route.get( + "", + summary="Get all index names", + response_model=IndexNameList, + responses={200: {"model": IndexNameList}}, +) +async def get_all_index_names( + container_store_client=Depends(get_cosmos_container_store_client), +): + """ + Retrieve a list of all index names. + """ + items = [] + try: + for item in container_store_client.read_all_items(): + if item["type"] == "index": + items.append(item["human_readable_name"]) + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Error fetching index list", + cause=e, + stack=traceback.format_exc(), + ) + return IndexNameList(index_name=items) + + +def _get_pod_name(job_name: str, namespace: str) -> str | None: + """Retrieve the name of a kubernetes pod associated with a given job name.""" + # function should work only when running in AKS + if not os.getenv("KUBERNETES_SERVICE_HOST"): + return None + kubernetes_config.load_incluster_config() + v1 = kubernetes_client.CoreV1Api() + ret = v1.list_namespaced_pod(namespace=namespace) + for i in ret.items: + if job_name in i.metadata.name: + return i.metadata.name + return None + + +def _delete_k8s_job(job_name: str, namespace: str) -> None: + """Delete a kubernetes job. 
+ Must delete K8s job first and then any pods associated with it + """ + # function should only work when running in AKS + if not os.getenv("KUBERNETES_SERVICE_HOST"): + return None + logger = load_pipeline_logger() + kubernetes_config.load_incluster_config() + try: + batch_v1 = kubernetes_client.BatchV1Api() + batch_v1.delete_namespaced_job(name=job_name, namespace=namespace) + except Exception as e: + logger.error( + message=f"Error deleting k8s job {job_name}.", + cause=e, + stack=traceback.format_exc(), + details={"container": job_name}, + ) + pass + try: + core_v1 = kubernetes_client.CoreV1Api() + job_pod = _get_pod_name(job_name, os.environ["AKS_NAMESPACE"]) + if job_pod: + core_v1.delete_namespaced_pod(job_pod, namespace=namespace) + except Exception as e: + logger.error( + message=f"Error deleting k8s pod for job {job_name}.", + cause=e, + stack=traceback.format_exc(), + details={"container": job_name}, + ) + pass + + +@index_route.delete( + "/{container_name}", + summary="Delete a specified index", + response_model=BaseResponse, + responses={200: {"model": BaseResponse}}, +) +async def delete_index( + container_name: str, + sanitized_container_name: str = Depends(sanitize_name), +): + """ + Delete a specified index and all associated metadata. + """ + try: + # kill indexing job if it is running + if os.getenv("KUBERNETES_SERVICE_HOST"): # only found if in AKS + _delete_k8s_job(f"indexing-job-{sanitized_container_name}", "graphrag") + + delete_storage_container_if_exist(sanitized_container_name) + delete_cosmos_container_item_if_exist( + "container-store", sanitized_container_name + ) + delete_cosmos_container_item_if_exist("jobs", sanitized_container_name) + + # delete associated AI Search index + index_client = SearchIndexClient( + endpoint=os.environ["AI_SEARCH_URL"], + credential=DefaultAzureCredential(), + audience=os.environ["AI_SEARCH_AUDIENCE"], + ) + ai_search_index_name = f"{sanitized_container_name}_description_embedding" + if ai_search_index_name in index_client.list_index_names(): + index_client.delete_index(ai_search_index_name) + + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message=f"Error encountered while deleting all data for {container_name}.", + cause=e, + stack=traceback.format_exc(), + details={"container": container_name}, + ) + raise HTTPException( + status_code=500, detail=f"Error deleting '{container_name}'." + ) + + return BaseResponse(status="Success") + + +@index_route.get( + "/status/{container_name}", + summary="Track the status of an indexing job", + response_model=IndexStatusResponse, +) +async def get_index_status( + container_name: str, sanitized_container_name: str = Depends(sanitize_name) +): + pipelinejob = PipelineJob() + if pipelinejob.item_exist(sanitized_container_name): + pipeline_job = pipelinejob.load_item(sanitized_container_name) + return IndexStatusResponse( + status_code=200, + index_name=pipeline_job.human_readable_index_name, + storage_name=pipeline_job.human_readable_storage_name, + status=pipeline_job.status.value, + percent_complete=pipeline_job.percent_complete, + progress=pipeline_job.progress, + ) + else: + raise HTTPException( + status_code=404, detail=f"'{container_name}' does not exist." + ) diff --git a/backend/graphrag_app/api/prompt_tuning.py b/backend/graphrag_app/api/prompt_tuning.py new file mode 100644 index 00000000..1daa42e0 --- /dev/null +++ b/backend/graphrag_app/api/prompt_tuning.py @@ -0,0 +1,82 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+import traceback
+from pathlib import Path
+
+import graphrag.api as api
+import yaml
+from fastapi import (
+    APIRouter,
+    Depends,
+    HTTPException,
+)
+from graphrag.config.create_graphrag_config import create_graphrag_config
+
+from graphrag_app.logger.load_logger import load_pipeline_logger
+from graphrag_app.utils.azure_clients import AzureClientManager
+from graphrag_app.utils.common import sanitize_name
+
+prompt_tuning_route = APIRouter(prefix="/index/config", tags=["Prompt Tuning"])
+
+
+@prompt_tuning_route.get(
+    "/prompts",
+    summary="Generate custom graphrag prompts based on user-provided data.",
+    description="Generating custom prompts from user-provided data may take several minutes to run based on the amount of data used.",
+)
+async def generate_prompts(
+    container_name: str,
+    limit: int = 5,
+    sanitized_container_name: str = Depends(sanitize_name),
+):
+    """
+    Automatically generate custom prompts for entity extraction,
+    community reports, and summarize descriptions based on a sample of provided data.
+    """
+    # check for storage container existence
+    azure_client_manager = AzureClientManager()
+    blob_service_client = azure_client_manager.get_blob_service_client()
+    if not blob_service_client.get_container_client(sanitized_container_name).exists():
+        raise HTTPException(
+            status_code=500,
+            detail=f"Storage container '{container_name}' does not exist.",
+        )
+
+    # load pipeline configuration file (settings.yaml) for input data and other settings
+    ROOT_DIR = Path(__file__).resolve().parent.parent.parent
+    with (ROOT_DIR / "scripts/settings.yaml").open("r") as f:
+        data = yaml.safe_load(f)
+    data["input"]["container_name"] = sanitized_container_name
+    graphrag_config = create_graphrag_config(values=data, root_dir=".")
+
+    # generate prompts
+    try:
+        prompts: tuple[str, str, str] = await api.generate_indexing_prompts(
+            config=graphrag_config,
+            root=".",
+            limit=limit,
+            selection_method="random",
+        )
+    except Exception as e:
+        logger = load_pipeline_logger()
+        error_details = {
+            "storage_name": container_name,
+        }
+        logger.error(
+            message="Auto-prompt generation failed.",
+            cause=e,
+            stack=traceback.format_exc(),
+            details=error_details,
+        )
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error generating prompts for data in '{container_name}'. Please try a lower limit.",
+        )
+
+    prompt_content = {
+        "entity_extraction_prompt": prompts[0],
+        "entity_summarization_prompt": prompts[1],
+        "community_summarization_prompt": prompts[2],
+    }
+    return prompt_content  # returns a fastapi.responses.JSONResponse object
diff --git a/backend/graphrag_app/api/query.py b/backend/graphrag_app/api/query.py
new file mode 100644
index 00000000..cacf95fe
--- /dev/null
+++ b/backend/graphrag_app/api/query.py
@@ -0,0 +1,224 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+ +import traceback +from pathlib import Path + +import yaml +from fastapi import ( + APIRouter, + HTTPException, +) +from graphrag.api.query import global_search, local_search +from graphrag.config.create_graphrag_config import create_graphrag_config + +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.typing.models import ( + GraphRequest, + GraphResponse, +) +from graphrag_app.typing.pipeline import PipelineJobState +from graphrag_app.utils.azure_clients import AzureClientManager +from graphrag_app.utils.common import ( + get_df, + sanitize_name, + validate_index_file_exist, +) +from graphrag_app.utils.pipeline import PipelineJob + +query_route = APIRouter( + prefix="/query", + tags=["Query Operations"], +) + + +@query_route.post( + "/global", + summary="Perform a global search across the knowledge graph index", + description="The global query method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole.", + response_model=GraphResponse, + responses={200: {"model": GraphResponse}}, +) +async def global_query(request: GraphRequest): + # this is a slightly modified version of the graphrag.query.cli.run_global_search method + index_name = request.index_name + sanitized_index_name = sanitize_name(index_name) + + if not _is_index_complete(sanitized_index_name): + raise HTTPException( + status_code=500, + detail=f"{index_name} not ready for querying.", + ) + + COMMUNITY_REPORT_TABLE = "output/create_final_community_reports.parquet" + COMMUNITIES_TABLE = "output/create_final_communities.parquet" + ENTITIES_TABLE = "output/create_final_entities.parquet" + NODES_TABLE = "output/create_final_nodes.parquet" + + validate_index_file_exist(sanitized_index_name, COMMUNITY_REPORT_TABLE) + validate_index_file_exist(sanitized_index_name, ENTITIES_TABLE) + validate_index_file_exist(sanitized_index_name, NODES_TABLE) + + if isinstance(request.community_level, int): + COMMUNITY_LEVEL = request.community_level + else: + # Current investigations show that community level 1 is the most useful for global search. 
Set this as the default value + COMMUNITY_LEVEL = 1 + + try: + # read the parquet files into DataFrames and add provenance information + community_report_table_path = ( + f"abfs://{sanitized_index_name}/{COMMUNITY_REPORT_TABLE}" + ) + communities_table_path = f"abfs://{sanitized_index_name}/{COMMUNITIES_TABLE}" + entities_table_path = f"abfs://{sanitized_index_name}/{ENTITIES_TABLE}" + nodes_table_path = f"abfs://{sanitized_index_name}/{NODES_TABLE}" + + # load parquet tables associated with the index + nodes_df = get_df(nodes_table_path) + community_reports_df = get_df(community_report_table_path) + communities_df = get_df(communities_table_path) + entities_df = get_df(entities_table_path) + + # load custom pipeline settings + ROOT_DIR = Path(__file__).resolve().parent.parent.parent + with (ROOT_DIR / "scripts/settings.yaml").open("r") as f: + data = yaml.safe_load(f) + + # layer the custom settings on top of the default configuration settings of graphrag + parameters = create_graphrag_config(data, ".") + + # perform async search + result = await global_search( + config=parameters, + nodes=nodes_df, + entities=entities_df, + communities=communities_df, + community_reports=community_reports_df, + community_level=COMMUNITY_LEVEL, + dynamic_community_selection=False, + response_type="Multiple Paragraphs", + query=request.query, + ) + + return GraphResponse(result=result[0], context_data=result[1]) + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Could not perform global search.", + cause=e, + stack=traceback.format_exc(), + ) + raise HTTPException(status_code=500, detail=None) + + +@query_route.post( + "/local", + summary="Perform a local search across the knowledge graph index.", + description="The local query method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).", + response_model=GraphResponse, + responses={200: {"model": GraphResponse}}, +) +async def local_query(request: GraphRequest): + index_name = request.index_name + sanitized_index_name = sanitize_name(index_name) + + if not _is_index_complete(sanitized_index_name): + raise HTTPException( + status_code=500, + detail=f"{index_name} not ready for querying.", + ) + + azure_client_manager = AzureClientManager() + blob_service_client = azure_client_manager.get_blob_service_client() + + COMMUNITY_REPORT_TABLE = "output/create_final_community_reports.parquet" + COVARIATES_TABLE = "output/create_final_covariates.parquet" + ENTITIES_TABLE = "output/create_final_entities.parquet" + NODES_TABLE = "output/create_final_nodes.parquet" + RELATIONSHIPS_TABLE = "output/create_final_relationships.parquet" + TEXT_UNITS_TABLE = "output/create_final_text_units.parquet" + + if isinstance(request.community_level, int): + COMMUNITY_LEVEL = request.community_level + else: + # Current investigations show that community level 2 is the most useful for local search. 
Set this as the default value + COMMUNITY_LEVEL = 2 + + # check for existence of files the query relies on to validate the index is complete + validate_index_file_exist(sanitized_index_name, COMMUNITY_REPORT_TABLE) + validate_index_file_exist(sanitized_index_name, ENTITIES_TABLE) + validate_index_file_exist(sanitized_index_name, NODES_TABLE) + validate_index_file_exist(sanitized_index_name, RELATIONSHIPS_TABLE) + validate_index_file_exist(sanitized_index_name, TEXT_UNITS_TABLE) + + community_report_table_path = ( + f"abfs://{sanitized_index_name}/{COMMUNITY_REPORT_TABLE}" + ) + covariates_table_path = f"abfs://{sanitized_index_name}/{COVARIATES_TABLE}" + entities_table_path = f"abfs://{sanitized_index_name}/{ENTITIES_TABLE}" + nodes_table_path = f"abfs://{sanitized_index_name}/{NODES_TABLE}" + relationships_table_path = f"abfs://{sanitized_index_name}/{RELATIONSHIPS_TABLE}" + text_units_table_path = f"abfs://{sanitized_index_name}/{TEXT_UNITS_TABLE}" + + nodes_df = get_df(nodes_table_path) + community_reports_df = get_df(community_report_table_path) + entities_df = get_df(entities_table_path) + relationships_df = get_df(relationships_table_path) + text_units_df = get_df(text_units_table_path) + + # If present, prepare each index's covariates dataframe for merging + index_container_client = blob_service_client.get_container_client( + sanitized_index_name + ) + covariates_df = None + if index_container_client.get_blob_client(COVARIATES_TABLE).exists(): + covariates_df = get_df(covariates_table_path) + + # load custom pipeline settings + ROOT_DIR = Path(__file__).resolve().parent.parent.parent + with (ROOT_DIR / "scripts/settings.yaml").open("r") as f: + data = yaml.safe_load(f) + + # layer the custom settings on top of the default configuration settings of graphrag + parameters = create_graphrag_config(data, ".") + # add index_names to vector_store args + parameters.embeddings.vector_store["collection_name"] = sanitized_index_name + + # perform async search + result = await local_search( + config=parameters, + nodes=nodes_df, + entities=entities_df, + community_reports=community_reports_df, + text_units=text_units_df, + relationships=relationships_df, + covariates=covariates_df, + community_level=COMMUNITY_LEVEL, + response_type="Multiple Paragraphs", + query=request.query, + ) + + return GraphResponse(result=result[0], context_data=result[1]) + + +def _is_index_complete(index_name: str) -> bool: + """ + Check if an index is ready for querying. + + An index is ready for use only if it exists in the jobs table in cosmos db and + the indexing build job has finished (i.e. 100 percent). Otherwise it is not ready. + + Args: + ----- + index_name (str) + Name of the index to check. + + Returns: bool + True if the index is ready for use, False otherwise. 
+ """ + if PipelineJob.item_exist(index_name): + pipeline_job = PipelineJob.load_item(index_name) + if PipelineJobState(pipeline_job.status) == PipelineJobState.COMPLETE: + return True + return False diff --git a/backend/src/api/query_streaming.py b/backend/graphrag_app/api/query_streaming.py similarity index 92% rename from backend/src/api/query_streaming.py rename to backend/graphrag_app/api/query_streaming.py index 6b80d551..a3abe7cb 100644 --- a/backend/src/api/query_streaming.py +++ b/backend/graphrag_app/api/query_streaming.py @@ -13,21 +13,23 @@ HTTPException, ) from fastapi.responses import StreamingResponse -from graphrag.config import create_graphrag_config -from graphrag.query.api import ( +from graphrag.api.query import ( global_search_streaming as global_search_streaming_internal, ) -from graphrag.query.api import local_search_streaming as local_search_streaming_internal +from graphrag.api.query import ( + local_search_streaming as local_search_streaming_internal, +) +from graphrag.config import create_graphrag_config -from src.api.azure_clients import AzureClientManager -from src.api.common import ( +from graphrag_app.api.query import _is_index_complete +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.typing.models import GraphRequest +from graphrag_app.utils.azure_clients import AzureClientManager +from graphrag_app.utils.common import ( + get_df, sanitize_name, validate_index_file_exist, ) -from src.api.query import _is_index_complete -from src.logger import LoggerSingleton -from src.models import GraphRequest -from src.utils import query as query_helper from .query import _get_embedding_description_store, _update_context @@ -43,7 +45,7 @@ description="The global query method generates answers by searching over all AI-generated community reports in a map-reduce fashion. 
This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole.", ) async def global_search_streaming(request: GraphRequest): - # this is a slightly modified version of src.api.query.global_query() method + # this is a slightly modified version of graphrag_app.api.query.global_query() method if isinstance(request.index_name, str): index_names = [request.index_name] else: @@ -105,7 +107,7 @@ async def global_search_streaming(request: GraphRequest): # read parquet files into DataFrames and add provenance information # note that nodes need to set before communities to that max community id makes sense - nodes_df = query_helper.get_df(nodes_table_path) + nodes_df = get_df(nodes_table_path) for i in nodes_df["human_readable_id"]: links["nodes"][i + max_vals["nodes"] + 1] = { "index_name": sanitized_index_names_link[index_name], @@ -123,7 +125,7 @@ async def global_search_streaming(request: GraphRequest): max_vals["nodes"] = nodes_df["human_readable_id"].max() nodes_dfs.append(nodes_df) - community_df = query_helper.get_df(community_report_table_path) + community_df = get_df(community_report_table_path) for i in community_df["community"].astype(int): links["community"][i + max_vals["community"] + 1] = { "index_name": sanitized_index_names_link[index_name], @@ -135,7 +137,7 @@ async def global_search_streaming(request: GraphRequest): max_vals["community"] = community_df["community"].astype(int).max() community_dfs.append(community_df) - entities_df = query_helper.get_df(entities_table_path) + entities_df = get_df(entities_table_path) for i in entities_df["human_readable_id"]: links["entities"][i + max_vals["entities"] + 1] = { "index_name": sanitized_index_names_link[index_name], @@ -185,8 +187,8 @@ async def global_search_streaming(request: GraphRequest): media_type="application/json", ) except Exception as e: - logger = LoggerSingleton().get_instance() - logger.on_error( + logger = load_pipeline_logger() + logger.error( message="Error encountered while streaming global search response", cause=e, stack=traceback.format_exc(), @@ -200,7 +202,7 @@ async def global_search_streaming(request: GraphRequest): description="The local query method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. 
What are the healing properties of chamomile?).", ) async def local_search_streaming(request: GraphRequest): - # this is a slightly modified version of src.api.query.local_query() method + # this is a slightly modified version of graphrag_app.api.query.local_query() method if isinstance(request.index_name, str): index_names = [request.index_name] else: @@ -275,7 +277,7 @@ async def local_search_streaming(request: GraphRequest): # read the parquet files into DataFrames and add provenance information # note that nodes need to set before communities to that max community id makes sense - nodes_df = query_helper.get_df(nodes_table_path) + nodes_df = get_df(nodes_table_path) for i in nodes_df["human_readable_id"]: links["nodes"][i + max_vals["nodes"] + 1] = { "index_name": sanitized_index_names_link[index_name], @@ -294,7 +296,7 @@ async def local_search_streaming(request: GraphRequest): max_vals["nodes"] = nodes_df["human_readable_id"].max() nodes_dfs.append(nodes_df) - community_df = query_helper.get_df(community_report_table_path) + community_df = get_df(community_report_table_path) for i in community_df["community"].astype(int): links["community"][i + max_vals["community"] + 1] = { "index_name": sanitized_index_names_link[index_name], @@ -306,7 +308,7 @@ async def local_search_streaming(request: GraphRequest): max_vals["community"] = community_df["community"].astype(int).max() community_dfs.append(community_df) - entities_df = query_helper.get_df(entities_table_path) + entities_df = get_df(entities_table_path) for i in entities_df["human_readable_id"]: links["entities"][i + max_vals["entities"] + 1] = { "index_name": sanitized_index_names_link[index_name], @@ -324,7 +326,7 @@ async def local_search_streaming(request: GraphRequest): max_vals["entities"] = entities_df["human_readable_id"].max() entities_dfs.append(entities_df) - relationships_df = query_helper.get_df(relationships_table_path) + relationships_df = get_df(relationships_table_path) for i in relationships_df["human_readable_id"].astype(int): links["relationships"][i + max_vals["relationships"] + 1] = { "index_name": sanitized_index_names_link[index_name], @@ -351,7 +353,7 @@ async def local_search_streaming(request: GraphRequest): ) relationships_dfs.append(relationships_df) - text_units_df = query_helper.get_df(text_units_table_path) + text_units_df = get_df(text_units_table_path) text_units_df["id"] = text_units_df["id"].apply( lambda x: f"{x}-{index_name}" ) @@ -361,7 +363,7 @@ async def local_search_streaming(request: GraphRequest): index_name ) if index_container_client.get_blob_client(COVARIATES_TABLE).exists(): - covariates_df = query_helper.get_df(covariates_table_path) + covariates_df = get_df(covariates_table_path) if i in covariates_df["human_readable_id"].astype(int): links["covariates"][i + max_vals["covariates"] + 1] = { "index_name": sanitized_index_names_link[index_name], @@ -402,9 +404,9 @@ async def local_search_streaming(request: GraphRequest): parameters.embeddings.vector_store["index_names"] = sanitized_index_names # internally write over the get_embedding_description_store # method to use the multi-index collection. 
- import graphrag.query.api + import graphrag.api.query - graphrag.query.api._get_embedding_description_store = ( + graphrag.api.query._get_embedding_description_store = ( _get_embedding_description_store ) @@ -428,8 +430,8 @@ async def local_search_streaming(request: GraphRequest): media_type="application/json", ) except Exception as e: - logger = LoggerSingleton().get_instance() - logger.on_error( + logger = load_pipeline_logger() + logger.error( message="Error encountered while streaming local search response", cause=e, stack=traceback.format_exc(), diff --git a/backend/src/api/source.py b/backend/graphrag_app/api/source.py similarity index 53% rename from backend/src/api/source.py rename to backend/graphrag_app/api/source.py index faf19af9..0ad621f1 100644 --- a/backend/src/api/source.py +++ b/backend/graphrag_app/api/source.py @@ -2,22 +2,24 @@ # Licensed under the MIT License. +import traceback + import pandas as pd -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Depends, HTTPException -from src.api.common import ( - get_pandas_storage_options, - sanitize_name, - validate_index_file_exist, -) -from src.logger import LoggerSingleton -from src.models import ( +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.typing.models import ( ClaimResponse, EntityResponse, RelationshipResponse, ReportResponse, TextUnitResponse, ) +from graphrag_app.utils.common import ( + pandas_storage_options, + sanitize_name, + validate_index_file_exist, +) source_route = APIRouter( prefix="/source", @@ -29,65 +31,77 @@ COVARIATES_TABLE = "output/create_final_covariates.parquet" ENTITY_EMBEDDING_TABLE = "output/create_final_entities.parquet" RELATIONSHIPS_TABLE = "output/create_final_relationships.parquet" -TEXT_UNITS_TABLE = "output/create_base_text_units.parquet" -DOCUMENTS_TABLE = "output/create_base_documents.parquet" +TEXT_UNITS_TABLE = "output/create_final_text_units.parquet" +DOCUMENTS_TABLE = "output/create_final_documents.parquet" @source_route.get( - "/report/{index_name}/{report_id}", + "/report/{container_name}/{report_id}", summary="Return a single community report.", response_model=ReportResponse, responses={200: {"model": ReportResponse}}, ) -async def get_report_info(index_name: str, report_id: str): +async def get_report_info( + report_id: int, + container_name: str, + sanitized_container_name: str = Depends(sanitize_name), +): # check for existence of file the query relies on to validate the index is complete - sanitized_index_name = sanitize_name(index_name) - validate_index_file_exist(sanitized_index_name, COMMUNITY_REPORT_TABLE) + validate_index_file_exist(sanitized_container_name, COMMUNITY_REPORT_TABLE) try: report_table = pd.read_parquet( - f"abfs://{sanitized_index_name}/{COMMUNITY_REPORT_TABLE}", - storage_options=get_pandas_storage_options(), + f"abfs://{sanitized_container_name}/{COMMUNITY_REPORT_TABLE}", + storage_options=pandas_storage_options(), ) # check if report_id exists in the index - if not report_table["community"].isin([report_id]).any(): - raise ValueError(f"Report '{report_id}' not found in index '{index_name}'.") + if not report_table["human_readable_id"].isin([report_id]).any(): + raise ValueError( + f"Report '{report_id}' not found in index '{container_name}'." 
+ ) # check if multiple reports with the same id exist (should not happen) - if len(report_table.loc[report_table["community"] == report_id]) > 1: + if len(report_table.loc[report_table["human_readable_id"] == report_id]) > 1: raise ValueError( - f"Multiple reports with id '{report_id}' found in index '{index_name}'." + f"Multiple reports with id '{report_id}' found in index '{container_name}'." ) report_content = report_table.loc[ - report_table["community"] == report_id, "full_content" + report_table["human_readable_id"] == report_id, "full_content_json" ].to_numpy()[0] return ReportResponse(text=report_content) - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error("Could not get report.") + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Could not get report.", + cause=e, + stack=traceback.format_exc(), + ) raise HTTPException( status_code=500, - detail=f"Error retrieving report '{report_id}' from index '{index_name}'.", + detail=f"Error retrieving report '{report_id}' from index '{container_name}'.", ) @source_route.get( - "/text/{index_name}/{text_unit_id}", + "/text/{container_name}/{text_unit_id}", summary="Return a single base text unit.", response_model=TextUnitResponse, responses={200: {"model": TextUnitResponse}}, ) -async def get_chunk_info(index_name: str, text_unit_id: str): +async def get_chunk_info( + text_unit_id: str, + container_name: str, + sanitized_container_name: str = Depends(sanitize_name), +): # check for existence of file the query relies on to validate the index is complete - sanitized_index_name = sanitize_name(index_name) - validate_index_file_exist(sanitized_index_name, TEXT_UNITS_TABLE) - validate_index_file_exist(sanitized_index_name, DOCUMENTS_TABLE) + validate_index_file_exist(sanitized_container_name, TEXT_UNITS_TABLE) + validate_index_file_exist(sanitized_container_name, DOCUMENTS_TABLE) try: text_units = pd.read_parquet( - f"abfs://{sanitized_index_name}/{TEXT_UNITS_TABLE}", - storage_options=get_pandas_storage_options(), + f"abfs://{sanitized_container_name}/{TEXT_UNITS_TABLE}", + storage_options=pandas_storage_options(), ) docs = pd.read_parquet( - f"abfs://{sanitized_index_name}/{DOCUMENTS_TABLE}", - storage_options=get_pandas_storage_options(), + f"abfs://{sanitized_container_name}/{DOCUMENTS_TABLE}", + storage_options=pandas_storage_options(), ) # rename columns for easy joining docs = docs[["id", "title"]].rename( @@ -97,9 +111,9 @@ async def get_chunk_info(index_name: str, text_unit_id: str): text_units = text_units.explode("document_ids") # verify that text_unit_id exists in the index - if not text_units["chunk_id"].isin([text_unit_id]).any(): + if not text_units["id"].isin([text_unit_id]).any(): raise ValueError( - f"Text unit '{text_unit_id}' not found in index '{index_name}'." + f"Text unit '{text_unit_id}' not found in index '{container_name}'." 
) # combine tables to create a (chunk_id -> source_document) mapping @@ -107,75 +121,91 @@ async def get_chunk_info(index_name: str, text_unit_id: str): docs, left_on="document_ids", right_on="document_id", how="left" ) row = merged_table.loc[ - merged_table["chunk_id"] == text_unit_id, ["chunk", "source_document"] + merged_table["id"] == text_unit_id, ["id", "source_document"] ] return TextUnitResponse( - text=row["chunk"].to_numpy()[0], + text=row["id"].to_numpy()[0], source_document=row["source_document"].to_numpy()[0], ) - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error("Could not get text chunk.") + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Could not get text chunk.", + cause=e, + stack=traceback.format_exc(), + ) raise HTTPException( status_code=500, - detail=f"Error retrieving text chunk '{text_unit_id}' from index '{index_name}'.", + detail=f"Error retrieving text chunk '{text_unit_id}' from index '{container_name}'.", ) @source_route.get( - "/entity/{index_name}/{entity_id}", + "/entity/{container_name}/{entity_id}", summary="Return a single entity.", response_model=EntityResponse, responses={200: {"model": EntityResponse}}, ) -async def get_entity_info(index_name: str, entity_id: int): +async def get_entity_info( + entity_id: int, + container_name: str, + sanitized_container_name: str = Depends(sanitize_name), +): # check for existence of file the query relies on to validate the index is complete - sanitized_index_name = sanitize_name(index_name) - validate_index_file_exist(sanitized_index_name, ENTITY_EMBEDDING_TABLE) + validate_index_file_exist(sanitized_container_name, ENTITY_EMBEDDING_TABLE) try: entity_table = pd.read_parquet( - f"abfs://{sanitized_index_name}/{ENTITY_EMBEDDING_TABLE}", - storage_options=get_pandas_storage_options(), + f"abfs://{sanitized_container_name}/{ENTITY_EMBEDDING_TABLE}", + storage_options=pandas_storage_options(), ) # check if entity_id exists in the index if not entity_table["human_readable_id"].isin([entity_id]).any(): - raise ValueError(f"Entity '{entity_id}' not found in index '{index_name}'.") + raise ValueError( + f"Entity '{entity_id}' not found in index '{container_name}'." 
+ ) row = entity_table[entity_table["human_readable_id"] == entity_id] return EntityResponse( - name=row["name"].to_numpy()[0], + name=row["title"].to_numpy()[0], description=row["description"].to_numpy()[0], text_units=row["text_unit_ids"].to_numpy()[0].tolist(), ) - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error("Could not get entity") + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Could not get entity", + cause=e, + stack=traceback.format_exc(), + ) raise HTTPException( status_code=500, - detail=f"Error retrieving entity '{entity_id}' from index '{index_name}'.", + detail=f"Error retrieving entity '{entity_id}' from index '{container_name}'.", ) @source_route.get( - "/claim/{index_name}/{claim_id}", + "/claim/{container_name}/{claim_id}", summary="Return a single claim.", response_model=ClaimResponse, responses={200: {"model": ClaimResponse}}, ) -async def get_claim_info(index_name: str, claim_id: int): +async def get_claim_info( + claim_id: int, + container_name: str, + sanitized_container_name: str = Depends(sanitize_name), +): # check for existence of file the query relies on to validate the index is complete # claims is optional in graphrag - sanitized_index_name = sanitize_name(index_name) try: - validate_index_file_exist(sanitized_index_name, COVARIATES_TABLE) + validate_index_file_exist(sanitized_container_name, COVARIATES_TABLE) except ValueError: raise HTTPException( status_code=500, - detail=f"Claim data unavailable for index '{index_name}'.", + detail=f"Claim data unavailable for index '{container_name}'.", ) try: claims_table = pd.read_parquet( - f"abfs://{sanitized_index_name}/{COVARIATES_TABLE}", - storage_options=get_pandas_storage_options(), + f"abfs://{sanitized_container_name}/{COVARIATES_TABLE}", + storage_options=pandas_storage_options(), ) claims_table.human_readable_id = claims_table.human_readable_id.astype( float @@ -191,56 +221,63 @@ async def get_claim_info(index_name: str, claim_id: int): text_unit_id=row["text_unit_id"].values[0], document_ids=row["document_ids"].values[0].tolist(), ) - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error("Could not get claim.") + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Could not get claim.", cause=e, stack=traceback.format_exc() + ) raise HTTPException( status_code=500, - detail=f"Error retrieving claim '{claim_id}' from index '{index_name}'.", + detail=f"Error retrieving claim '{claim_id}' for index '{container_name}'.", ) @source_route.get( - "/relationship/{index_name}/{relationship_id}", + "/relationship/{container_name}/{relationship_id}", summary="Return a single relationship.", response_model=RelationshipResponse, responses={200: {"model": RelationshipResponse}}, ) -async def get_relationship_info(index_name: str, relationship_id: int): +async def get_relationship_info( + relationship_id: int, + container_name: str, + sanitized_container_name: str = Depends(sanitize_name), +): # check for existence of file the query relies on to validate the index is complete - sanitized_index_name = sanitize_name(index_name) - validate_index_file_exist(sanitized_index_name, RELATIONSHIPS_TABLE) - validate_index_file_exist(sanitized_index_name, ENTITY_EMBEDDING_TABLE) + validate_index_file_exist(sanitized_container_name, RELATIONSHIPS_TABLE) + validate_index_file_exist(sanitized_container_name, ENTITY_EMBEDDING_TABLE) try: relationship_table = pd.read_parquet( - 
f"abfs://{sanitized_index_name}/{RELATIONSHIPS_TABLE}", - storage_options=get_pandas_storage_options(), + f"abfs://{sanitized_container_name}/{RELATIONSHIPS_TABLE}", + storage_options=pandas_storage_options(), ) entity_table = pd.read_parquet( - f"abfs://{sanitized_index_name}/{ENTITY_EMBEDDING_TABLE}", - storage_options=get_pandas_storage_options(), + f"abfs://{sanitized_container_name}/{ENTITY_EMBEDDING_TABLE}", + storage_options=pandas_storage_options(), ) row = relationship_table[ - relationship_table.human_readable_id == str(relationship_id) + relationship_table.human_readable_id == relationship_id ] return RelationshipResponse( source=row["source"].values[0], source_id=entity_table[ - entity_table.name == row["source"].values[0] + entity_table.title == row["source"].values[0] ].human_readable_id.values[0], target=row["target"].values[0], target_id=entity_table[ - entity_table.name == row["target"].values[0] + entity_table.title == row["target"].values[0] ].human_readable_id.values[0], description=row["description"].values[0], text_units=[ x[0] for x in row["text_unit_ids"].to_list() ], # extract text_unit_ids from a list of panda series ) - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error("Could not get relationship.") + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Could not get relationship.", cause=e, stack=traceback.format_exc() + ) raise HTTPException( status_code=500, - detail=f"Error retrieving relationship '{relationship_id}' from index '{index_name}'.", + detail=f"Error retrieving relationship '{relationship_id}' from index '{container_name}'.", ) diff --git a/backend/graphrag_app/logger/__init__.py b/backend/graphrag_app/logger/__init__.py new file mode 100644 index 00000000..5f1cc6db --- /dev/null +++ b/backend/graphrag_app/logger/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from graphrag_app.logger.application_insights_workflow_callbacks import ( + ApplicationInsightsWorkflowCallbacks, +) +from graphrag_app.logger.console_workflow_callbacks import ConsoleWorkflowCallbacks +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.logger.pipeline_job_updater import PipelineJobUpdater +from graphrag_app.logger.typing import ( + Logger, + PipelineAppInsightsReportingConfig, + PipelineReportingConfigTypes, +) + +__all__ = [ + "Logger", + "ApplicationInsightsWorkflowCallbacks", + "ConsoleWorkflowCallbacks", + "PipelineAppInsightsReportingConfig", + "PipelineJobUpdater", + "PipelineReportingConfigTypes", + "load_pipeline_logger", +] diff --git a/backend/src/logger/application_insights_workflow_callbacks.py b/backend/graphrag_app/logger/application_insights_workflow_callbacks.py similarity index 50% rename from backend/src/logger/application_insights_workflow_callbacks.py rename to backend/graphrag_app/logger/application_insights_workflow_callbacks.py index 0f0d08f1..03f7c6b8 100644 --- a/backend/src/logger/application_insights_workflow_callbacks.py +++ b/backend/graphrag_app/logger/application_insights_workflow_callbacks.py @@ -1,48 +1,44 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-import hashlib import logging -import time - -# from dataclasses import asdict from typing import ( Any, Dict, Optional, ) -from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter -from datashaper.workflow.workflow_callbacks import NoopWorkflowCallbacks -from opentelemetry._logs import ( - get_logger_provider, - set_logger_provider, -) -from opentelemetry.sdk._logs import ( - LoggerProvider, - LoggingHandler, -) -from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from azure.identity import DefaultAzureCredential +from azure.monitor.opentelemetry import configure_azure_monitor +from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks class ApplicationInsightsWorkflowCallbacks(NoopWorkflowCallbacks): - """A logger that writes to an AppInsights Workspace.""" + """A singleton class logger that writes to an AppInsights Workspace.""" _logger: logging.Logger _logger_name: str _logger_level: int - _logger_level_name: str _properties: Dict[str, Any] _workflow_name: str _index_name: str _num_workflow_steps: int _processed_workflow_steps: list[str] = [] + _instance = None + + def __new__(cls, *args, **kwargs): + # follow a singleton pattern to ensure only one instance of the logger is created + # reference: https://builtin.com/data-science/new-python + if not cls._instance: + cls._instance = super(ApplicationInsightsWorkflowCallbacks, cls).__new__( + cls + ) + return cls._instance + def __init__( self, - connection_string: str, - logger_name: str | None = None, - logger_level: int = logging.INFO, + logger_name: str = "graphrag-accelerator", index_name: str = "", num_workflow_steps: int = 0, properties: Dict[str, Any] = {}, @@ -51,62 +47,35 @@ def __init__( Initialize the AppInsightsReporter. Args: - connection_string (str): The connection string for the App Insights instance. logger_name (str | None, optional): The name of the logger. Defaults to None. - logger_level (int, optional): The logging level. Defaults to logging.INFO. index_name (str, optional): The name of an index. Defaults to "". num_workflow_steps (int): A list of workflow names ordered by their execution. Defaults to []. properties (Dict[str, Any], optional): Additional properties to be included in the log. Defaults to {}. """ - self._logger: logging.Logger - self._logger_name = logger_name - self._logger_level = logger_level - self._logger_level_name: str = logging.getLevelName(logger_level) - self._properties = properties - self._workflow_name = "N/A" - self._index_name = index_name - self._num_workflow_steps = num_workflow_steps - self._processed_workflow_steps = [] # maintain a running list of workflow steps that get processed - """Create a new logger with an AppInsights handler.""" - self.__init_logger(connection_string=connection_string) - - def __init_logger(self, connection_string, max_logger_init_retries: int = 10): - max_retry = max_logger_init_retries - while not (hasattr(self, "_logger")): - if max_retry == 0: - raise Exception( - "Failed to create logger. Could not disambiguate logger name." 
- ) - - # generate a unique logger name - current_time = str(time.time()) - unique_hash = hashlib.sha256(current_time.encode()).hexdigest() - self._logger_name = f"{self.__class__.__name__}-{unique_hash}" - if self._logger_name not in logging.Logger.manager.loggerDict: - # attach azure monitor log exporter to logger provider - logger_provider = LoggerProvider() - set_logger_provider(logger_provider) - exporter = AzureMonitorLogExporter(connection_string=connection_string) - get_logger_provider().add_log_record_processor( - BatchLogRecordProcessor( - exporter=exporter, - schedule_delay_millis=60000, - ) - ) - # instantiate new logger - self._logger = logging.getLogger(self._logger_name) - self._logger.propagate = False - # remove any existing handlers - self._logger.handlers.clear() - # fetch handler from logger provider and attach to class - self._logger.addHandler(LoggingHandler()) - # set logging level - self._logger.setLevel(logging.DEBUG) - - # reduce sentinel counter value - max_retry -= 1 - - def _format_details(self, details: Dict[str, Any] | None = None) -> Dict[str, Any]: + if not hasattr(self, "initialized"): + self.initialized = True + self._logger_name = logger_name + self._index_name = index_name + self._num_workflow_steps = num_workflow_steps + self._properties = properties + self._workflow_name = "N/A" + self._processed_workflow_steps = [] # if logger is used in a pipeline job, maintain a running list of workflows that are processed + # initialize a new logger with an AppInsights handler + self.__init_logger() + + def __init_logger(self, max_logger_init_retries: int = 10): + # Configure OpenTelemetry to use Azure Monitor with the + # APPLICATIONINSIGHTS_CONNECTION_STRING environment variable + configure_azure_monitor( + logger_name=self._logger_name, + disable_offline_storage=True, + enable_live_metrics=True, + credential=DefaultAzureCredential(), + ) + self._logger = logging.getLogger(self._logger_name) + self._logger.setLevel(logging.INFO) + + def _format_details(self, details: Dict[str, Any] = {}) -> Dict[str, Any]: """ Format the details dictionary to comply with the Application Insights structured logging Property column standard. @@ -117,11 +86,15 @@ def _format_details(self, details: Dict[str, Any] | None = None) -> Dict[str, An Returns: Dict[str, Any]: The formatted details dictionary with custom dimensions. """ - if not isinstance(details, dict) or (details is None): + if not isinstance(details, dict): return {} - return {"custom_dimensions": {**self._properties, **unwrap_dict(details)}} + extra_details = {**unwrap_dict(details)} + return { + **(self._properties if self._properties else {}), + **(extra_details if extra_details else {}), + } - def on_workflow_start(self, name: str, instance: object) -> None: + def workflow_start(self, name: str, instance: object) -> None: """Execute this callback when a workflow starts.""" self._workflow_name = name self._processed_workflow_steps.append(name) @@ -132,17 +105,14 @@ def on_workflow_start(self, name: str, instance: object) -> None: else "" ) # will take the form "(1/4)" message += f"Workflow{workflow_progress}: {name} started." 
- details = { - "workflow_name": name, - # "workflow_instance": str(instance), - } + details = {"workflow_name": name} if self._index_name: details["index_name"] = self._index_name self._logger.info( message, stack_info=False, extra=self._format_details(details=details) ) - def on_workflow_end(self, name: str, instance: object) -> None: + def workflow_end(self, name: str, instance: object) -> None: """Execute this callback when a workflow ends.""" message = f"Index: {self._index_name} -- " if self._index_name else "" workflow_progress = ( @@ -151,25 +121,21 @@ def on_workflow_end(self, name: str, instance: object) -> None: else "" ) # will take the form "(1/4)" message += f"Workflow{workflow_progress}: {name} complete." - details = { - "workflow_name": name, - # "workflow_instance": str(instance), - } + details = {"workflow_name": name} if self._index_name: details["index_name"] = self._index_name self._logger.info( message, stack_info=False, extra=self._format_details(details=details) ) - def on_error( + def error( self, message: str, cause: Optional[BaseException] = None, stack: Optional[str] = None, - details: Optional[dict] = None, + details: Optional[dict] = {}, ) -> None: """A call back handler for when an error occurs.""" - details = {} if details is None else details details = {"cause": str(cause), "stack": stack, **details} self._logger.error( message, @@ -178,28 +144,22 @@ def on_error( extra=self._format_details(details=details), ) - def on_warning(self, message: str, details: Optional[dict] = None) -> None: + def warning(self, message: str, details: Optional[dict] = None) -> None: """A call back handler for when a warning occurs.""" self._logger.warning( message, stack_info=False, extra=self._format_details(details=details) ) - def on_log(self, message: str, details: Optional[dict] = None) -> None: + def log(self, message: str, details: Optional[dict] = None) -> None: """A call back handler for when a log message occurs.""" self._logger.info( message, stack_info=False, extra=self._format_details(details=details) ) - def on_measure( - self, name: str, value: float, details: Optional[dict] = None - ) -> None: - """A call back handler for when a measurement occurs.""" - raise NotImplementedError("on_measure() not supported by this logger.") - def unwrap_dict(input_dict, parent_key="", sep="_"): """ - Recursively unwraps a nested dictionary by flattening it into a single-level dictionary. + Recursively unwrap/flatten a dictionary. Args: input_dict (dict): The input dictionary to be unwrapped. 
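The callback class above leans on two small patterns: a `__new__`-based singleton with an `initialized` guard so repeated construction reuses one logger, and a recursive flatten of nested `details` before they are attached as structured-logging properties. A minimal sketch of both, using hypothetical names rather than the real class:

```python
# Minimal sketch (hypothetical names): a __new__-based singleton with an
# "initialized" guard, plus recursive flattening of nested detail dicts into
# flat keys suitable for structured-logging properties.
import logging
from typing import Any, Dict, Optional


def unwrap_dict(input_dict: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]:
    """Recursively flatten a nested dict: {"a": {"b": 1}} -> {"a_b": 1}."""
    items: Dict[str, Any] = {}
    for key, value in input_dict.items():
        new_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            items.update(unwrap_dict(value, new_key, sep))
        else:
            items[new_key] = value
    return items


class SingletonLogger:
    _instance = None

    def __new__(cls, *args, **kwargs):
        # only one instance is ever created; later calls return the same object
        if not cls._instance:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, logger_name: str = "demo-logger"):
        if not hasattr(self, "initialized"):  # skip re-init on repeat construction
            self.initialized = True
            self._logger = logging.getLogger(logger_name)

    def log(self, message: str, details: Optional[Dict[str, Any]] = None) -> None:
        self._logger.info(message, extra=unwrap_dict(details or {}))


a, b = SingletonLogger(), SingletonLogger("ignored-on-second-call")
assert a is b
print(unwrap_dict({"workflow": {"name": "create_graph", "step": 2}}))
# {'workflow_name': 'create_graph', 'workflow_step': 2}
```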
diff --git a/backend/src/logger/blob_workflow_callbacks.py b/backend/graphrag_app/logger/blob_workflow_callbacks.py similarity index 90% rename from backend/src/logger/blob_workflow_callbacks.py rename to backend/graphrag_app/logger/blob_workflow_callbacks.py index 2b9ed60a..6a983334 100644 --- a/backend/src/logger/blob_workflow_callbacks.py +++ b/backend/graphrag_app/logger/blob_workflow_callbacks.py @@ -4,12 +4,11 @@ from datetime import datetime from typing import ( Any, - Optional, ) from azure.storage.blob import BlobServiceClient -from datashaper import NoopWorkflowCallbacks from devtools import pformat +from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks class BlobWorkflowCallbacks(NoopWorkflowCallbacks): @@ -72,23 +71,7 @@ def _write_log(self, log: dict[str, Any]): blob_client.append_block(pformat(log, indent=2) + "\n") self._num_blocks += 1 - def on_error( - self, - message: str, - cause: BaseException | None = None, - stack: str | None = None, - details: dict | None = None, - ): - """Report an error.""" - self._write_log({ - "type": "error", - "data": message, - "cause": str(cause), - "stack": stack, - "details": details, - }) - - def on_workflow_start(self, name: str, instance: object) -> None: + def workflow_start(self, name: str, instance: object) -> None: """Execute this callback when a workflow starts.""" self._workflow_name = name self._processed_workflow_steps.append(name) @@ -111,7 +94,7 @@ def on_workflow_start(self, name: str, instance: object) -> None: "details": details, }) - def on_workflow_end(self, name: str, instance: object) -> None: + def workflow_end(self, name: str, instance: object) -> None: """Execute this callback when a workflow ends.""" message = f"Index: {self._index_name} -- " if self._index_name else "" workflow_progress = ( @@ -132,16 +115,26 @@ def on_workflow_end(self, name: str, instance: object) -> None: "details": details, }) - def on_warning(self, message: str, details: dict | None = None): + def error( + self, + message: str, + cause: BaseException | None = None, + stack: str | None = None, + details: dict | None = None, + ): + """Report an error.""" + self._write_log({ + "type": "error", + "data": message, + "cause": str(cause), + "stack": stack, + "details": details, + }) + + def warning(self, message: str, details: dict | None = None): """Report a warning.""" self._write_log({"type": "warning", "data": message, "details": details}) - def on_log(self, message: str, details: dict | None = None): + def log(self, message: str, details: dict | None = None): """Report a generic log message.""" self._write_log({"type": "log", "data": message, "details": details}) - - def on_measure( - self, name: str, value: float, details: Optional[dict] = None - ) -> None: - """A call back handler for when a measurement occurs.""" - pass diff --git a/backend/src/logger/console_workflow_callbacks.py b/backend/graphrag_app/logger/console_workflow_callbacks.py similarity index 92% rename from backend/src/logger/console_workflow_callbacks.py rename to backend/graphrag_app/logger/console_workflow_callbacks.py index a9be1c1c..0b71f681 100644 --- a/backend/src/logger/console_workflow_callbacks.py +++ b/backend/graphrag_app/logger/console_workflow_callbacks.py @@ -11,7 +11,7 @@ Optional, ) -from datashaper.workflow.workflow_callbacks import NoopWorkflowCallbacks +from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks class ConsoleWorkflowCallbacks(NoopWorkflowCallbacks): @@ -107,7 +107,7 @@ def _format_details(self, details: 
Dict[str, Any] | None = None) -> Dict[str, An details = {} return {**self._properties, **details} - def on_workflow_start(self, name: str, instance: object) -> None: + def workflow_start(self, name: str, instance: object) -> None: """Execute this callback when a workflow starts.""" self._workflow_name = name self._processed_workflow_steps.append(name) @@ -128,7 +128,7 @@ def on_workflow_start(self, name: str, instance: object) -> None: message, stack_info=False, extra=self._format_details(details=details) ) - def on_workflow_end(self, name: str, instance: object) -> None: + def workflow_end(self, name: str, instance: object) -> None: """Execute this callback when a workflow ends.""" message = f"Index: {self._index_name} -- " if self._index_name else "" workflow_progress = ( @@ -147,7 +147,7 @@ def on_workflow_end(self, name: str, instance: object) -> None: message, stack_info=False, extra=self._format_details(details=details) ) - def on_error( + def error( self, message: str, cause: Optional[BaseException] = None, @@ -164,20 +164,14 @@ def on_error( extra=self._format_details(details=details), ) - def on_warning(self, message: str, details: Optional[dict] = None) -> None: + def warning(self, message: str, details: Optional[dict] = None) -> None: """A call back handler for when a warning occurs.""" self._logger.warning( message, stack_info=False, extra=self._format_details(details=details) ) - def on_log(self, message: str, details: Optional[dict] = None) -> None: + def log(self, message: str, details: Optional[dict] = None) -> None: """A call back handler for when a log message occurs.""" self._logger.info( message, stack_info=False, extra=self._format_details(details=details) ) - - def on_measure( - self, name: str, value: float, details: Optional[dict] = None - ) -> None: - """A call back handler for when a measurement occurs.""" - pass diff --git a/backend/graphrag_app/logger/load_logger.py b/backend/graphrag_app/logger/load_logger.py new file mode 100644 index 00000000..eea0a34c --- /dev/null +++ b/backend/graphrag_app/logger/load_logger.py @@ -0,0 +1,78 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import os +from pathlib import Path +from typing import List + +from graphrag.callbacks.file_workflow_callbacks import FileWorkflowCallbacks +from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks +from graphrag.callbacks.workflow_callbacks_manager import WorkflowCallbacksManager + +from graphrag_app.logger.application_insights_workflow_callbacks import ( + ApplicationInsightsWorkflowCallbacks, +) +from graphrag_app.logger.blob_workflow_callbacks import BlobWorkflowCallbacks +from graphrag_app.logger.console_workflow_callbacks import ConsoleWorkflowCallbacks +from graphrag_app.logger.typing import Logger +from graphrag_app.utils.azure_clients import AzureClientManager + + +def load_pipeline_logger( + logging_dir: str = "", + index_name: str = "", + num_workflow_steps: int = 0, +) -> WorkflowCallbacks: + """Create and load a list of loggers. + + This function creates loggers for two different scenarios. Loggers can be instantiated as generic loggers or associated with a specified indexing job. + 1. When an indexing job is running, custom index-specific loggers are created to log the job activity + 2. When the fastapi app is running, generic loggers are used to log the app's activities. 
+ """ + loggers: List[Logger] = [] + for logger_type in ["BLOB", "CONSOLE", "APP_INSIGHTS"]: + loggers.append(Logger[logger_type]) + + azure_client_manager = AzureClientManager() + callback_manager = WorkflowCallbacksManager() + for logger in loggers: + match logger: + case Logger.BLOB: + # create a dedicated container for logs + log_blob_name = "logs" + if logging_dir: + log_blob_name = os.path.join(logging_dir, log_blob_name) + # ensure the root directory exists; if not, create it + blob_service_client = azure_client_manager.get_blob_service_client() + container_root = Path(log_blob_name).parts[0] + if not blob_service_client.get_container_client( + container_root + ).exists(): + blob_service_client.create_container(container_root) + callback_manager.register( + BlobWorkflowCallbacks( + blob_service_client=blob_service_client, + container_name=log_blob_name, + index_name=index_name, + num_workflow_steps=num_workflow_steps, + ) + ) + case Logger.FILE: + callback_manager.register(FileWorkflowCallbacks(dir=logging_dir)) + case Logger.APP_INSIGHTS: + if os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"): + callback_manager.register( + ApplicationInsightsWorkflowCallbacks( + index_name=index_name, + num_workflow_steps=num_workflow_steps, + ) + ) + case Logger.CONSOLE: + callback_manager.register( + ConsoleWorkflowCallbacks( + index_name=index_name, num_workflow_steps=num_workflow_steps + ) + ) + case _: + print(f"WARNING: unknown logger type: {logger}. Skipping.") + return callback_manager diff --git a/backend/src/logger/pipeline_job_workflow_callbacks.py b/backend/graphrag_app/logger/pipeline_job_updater.py similarity index 55% rename from backend/src/logger/pipeline_job_workflow_callbacks.py rename to backend/graphrag_app/logger/pipeline_job_updater.py index 3cac3b99..2605ce4b 100644 --- a/backend/src/logger/pipeline_job_workflow_callbacks.py +++ b/backend/graphrag_app/logger/pipeline_job_updater.py @@ -1,18 +1,18 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from datashaper.workflow.workflow_callbacks import NoopWorkflowCallbacks +from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks -from src.typing.pipeline import PipelineJobState -from src.utils.pipeline import PipelineJob +from graphrag_app.typing.pipeline import PipelineJobState +from graphrag_app.utils.pipeline import PipelineJob -class PipelineJobWorkflowCallbacks(NoopWorkflowCallbacks): - """A reporter that writes to a stream (sys.stdout).""" +class PipelineJobUpdater(NoopWorkflowCallbacks): + """A callback that records pipeline updates.""" - def __init__(self, pipeline_job: "PipelineJob"): + def __init__(self, pipeline_job: PipelineJob): """ - This class defines a set of callback methods that can be used to report the progress and status of a workflow job. + This class defines a set of callback methods that can be used to log the progress of a pipeline job. It inherits from the NoopWorkflowCallbacks class, which provides default implementations for all the callback methods. 
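`load_pipeline_logger` above returns a `WorkflowCallbacksManager` with the console, blob, and (when `APPLICATIONINSIGHTS_CONNECTION_STRING` is set) App Insights callbacks registered, so a single call fans out to every sink. A hedged usage sketch, assuming the Azure storage/CosmosDB configuration the loader expects is in place and using a hypothetical index name:

```python
# Hedged usage sketch: obtain the shared pipeline logger and report an error.
# Assumes the storage account / CosmosDB configuration expected by the loader
# is available; "my-index" is a hypothetical index name.
import traceback

from graphrag_app.logger.load_logger import load_pipeline_logger

logger = load_pipeline_logger(index_name="my-index", num_workflow_steps=4)

try:
    raise RuntimeError("boom")
except Exception as e:
    # the callbacks manager forwards this to every registered logger
    logger.error(
        message="Example failure while indexing.",
        cause=e,
        stack=traceback.format_exc(),
    )
```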
Attributes: @@ -21,14 +21,12 @@ def __init__(self, pipeline_job: "PipelineJob"): """ self._pipeline_job = pipeline_job - def on_workflow_start(self, name: str, instance: object) -> None: + def workflow_start(self, name: str, instance: object) -> None: """Execute this callback when a workflow starts.""" - # if we are not already running, set the status to running - if self._pipeline_job.status != PipelineJobState.RUNNING: - self._pipeline_job.status = PipelineJobState.RUNNING + self._pipeline_job.status = PipelineJobState.RUNNING self._pipeline_job.progress = f"Workflow {name} started." - def on_workflow_end(self, name: str, instance: object) -> None: + def workflow_end(self, name: str, instance: object) -> None: """Execute this callback when a workflow ends.""" self._pipeline_job.completed_workflows.append(name) self._pipeline_job.update_db() diff --git a/backend/src/logger/typing.py b/backend/graphrag_app/logger/typing.py similarity index 83% rename from backend/src/logger/typing.py rename to backend/graphrag_app/logger/typing.py index 0e543847..533e535d 100644 --- a/backend/src/logger/typing.py +++ b/backend/graphrag_app/logger/typing.py @@ -5,14 +5,14 @@ from enum import Enum from typing import Literal -from graphrag.index.config import ( +from graphrag.index.config.reporting import ( PipelineReportingConfig, - reporting, + PipelineReportingConfigTypes, ) from pydantic import Field as pydantic_Field -class Reporters(Enum): +class Logger(Enum): BLOB = (1, "blob") CONSOLE = (2, "console") FILE = (3, "file") @@ -24,7 +24,7 @@ class PipelineAppInsightsReportingConfig( ): """Represents the ApplicationInsights reporting configuration for the pipeline.""" - type: Literal["app_insights"] = Reporters.APP_INSIGHTS.name.lower() + type: Literal["app_insights"] = Logger.APP_INSIGHTS.name.lower() """The type of reporting.""" connection_string: str = pydantic_Field( @@ -46,5 +46,5 @@ class PipelineAppInsightsReportingConfig( # add the new type to the existing PipelineReportingConfigTypes PipelineReportingConfigTypes = ( - reporting.PipelineReportingConfigTypes | PipelineAppInsightsReportingConfig + PipelineReportingConfigTypes | PipelineAppInsightsReportingConfig ) diff --git a/backend/src/main.py b/backend/graphrag_app/main.py similarity index 70% rename from backend/src/main.py rename to backend/graphrag_app/main.py index 176f1ef6..14e178de 100644 --- a/backend/src/main.py +++ b/backend/graphrag_app/main.py @@ -4,8 +4,10 @@ import os import traceback from contextlib import asynccontextmanager +from pathlib import Path import yaml +from azure.cosmos import PartitionKey, ThroughputProperties from fastapi import ( FastAPI, Request, @@ -19,15 +21,14 @@ config, ) -from src.api.azure_clients import AzureClientManager -from src.api.data import data_route -from src.api.graph import graph_route -from src.api.index import index_route -from src.api.index_configuration import index_configuration_route -from src.api.query import query_route -from src.api.query_streaming import query_streaming_route -from src.api.source import source_route -from src.logger import LoggerSingleton +from graphrag_app.api.data import data_route +from graphrag_app.api.graph import graph_route +from graphrag_app.api.index import index_route +from graphrag_app.api.prompt_tuning import prompt_tuning_route +from graphrag_app.api.query import query_route +from graphrag_app.api.source import source_route +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.utils.azure_clients import AzureClientManager async 
def catch_all_exceptions_middleware(request: Request, call_next): @@ -35,9 +36,9 @@ async def catch_all_exceptions_middleware(request: Request, call_next): try: return await call_next(request) except Exception as e: - reporter = LoggerSingleton().get_instance() + reporter = load_pipeline_logger() stack = traceback.format_exc() - reporter.on_error( + reporter.error( message="Unexpected internal server error", cause=e, stack=stack, @@ -49,10 +50,18 @@ def intialize_cosmosdb_setup(): """Initialise CosmosDB (if necessary) by setting up a database and containers that are expected at startup time.""" azure_client_manager = AzureClientManager() client = azure_client_manager.get_cosmos_client() - client.create_database_if_not_exists("graphrag") - client.get_database_client("graphrag").create_container_if_not_exists("jobs", "/id") - client.get_database_client("graphrag").create_container_if_not_exists( - "container-store", "/id" + db_client = client.create_database_if_not_exists("graphrag") + # create containers with default settings + throughput = ThroughputProperties( + auto_scale_max_throughput=1000, auto_scale_increment_percent=1 + ) + db_client.create_container_if_not_exists( + id="jobs", partition_key=PartitionKey(path="/id"), offer_throughput=throughput + ) + db_client.create_container_if_not_exists( + id="container-store", + partition_key=PartitionKey(path="/id"), + offer_throughput=throughput, ) @@ -81,8 +90,9 @@ async def lifespan(app: FastAPI): pod = core_v1.read_namespaced_pod( name=pod_name, namespace=os.environ["AKS_NAMESPACE"] ) - # load the cronjob manifest template and update PLACEHOLDER values with correct values using the pod spec - with open("indexing-job-manager-template.yaml", "r") as f: + # load the k8s cronjob template and update PLACEHOLDER values with correct values based on the running pod spec + ROOT_DIR = Path(__file__).resolve().parent.parent + with (ROOT_DIR / "manifests/cronjob.yaml").open("r") as f: manifest = yaml.safe_load(f) manifest["spec"]["jobTemplate"]["spec"]["template"]["spec"]["containers"][0][ "image" @@ -103,8 +113,8 @@ async def lifespan(app: FastAPI): ) except Exception as e: print("Failed to create graphrag cronjob.") - logger = LoggerSingleton().get_instance() - logger.on_error( + logger = load_pipeline_logger() + logger.error( message="Failed to create graphrag cronjob", cause=str(e), stack=traceback.format_exc(), @@ -119,8 +129,11 @@ async def lifespan(app: FastAPI): root_path=os.getenv("API_ROOT_PATH", ""), title="GraphRAG", version=os.getenv("GRAPHRAG_VERSION", "undefined_version"), - lifespan=lifespan, + lifespan=lifespan + if os.getenv("KUBERNETES_SERVICE_HOST") + else None, # only set lifespan if running in AKS (by checking for a default k8s environment variable) ) + app.middleware("http")(catch_all_exceptions_middleware) app.add_middleware( CORSMiddleware, @@ -132,8 +145,8 @@ async def lifespan(app: FastAPI): app.include_router(data_route) app.include_router(index_route) app.include_router(query_route) -app.include_router(query_streaming_route) -app.include_router(index_configuration_route) +# app.include_router(query_streaming_route) # temporarily disable streaming endpoints +app.include_router(prompt_tuning_route) app.include_router(source_route) app.include_router(graph_route) diff --git a/backend/src/typing/__init__.py b/backend/graphrag_app/typing/__init__.py similarity index 100% rename from backend/src/typing/__init__.py rename to backend/graphrag_app/typing/__init__.py diff --git a/backend/src/models.py 
b/backend/graphrag_app/typing/models.py similarity index 97% rename from backend/src/models.py rename to backend/graphrag_app/typing/models.py index 702ef388..229356a2 100644 --- a/backend/src/models.py +++ b/backend/graphrag_app/typing/models.py @@ -31,7 +31,7 @@ class EntityResponse(BaseModel): class GraphRequest(BaseModel): - index_name: str | List[str] + index_name: str query: str community_level: int | None = None diff --git a/backend/src/typing/pipeline.py b/backend/graphrag_app/typing/pipeline.py similarity index 59% rename from backend/src/typing/pipeline.py rename to backend/graphrag_app/typing/pipeline.py index 55ed9668..6f0bce63 100644 --- a/backend/src/typing/pipeline.py +++ b/backend/graphrag_app/typing/pipeline.py @@ -4,8 +4,12 @@ from enum import Enum -class PipelineJobState(Enum): +class PipelineJobState(str, Enum): SCHEDULED = "scheduled" RUNNING = "running" FAILED = "failed" COMPLETE = "complete" + + def __repr__(self): + """Get a string representation.""" + return f'"{self.value}"' diff --git a/backend/graphrag_app/utils/__init__.py b/backend/graphrag_app/utils/__init__.py new file mode 100644 index 00000000..59e481eb --- /dev/null +++ b/backend/graphrag_app/utils/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. diff --git a/backend/src/api/azure_clients.py b/backend/graphrag_app/utils/azure_clients.py similarity index 100% rename from backend/src/api/azure_clients.py rename to backend/graphrag_app/utils/azure_clients.py diff --git a/backend/graphrag_app/utils/common.py b/backend/graphrag_app/utils/common.py new file mode 100644 index 00000000..ad0dc29f --- /dev/null +++ b/backend/graphrag_app/utils/common.py @@ -0,0 +1,191 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import hashlib +import os +import traceback + +import pandas as pd +from azure.core.exceptions import ResourceNotFoundError +from azure.cosmos import ContainerProxy, exceptions +from azure.identity import DefaultAzureCredential +from azure.storage.blob.aio import ContainerClient +from fastapi import HTTPException + +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.utils.azure_clients import AzureClientManager + + +def get_df( + table_path: str, +) -> pd.DataFrame: + df = pd.read_parquet( + table_path, + storage_options=pandas_storage_options(), + ) + return df + + +def pandas_storage_options() -> dict: + """Generate the storage options required by pandas to read parquet files from Storage.""" + # For more information on the options available, see: https://github.com/fsspec/adlfs?tab=readme-ov-file#setting-credentials + azure_client_manager = AzureClientManager() + options = { + "account_name": azure_client_manager.storage_account_name, + "account_host": azure_client_manager.storage_account_hostname, + } + if os.getenv("STORAGE_CONNECTION_STRING"): + options["connection_string"] = os.getenv("STORAGE_CONNECTION_STRING") + else: + options["credential"] = DefaultAzureCredential() + return options + + +def delete_storage_container_if_exist(container_name: str): + """ + Delete a blob container. If it does not exist, do nothing. + If exception is raised, the calling function should catch it. 
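Making `PipelineJobState` inherit from both `str` and `Enum` (with a quoted `__repr__`) lets status values read back from CosmosDB documents compare directly against the enum members. A small self-contained sketch of that behavior:

```python
# Sketch of the str-Enum behavior introduced above: members compare equal to
# their raw string values (handy for statuses stored in CosmosDB documents),
# and repr() shows the quoted value.
from enum import Enum


class PipelineJobState(str, Enum):
    SCHEDULED = "scheduled"
    RUNNING = "running"
    FAILED = "failed"
    COMPLETE = "complete"

    def __repr__(self):
        """Get a string representation."""
        return f'"{self.value}"'


state = PipelineJobState("running")  # construct from a stored string value
assert state == "running"            # str mixin: equal to the raw value
assert state == PipelineJobState.RUNNING
print(repr(state))                   # "running"
```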
+ """ + azure_client_manager = AzureClientManager() + blob_service_client = azure_client_manager.get_blob_service_client() + try: + blob_service_client.delete_container(container_name) + except ResourceNotFoundError: + # do nothing if container does not exist + pass + + +def delete_cosmos_container_item_if_exist(container: str, item_id: str): + """ + Delete an item from a cosmosdb container. If it does not exist, do nothing. + If exception is raised, the calling function should catch it. + """ + azure_client_manager = AzureClientManager() + try: + azure_client_manager.get_cosmos_container_client( + database="graphrag", container=container + ).delete_item(item_id, item_id) + except ResourceNotFoundError: + # do nothing if item does not exist + pass + + +def validate_index_file_exist(sanitized_container_name: str, file_name: str): + """ + Check if index exists and that the specified blob file exists. + + A "valid" index is defined by having an entry in the container-store table in cosmos db. + Further checks are done to ensure the blob container and file exist. + + Args: + ----- + sanitized_container_name (str) + Sanitized name of a blob container. + file_name (str) + The blob file to be validated. + + Raises: ValueError + """ + azure_client_manager = AzureClientManager() + original_container_name = desanitize_name(sanitized_container_name) + try: + cosmos_container_client = get_cosmos_container_store_client() + cosmos_container_client.read_item( + sanitized_container_name, sanitized_container_name + ) + except Exception: + raise ValueError(f"{original_container_name} is not a valid index.") + # check for file existence + index_container_client = ( + azure_client_manager.get_blob_service_client().get_container_client( + sanitized_container_name + ) + ) + if not index_container_client.exists(): + raise ValueError(f"{original_container_name} not found.") + if not index_container_client.get_blob_client(file_name).exists(): + raise ValueError( + f"File {file_name} unavailable for container {original_container_name}." + ) + + +def get_cosmos_container_store_client() -> ContainerProxy: + try: + azure_client_manager = AzureClientManager() + return azure_client_manager.get_cosmos_container_client( + database="graphrag", container="container-store" + ) + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Error fetching cosmosdb client.", + cause=e, + stack=traceback.format_exc(), + ) + raise HTTPException(status_code=500, detail="Error fetching cosmosdb client.") + + +async def get_blob_container_client(name: str) -> ContainerClient: + try: + azure_client_manager = AzureClientManager() + blob_service_client = azure_client_manager.get_blob_service_client_async() + container_client = blob_service_client.get_container_client(name) + if not await container_client.exists(): + await container_client.create_container() + return container_client + except Exception as e: + logger = load_pipeline_logger() + logger.error( + message="Error fetching storage client.", + cause=e, + stack=traceback.format_exc(), + ) + raise HTTPException(status_code=500, detail="Error fetching storage client.") + + +def sanitize_name(container_name: str) -> str: + """ + Sanitize a user-provided string to be used as an Azure Storage container name. + Convert the string to a SHA256 hash, then truncate to 128 bit length to ensure + it is within the 63 character limit imposed by Azure Storage. + + The sanitized name will be used to identify container names in both Azure Storage and CosmosDB. 
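`get_blob_container_client` above is async and creates the container on first use. A hedged usage sketch, assuming storage credentials are available in the environment and using a hypothetical (already sanitized) container name:

```python
# Hedged usage sketch: list blobs through the async helper defined above.
# Assumes DefaultAzureCredential (or STORAGE_CONNECTION_STRING) can reach the
# storage account; "my-container" is a hypothetical, already-sanitized name.
import asyncio

from graphrag_app.utils.common import get_blob_container_client


async def main() -> None:
    container_client = await get_blob_container_client("my-container")
    blob_names = [blob.name async for blob in container_client.list_blobs()]
    print(blob_names)


asyncio.run(main())
```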
+ + Args: + ----- + name (str) + The name to be sanitized. + + Returns: str + The sanitized name. + """ + container_name = container_name.encode() + hashed_name = hashlib.sha256(container_name) + truncated_hash = hashed_name.digest()[:16] # get the first 16 bytes (128 bits) + return truncated_hash.hex() + + +def desanitize_name(sanitized_container_name: str) -> str | None: + """ + Reverse the sanitization process by retrieving the original user-provided name. + + Args: + ----- + sanitized_name (str) + The sanitized name to be converted back to the original name. + + Returns: str | None + The original human-readable name or None if it does not exist. + """ + try: + container_store_client = get_cosmos_container_store_client() + try: + return container_store_client.read_item( + sanitized_container_name, sanitized_container_name + )["human_readable_name"] + except exceptions.CosmosResourceNotFoundError: + return None + except Exception: + raise HTTPException( + status_code=500, detail="Error retrieving original container name." + ) diff --git a/backend/src/utils/pipeline.py b/backend/graphrag_app/utils/pipeline.py similarity index 86% rename from backend/src/utils/pipeline.py rename to backend/graphrag_app/utils/pipeline.py index b05cf5c4..1e1b3ab1 100644 --- a/backend/src/utils/pipeline.py +++ b/backend/graphrag_app/utils/pipeline.py @@ -8,9 +8,9 @@ from azure.cosmos.exceptions import CosmosHttpResponseError -from src.api.azure_clients import AzureClientManager -from src.api.common import sanitize_name -from src.typing.pipeline import PipelineJobState +from graphrag_app.typing.pipeline import PipelineJobState +from graphrag_app.utils.azure_clients import AzureClientManager +from graphrag_app.utils.common import sanitize_name @dataclass @@ -18,6 +18,7 @@ class PipelineJob: """Indexing Pipeline Job metadata This is a custom class where the attributes are stored/retrieved in cosmosdb. 
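`sanitize_name` above reduces any user-provided name to the hex form of the first 16 bytes of its SHA256 hash, so every container name is a deterministic 32-character string under Azure Storage's 63-character limit, while the human-readable original is recovered from the `container-store` CosmosDB container rather than from the hash itself. A standalone sketch of the arithmetic:

```python
# Standalone sketch of the sanitization scheme above: SHA256, truncated to
# 128 bits (16 bytes), rendered as hex -- a deterministic 32-character name.
import hashlib

human_readable_name = "My GraphRAG index!"  # arbitrary user input
digest = hashlib.sha256(human_readable_name.encode()).digest()
sanitized = digest[:16].hex()

print(sanitized)       # lowercase 32-character hex string
print(len(sanitized))  # 32, comfortably under Azure's 63-character limit
# the hash is one-way: desanitize_name() recovers the original by looking it
# up in the "container-store" CosmosDB container instead of inverting the hash
```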
+ # TODO: fix the class so initiliazation is not required """ _id: str = field(default=None, init=False) @@ -27,16 +28,19 @@ class PipelineJob: _sanitized_index_name: str = field(default=None, init=False) _human_readable_storage_name: str = field(default=None, init=False) _sanitized_storage_name: str = field(default=None, init=False) - _entity_extraction_prompt: str = field(default=None, init=False) - _community_report_prompt: str = field(default=None, init=False) - _summarize_descriptions_prompt: str = field(default=None, init=False) + _all_workflows: List[str] = field(default_factory=list, init=False) _completed_workflows: List[str] = field(default_factory=list, init=False) _failed_workflows: List[str] = field(default_factory=list, init=False) + _status: PipelineJobState = field(default=None, init=False) _percent_complete: float = field(default=0, init=False) _progress: str = field(default="", init=False) + _entity_extraction_prompt: str = field(default=None, init=False) + _entity_summarization_prompt: str = field(default=None, init=False) + _community_summarization_prompt: str = field(default=None, init=False) + @staticmethod def _jobs_container(): azure_storage_client = AzureClientManager() @@ -51,8 +55,8 @@ def create_item( human_readable_index_name: str, human_readable_storage_name: str, entity_extraction_prompt: str | None = None, - community_report_prompt: str | None = None, - summarize_descriptions_prompt: str | None = None, + entity_summarization_prompt: str | None = None, + community_summarization_prompt: str | None = None, **kwargs, ) -> "PipelineJob": """ @@ -95,18 +99,21 @@ def create_item( instance._sanitized_index_name = sanitize_name(human_readable_index_name) instance._human_readable_storage_name = human_readable_storage_name instance._sanitized_storage_name = sanitize_name(human_readable_storage_name) - instance._entity_extraction_prompt = entity_extraction_prompt - instance._community_report_prompt = community_report_prompt - instance._summarize_descriptions_prompt = summarize_descriptions_prompt + instance._all_workflows = kwargs.get("all_workflows", []) instance._completed_workflows = kwargs.get("completed_workflows", []) instance._failed_workflows = kwargs.get("failed_workflows", []) + instance._status = PipelineJobState( kwargs.get("status", PipelineJobState.SCHEDULED.value) ) instance._percent_complete = kwargs.get("percent_complete", 0.0) instance._progress = kwargs.get("progress", "") + instance._entity_extraction_prompt = entity_extraction_prompt + instance._entity_summarization_prompt = entity_summarization_prompt + instance._community_summarization_prompt = community_summarization_prompt + # Create the item in the database instance.update_db() return instance @@ -140,17 +147,22 @@ def load_item(cls, id: str) -> "PipelineJob": "human_readable_storage_name" ) instance._sanitized_storage_name = db_item.get("sanitized_storage_name") - instance._entity_extraction_prompt = db_item.get("entity_extraction_prompt") - instance._community_report_prompt = db_item.get("community_report_prompt") - instance._summarize_descriptions_prompt = db_item.get( - "summarize_descriptions_prompt" - ) + instance._all_workflows = db_item.get("all_workflows", []) instance._completed_workflows = db_item.get("completed_workflows", []) instance._failed_workflows = db_item.get("failed_workflows", []) + instance._status = PipelineJobState(db_item.get("status")) instance._percent_complete = db_item.get("percent_complete", 0.0) instance._progress = db_item.get("progress", "") + + 
instance._entity_extraction_prompt = db_item.get("entity_extraction_prompt") + instance._entity_summarization_prompt = db_item.get( + "entity_summarization_prompt" + ) + instance._community_summarization_prompt = db_item.get( + "community_summarization_prompt" + ) return instance @staticmethod @@ -191,10 +203,12 @@ def dump_model(self) -> dict: } if self._entity_extraction_prompt: model["entity_extraction_prompt"] = self._entity_extraction_prompt - if self._community_report_prompt: - model["community_report_prompt"] = self._community_report_prompt - if self._summarize_descriptions_prompt: - model["summarize_descriptions_prompt"] = self._summarize_descriptions_prompt + if self._entity_summarization_prompt: + model["entity_summarization_prompt"] = self._entity_summarization_prompt + if self._community_summarization_prompt: + model["community_summarization_prompt"] = ( + self._community_summarization_prompt + ) return model def update_db(self): @@ -268,21 +282,23 @@ def entity_extraction_prompt(self, entity_extraction_prompt: str) -> None: self.update_db() @property - def community_report_prompt(self) -> str: - return self._community_report_prompt + def entity_summarization_prompt(self) -> str: + return self._entity_summarization_prompt - @community_report_prompt.setter - def community_report_prompt(self, community_report_prompt: str) -> None: - self._community_report_prompt = community_report_prompt + @entity_summarization_prompt.setter + def entity_summarization_prompt(self, entity_summarization_prompt: str) -> None: + self._entity_summarization_prompt = entity_summarization_prompt self.update_db() @property - def summarize_descriptions_prompt(self) -> str: - return self._summarize_descriptions_prompt - - @summarize_descriptions_prompt.setter - def summarize_descriptions_prompt(self, summarize_descriptions_prompt: str) -> None: - self._summarize_descriptions_prompt = summarize_descriptions_prompt + def community_summarization_prompt(self) -> str: + return self._community_summarization_prompt + + @community_summarization_prompt.setter + def community_summarization_prompt( + self, community_summarization_prompt: str + ) -> None: + self._community_summarization_prompt = community_summarization_prompt self.update_db() @property diff --git a/backend/indexing-job-manager-template.yaml b/backend/indexing-job-manager-template.yaml deleted file mode 100644 index 6458d981..00000000 --- a/backend/indexing-job-manager-template.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -# NOTE: the location of this file is important as it gets referenced by the src/main.py script -# and depends on the relative path to this file when uvicorn is run - -apiVersion: batch/v1 -kind: CronJob -metadata: - name: graphrag-index-manager -spec: - schedule: "*/5 * * * *" - jobTemplate: - spec: - template: - metadata: - labels: - azure.workload.identity/use: "true" - spec: - serviceAccountName: PLACEHOLDER - restartPolicy: OnFailure - containers: - - name: index-job-manager - image: PLACEHOLDER - imagePullPolicy: Always - resources: - requests: - cpu: "0.5" - memory: "0.5Gi" - limits: - cpu: "1" - memory: "1Gi" - envFrom: - - configMapRef: - name: graphrag - command: - - python - - "manage-indexing-jobs.py" diff --git a/backend/indexing-job-template.yaml b/backend/indexing-job-template.yaml deleted file mode 100644 index fa9914ea..00000000 --- a/backend/indexing-job-template.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. - -# NOTE: the location of this file is important as it gets referenced by the manage-indexing-jobs.py script -# and depends on the relative path to this file when uvicorn is run - -apiVersion: batch/v1 -kind: Job -metadata: - name: PLACEHOLDER -spec: - ttlSecondsAfterFinished: 30 - backoffLimit: 3 - template: - metadata: - labels: - azure.workload.identity/use: "true" - spec: - serviceAccountName: PLACEHOLDER - restartPolicy: OnFailure - nodeSelector: - workload: graphrag-indexing - containers: - - name: graphrag - image: PLACEHOLDER - imagePullPolicy: Always - resources: - requests: - cpu: "5" - memory: "36Gi" - limits: - cpu: "8" - memory: "64Gi" - envFrom: - - configMapRef: - name: graphrag - command: [PLACEHOLDER] diff --git a/backend/manifests/cronjob.yaml b/backend/manifests/cronjob.yaml new file mode 100644 index 00000000..f731edf0 --- /dev/null +++ b/backend/manifests/cronjob.yaml @@ -0,0 +1,41 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +# NOTE: the location of this file is important as it gets referenced by the src/main.py script +# and depends on the relative path to this file when uvicorn is run + +apiVersion: batch/v1 +kind: CronJob +metadata: + name: graphrag-index-manager +spec: + schedule: "*/5 * * * *" + jobTemplate: + spec: + ttlSecondsAfterFinished: 180 + template: + metadata: + labels: + azure.workload.identity/use: "true" + spec: + serviceAccountName: PLACEHOLDER + restartPolicy: OnFailure + containers: + - name: index-job-manager + image: PLACEHOLDER + # override default WORKDIR with absolute path to the scripts directory + workingDir: "/backend/scripts" + imagePullPolicy: Always + resources: + requests: + cpu: "0.5" + memory: "0.5Gi" + limits: + cpu: "1" + memory: "1Gi" + envFrom: + - configMapRef: + name: graphrag + command: + - python + - "job-scheduler.py" diff --git a/backend/manifests/job.yaml b/backend/manifests/job.yaml new file mode 100644 index 00000000..502c345f --- /dev/null +++ b/backend/manifests/job.yaml @@ -0,0 +1,39 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
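The new manifests above keep `PLACEHOLDER` values on purpose; as the `main.py` hunk earlier in this diff shows, the API pod loads `manifests/cronjob.yaml`, substitutes real values taken from its own pod spec, and submits the CronJob. A hedged sketch of that substitution (the image and service account below are illustrative, not real values):

```python
# Hedged sketch of filling in the PLACEHOLDER fields of manifests/cronjob.yaml,
# mirroring the patching done in main.py's lifespan handler. The image and
# service account names below are hypothetical.
import yaml

with open("manifests/cronjob.yaml") as f:
    manifest = yaml.safe_load(f)

pod_spec = manifest["spec"]["jobTemplate"]["spec"]["template"]["spec"]
pod_spec["containers"][0]["image"] = "myregistry.azurecr.io/graphrag:latest"
pod_spec["serviceAccountName"] = "graphrag-workload-identity"

print(yaml.safe_dump(manifest, sort_keys=False))
```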
+ +# NOTE: the location of this file is important as it gets referenced by the job-scheduler.py script +# and depends on the relative path to this file when uvicorn is run + +apiVersion: batch/v1 +kind: Job +metadata: + name: PLACEHOLDER +spec: + ttlSecondsAfterFinished: 300 + backoffLimit: 3 + template: + metadata: + labels: + azure.workload.identity/use: "true" + spec: + serviceAccountName: PLACEHOLDER + restartPolicy: OnFailure + nodeSelector: + workload: graphrag-indexing + containers: + - name: graphrag + image: PLACEHOLDER + # override default WORKDIR with absolute path to the scripts directory + workingDir: "/backend/scripts" + imagePullPolicy: Always + resources: + requests: + cpu: "5" + memory: "36Gi" + limits: + cpu: "8" + memory: "64Gi" + envFrom: + - configMapRef: + name: graphrag + command: [PLACEHOLDER] diff --git a/backend/poetry.lock b/backend/poetry.lock index c7cdf554..f3506626 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -181,13 +181,13 @@ files = [ [[package]] name = "anyio" -version = "4.7.0" +version = "4.8.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" files = [ - {file = "anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352"}, - {file = "anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48"}, + {file = "anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a"}, + {file = "anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a"}, ] [package.dependencies] @@ -198,7 +198,7 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -215,17 +215,6 @@ files = [ [package.dependencies] six = "*" -[[package]] -name = "applicationinsights" -version = "0.11.10" -description = "This project extends the Application Insights API surface to support Python." 
-optional = false -python-versions = "*" -files = [ - {file = "applicationinsights-0.11.10-py2.py3-none-any.whl", hash = "sha256:e89a890db1c6906b6a7d0bcfd617dac83974773c64573147c8d6654f9cf2a6ea"}, - {file = "applicationinsights-0.11.10.tar.gz", hash = "sha256:0b761f3ef0680acf4731906dfc1807faa6f2a57168ae74592db0084a6099f7b3"}, -] - [[package]] name = "appnope" version = "0.1.4" @@ -313,6 +302,23 @@ types-python-dateutil = ">=2.8.10" doc = ["doc8", "sphinx (>=7.0.0)", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx_rtd_theme (>=1.3.0)"] test = ["dateparser (==1.*)", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "pytz (==2021.1)", "simplejson (==3.*)"] +[[package]] +name = "asgiref" +version = "3.8.1" +description = "ASGI specs, helper code, and adapters" +optional = false +python-versions = ">=3.8" +files = [ + {file = "asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47"}, + {file = "asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} + +[package.extras] +tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] + [[package]] name = "asttokens" version = "2.4.1" @@ -358,13 +364,13 @@ files = [ [[package]] name = "attrs" -version = "24.3.0" +version = "25.1.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" files = [ - {file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"}, - {file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"}, + {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"}, + {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"}, ] [package.extras] @@ -422,6 +428,21 @@ typing-extensions = ">=4.6.0" [package.extras] aio = ["aiohttp (>=3.0)"] +[[package]] +name = "azure-core-tracing-opentelemetry" +version = "1.0.0b11" +description = "Microsoft Azure Azure Core OpenTelemetry plugin Library for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "azure-core-tracing-opentelemetry-1.0.0b11.tar.gz", hash = "sha256:a230d1555838b5d07b7594221cd639ea7bc24e29c881e5675e311c6067bad4f5"}, + {file = "azure_core_tracing_opentelemetry-1.0.0b11-py3-none-any.whl", hash = "sha256:016cefcaff2900fb5cdb7a8a7abd03e9c266622c06e26b3fe6dafa54c4b48bf5"}, +] + +[package.dependencies] +azure-core = ">=1.24.0,<2.0.0" +opentelemetry-api = ">=1.12.0,<2.0.0" + [[package]] name = "azure-cosmos" version = "4.9.0" @@ -471,15 +492,40 @@ msal = ">=1.30.0" msal-extensions = ">=1.2.0" typing-extensions = ">=4.0.0" +[[package]] +name = "azure-monitor-opentelemetry" +version = "1.6.4" +description = "Microsoft Azure Monitor Opentelemetry Distro Client Library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "azure_monitor_opentelemetry-1.6.4-py3-none-any.whl", hash = "sha256:014142ffa420bc2b287ff3bd30de6c31d64b2846423d011a8280334d7afcb01a"}, + {file = "azure_monitor_opentelemetry-1.6.4.tar.gz", hash = "sha256:9f5ce4c666caf1f9b536f8ab4ee207dff94777d568517c74f26e3327f75c3fc3"}, +] + +[package.dependencies] +azure-core = ">=1.28.0,<2.0.0" +azure-core-tracing-opentelemetry = ">=1.0.0b11,<1.1.0" +azure-monitor-opentelemetry-exporter = ">=1.0.0b31,<1.1.0" 
+opentelemetry-instrumentation-django = ">=0.49b0,<1.0" +opentelemetry-instrumentation-fastapi = ">=0.49b0,<1.0" +opentelemetry-instrumentation-flask = ">=0.49b0,<1.0" +opentelemetry-instrumentation-psycopg2 = ">=0.49b0,<1.0" +opentelemetry-instrumentation-requests = ">=0.49b0,<1.0" +opentelemetry-instrumentation-urllib = ">=0.49b0,<1.0" +opentelemetry-instrumentation-urllib3 = ">=0.49b0,<1.0" +opentelemetry-resource-detector-azure = ">=0.1.4,<0.2.0" +opentelemetry-sdk = ">=1.28,<2.0" + [[package]] name = "azure-monitor-opentelemetry-exporter" -version = "1.0.0b32" +version = "1.0.0b33" description = "Microsoft Azure Monitor Opentelemetry Exporter Client Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure_monitor_opentelemetry_exporter-1.0.0b32-py2.py3-none-any.whl", hash = "sha256:48fe5e2c29e509b65413c9715040a9dc6cc052bb7cc932933535373ca0c54ba7"}, - {file = "azure_monitor_opentelemetry_exporter-1.0.0b32.tar.gz", hash = "sha256:f16d1d5636fa3dd834f3f63972dee78c9f17fd296a39525772e6f281e7f258cd"}, + {file = "azure_monitor_opentelemetry_exporter-1.0.0b33-py2.py3-none-any.whl", hash = "sha256:ebcf86c9b717f9b82bbceb89b55b09fd7147700264ae7ecabc424d90a9f5f01a"}, + {file = "azure_monitor_opentelemetry_exporter-1.0.0b33.tar.gz", hash = "sha256:1cbbd41b4cb44a2ade016408b23a21762583b9da913d8ae259f29356d3a6d0ae"}, ] [package.dependencies] @@ -509,13 +555,13 @@ typing-extensions = ">=4.6.0" [[package]] name = "azure-storage-blob" -version = "12.24.0" +version = "12.24.1" description = "Microsoft Azure Blob Storage Client Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure_storage_blob-12.24.0-py3-none-any.whl", hash = "sha256:4f0bb4592ea79a2d986063696514c781c9e62be240f09f6397986e01755bc071"}, - {file = "azure_storage_blob-12.24.0.tar.gz", hash = "sha256:eaaaa1507c8c363d6e1d1342bd549938fdf1adec9b1ada8658c8f5bf3aea844e"}, + {file = "azure_storage_blob-12.24.1-py3-none-any.whl", hash = "sha256:77fb823fdbac7f3c11f7d86a5892e2f85e161e8440a7489babe2195bf248f09e"}, + {file = "azure_storage_blob-12.24.1.tar.gz", hash = "sha256:052b2a1ea41725ba12e2f4f17be85a54df1129e13ea0321f5a2fcc851cbf47d4"}, ] [package.dependencies] @@ -592,6 +638,7 @@ files = [ ] [package.dependencies] +tinycss2 = {version = ">=1.1.0,<1.5", optional = true, markers = "extra == \"css\""} webencodings = "*" [package.extras] @@ -599,13 +646,13 @@ css = ["tinycss2 (>=1.1.0,<1.5)"] [[package]] name = "cachetools" -version = "5.5.0" +version = "5.5.1" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" files = [ - {file = "cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292"}, - {file = "cachetools-5.5.0.tar.gz", hash = "sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a"}, + {file = "cachetools-5.5.1-py3-none-any.whl", hash = "sha256:b76651fdc3b24ead3c648bbdeeb940c1b04d365b38b4af66788f9ec4a81d42bb"}, + {file = "cachetools-5.5.1.tar.gz", hash = "sha256:70f238fbba50383ef62e55c6aff6d9673175fe59f7c6782c7a0b9e38f4a9df95"}, ] [[package]] @@ -824,17 +871,6 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} -[[package]] -name = "cloudpickle" -version = "3.1.0" -description = "Pickler class to extend the standard pickle.Pickler functionality" -optional = false -python-versions = ">=3.8" -files = [ - {file = "cloudpickle-3.1.0-py3-none-any.whl", hash = 
"sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e"}, - {file = "cloudpickle-3.1.0.tar.gz", hash = "sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b"}, -] - [[package]] name = "colorama" version = "0.4.6" @@ -1181,107 +1217,39 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] -[[package]] -name = "dask" -version = "2024.12.1" -description = "Parallel PyData with Task Scheduling" -optional = false -python-versions = ">=3.10" -files = [ - {file = "dask-2024.12.1-py3-none-any.whl", hash = "sha256:1f32acddf1a6994e3af6734756f0a92467c47050bc29f3555bb9b140420e8e19"}, - {file = "dask-2024.12.1.tar.gz", hash = "sha256:bac809af21c2dd7eb06827bccbfc612504f3ee6435580e548af912828f823195"}, -] - -[package.dependencies] -click = ">=8.1" -cloudpickle = ">=3.0.0" -dask-expr = {version = ">=1.1,<1.2", optional = true, markers = "extra == \"dataframe\""} -fsspec = ">=2021.09.0" -importlib_metadata = {version = ">=4.13.0", markers = "python_version < \"3.12\""} -numpy = {version = ">=1.24", optional = true, markers = "extra == \"array\""} -packaging = ">=20.0" -pandas = {version = ">=2.0", optional = true, markers = "extra == \"dataframe\""} -partd = ">=1.4.0" -pyyaml = ">=5.3.1" -toolz = ">=0.10.0" - -[package.extras] -array = ["numpy (>=1.24)"] -complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=14.0.1)"] -dataframe = ["dask-expr (>=1.1,<1.2)", "dask[array]", "pandas (>=2.0)"] -diagnostics = ["bokeh (>=3.1.0)", "jinja2 (>=2.10.3)"] -distributed = ["distributed (==2024.12.1)"] -test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist"] - -[[package]] -name = "dask-expr" -version = "1.1.21" -description = "High Level Expressions for Dask" -optional = false -python-versions = ">=3.10" -files = [ - {file = "dask_expr-1.1.21-py3-none-any.whl", hash = "sha256:2c2a9a0b0e66b26cf918679988f97e947bc936544f3a106102055adb9a9edeba"}, - {file = "dask_expr-1.1.21.tar.gz", hash = "sha256:eb45de8e6fea1ce2608a431b4e03a484592defb1796665530c91386ffac581d3"}, -] - -[package.dependencies] -dask = "2024.12.1" -pandas = ">=2" -pyarrow = ">=14.0.1" - -[package.extras] -analyze = ["crick", "distributed", "graphviz"] - -[[package]] -name = "datashaper" -version = "0.0.49" -description = "This project provides a collection of utilities for doing lightweight data wrangling." 
-optional = false -python-versions = ">=3.10,<4" -files = [ - {file = "datashaper-0.0.49-py3-none-any.whl", hash = "sha256:7f58cabacc834765595c6e04cfbbd05be6af71907e46ebc7a91d2a4add7c2643"}, - {file = "datashaper-0.0.49.tar.gz", hash = "sha256:05bfba5964474a62bdd5259ec3fa0173d01e365208b6a4aff4ea0e63096a7533"}, -] - -[package.dependencies] -diskcache = ">=5.6.3,<6.0.0" -jsonschema = ">=4.21.1,<5.0.0" -pandas = ">=2.2.0,<3.0.0" -pyarrow = ">=15.0.0,<16.0.0" - [[package]] name = "debugpy" -version = "1.8.11" +version = "1.8.12" description = "An implementation of the Debug Adapter Protocol for Python" optional = false python-versions = ">=3.8" files = [ - {file = "debugpy-1.8.11-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:2b26fefc4e31ff85593d68b9022e35e8925714a10ab4858fb1b577a8a48cb8cd"}, - {file = "debugpy-1.8.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61bc8b3b265e6949855300e84dc93d02d7a3a637f2aec6d382afd4ceb9120c9f"}, - {file = "debugpy-1.8.11-cp310-cp310-win32.whl", hash = "sha256:c928bbf47f65288574b78518449edaa46c82572d340e2750889bbf8cd92f3737"}, - {file = "debugpy-1.8.11-cp310-cp310-win_amd64.whl", hash = "sha256:8da1db4ca4f22583e834dcabdc7832e56fe16275253ee53ba66627b86e304da1"}, - {file = "debugpy-1.8.11-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:85de8474ad53ad546ff1c7c7c89230db215b9b8a02754d41cb5a76f70d0be296"}, - {file = "debugpy-1.8.11-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ffc382e4afa4aee367bf413f55ed17bd91b191dcaf979890af239dda435f2a1"}, - {file = "debugpy-1.8.11-cp311-cp311-win32.whl", hash = "sha256:40499a9979c55f72f4eb2fc38695419546b62594f8af194b879d2a18439c97a9"}, - {file = "debugpy-1.8.11-cp311-cp311-win_amd64.whl", hash = "sha256:987bce16e86efa86f747d5151c54e91b3c1e36acc03ce1ddb50f9d09d16ded0e"}, - {file = "debugpy-1.8.11-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:84e511a7545d11683d32cdb8f809ef63fc17ea2a00455cc62d0a4dbb4ed1c308"}, - {file = "debugpy-1.8.11-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce291a5aca4985d82875d6779f61375e959208cdf09fcec40001e65fb0a54768"}, - {file = "debugpy-1.8.11-cp312-cp312-win32.whl", hash = "sha256:28e45b3f827d3bf2592f3cf7ae63282e859f3259db44ed2b129093ca0ac7940b"}, - {file = "debugpy-1.8.11-cp312-cp312-win_amd64.whl", hash = "sha256:44b1b8e6253bceada11f714acf4309ffb98bfa9ac55e4fce14f9e5d4484287a1"}, - {file = "debugpy-1.8.11-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:8988f7163e4381b0da7696f37eec7aca19deb02e500245df68a7159739bbd0d3"}, - {file = "debugpy-1.8.11-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c1f6a173d1140e557347419767d2b14ac1c9cd847e0b4c5444c7f3144697e4e"}, - {file = "debugpy-1.8.11-cp313-cp313-win32.whl", hash = "sha256:bb3b15e25891f38da3ca0740271e63ab9db61f41d4d8541745cfc1824252cb28"}, - {file = "debugpy-1.8.11-cp313-cp313-win_amd64.whl", hash = "sha256:d8768edcbeb34da9e11bcb8b5c2e0958d25218df7a6e56adf415ef262cd7b6d1"}, - {file = "debugpy-1.8.11-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:ad7efe588c8f5cf940f40c3de0cd683cc5b76819446abaa50dc0829a30c094db"}, - {file = "debugpy-1.8.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:189058d03a40103a57144752652b3ab08ff02b7595d0ce1f651b9acc3a3a35a0"}, - {file = 
"debugpy-1.8.11-cp38-cp38-win32.whl", hash = "sha256:32db46ba45849daed7ccf3f2e26f7a386867b077f39b2a974bb5c4c2c3b0a280"}, - {file = "debugpy-1.8.11-cp38-cp38-win_amd64.whl", hash = "sha256:116bf8342062246ca749013df4f6ea106f23bc159305843491f64672a55af2e5"}, - {file = "debugpy-1.8.11-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:654130ca6ad5de73d978057eaf9e582244ff72d4574b3e106fb8d3d2a0d32458"}, - {file = "debugpy-1.8.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23dc34c5e03b0212fa3c49a874df2b8b1b8fda95160bd79c01eb3ab51ea8d851"}, - {file = "debugpy-1.8.11-cp39-cp39-win32.whl", hash = "sha256:52d8a3166c9f2815bfae05f386114b0b2d274456980d41f320299a8d9a5615a7"}, - {file = "debugpy-1.8.11-cp39-cp39-win_amd64.whl", hash = "sha256:52c3cf9ecda273a19cc092961ee34eb9ba8687d67ba34cc7b79a521c1c64c4c0"}, - {file = "debugpy-1.8.11-py2.py3-none-any.whl", hash = "sha256:0e22f846f4211383e6a416d04b4c13ed174d24cc5d43f5fd52e7821d0ebc8920"}, - {file = "debugpy-1.8.11.tar.gz", hash = "sha256:6ad2688b69235c43b020e04fecccdf6a96c8943ca9c2fb340b8adc103c655e57"}, + {file = "debugpy-1.8.12-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:a2ba7ffe58efeae5b8fad1165357edfe01464f9aef25e814e891ec690e7dd82a"}, + {file = "debugpy-1.8.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbbd4149c4fc5e7d508ece083e78c17442ee13b0e69bfa6bd63003e486770f45"}, + {file = "debugpy-1.8.12-cp310-cp310-win32.whl", hash = "sha256:b202f591204023b3ce62ff9a47baa555dc00bb092219abf5caf0e3718ac20e7c"}, + {file = "debugpy-1.8.12-cp310-cp310-win_amd64.whl", hash = "sha256:9649eced17a98ce816756ce50433b2dd85dfa7bc92ceb60579d68c053f98dff9"}, + {file = "debugpy-1.8.12-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:36f4829839ef0afdfdd208bb54f4c3d0eea86106d719811681a8627ae2e53dd5"}, + {file = "debugpy-1.8.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a28ed481d530e3138553be60991d2d61103ce6da254e51547b79549675f539b7"}, + {file = "debugpy-1.8.12-cp311-cp311-win32.whl", hash = "sha256:4ad9a94d8f5c9b954e0e3b137cc64ef3f579d0df3c3698fe9c3734ee397e4abb"}, + {file = "debugpy-1.8.12-cp311-cp311-win_amd64.whl", hash = "sha256:4703575b78dd697b294f8c65588dc86874ed787b7348c65da70cfc885efdf1e1"}, + {file = "debugpy-1.8.12-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:7e94b643b19e8feb5215fa508aee531387494bf668b2eca27fa769ea11d9f498"}, + {file = "debugpy-1.8.12-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:086b32e233e89a2740c1615c2f775c34ae951508b28b308681dbbb87bba97d06"}, + {file = "debugpy-1.8.12-cp312-cp312-win32.whl", hash = "sha256:2ae5df899732a6051b49ea2632a9ea67f929604fd2b036613a9f12bc3163b92d"}, + {file = "debugpy-1.8.12-cp312-cp312-win_amd64.whl", hash = "sha256:39dfbb6fa09f12fae32639e3286112fc35ae976114f1f3d37375f3130a820969"}, + {file = "debugpy-1.8.12-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:696d8ae4dff4cbd06bf6b10d671e088b66669f110c7c4e18a44c43cf75ce966f"}, + {file = "debugpy-1.8.12-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:898fba72b81a654e74412a67c7e0a81e89723cfe2a3ea6fcd3feaa3395138ca9"}, + {file = "debugpy-1.8.12-cp313-cp313-win32.whl", hash = "sha256:22a11c493c70413a01ed03f01c3c3a2fc4478fc6ee186e340487b2edcd6f4180"}, + {file = "debugpy-1.8.12-cp313-cp313-win_amd64.whl", 
hash = "sha256:fdb3c6d342825ea10b90e43d7f20f01535a72b3a1997850c0c3cefa5c27a4a2c"}, + {file = "debugpy-1.8.12-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:b0232cd42506d0c94f9328aaf0d1d0785f90f87ae72d9759df7e5051be039738"}, + {file = "debugpy-1.8.12-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9af40506a59450f1315168d47a970db1a65aaab5df3833ac389d2899a5d63b3f"}, + {file = "debugpy-1.8.12-cp38-cp38-win32.whl", hash = "sha256:5cc45235fefac57f52680902b7d197fb2f3650112379a6fa9aa1b1c1d3ed3f02"}, + {file = "debugpy-1.8.12-cp38-cp38-win_amd64.whl", hash = "sha256:557cc55b51ab2f3371e238804ffc8510b6ef087673303890f57a24195d096e61"}, + {file = "debugpy-1.8.12-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:b5c6c967d02fee30e157ab5227706f965d5c37679c687b1e7bbc5d9e7128bd41"}, + {file = "debugpy-1.8.12-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a77f422f31f170c4b7e9ca58eae2a6c8e04da54121900651dfa8e66c29901a"}, + {file = "debugpy-1.8.12-cp39-cp39-win32.whl", hash = "sha256:a4042edef80364239f5b7b5764e55fd3ffd40c32cf6753da9bda4ff0ac466018"}, + {file = "debugpy-1.8.12-cp39-cp39-win_amd64.whl", hash = "sha256:f30b03b0f27608a0b26c75f0bb8a880c752c0e0b01090551b9d87c7d783e2069"}, + {file = "debugpy-1.8.12-py2.py3-none-any.whl", hash = "sha256:274b6a2040349b5c9864e475284bce5bb062e63dce368a394b8cc865ae3b00c6"}, + {file = "debugpy-1.8.12.tar.gz", hash = "sha256:646530b04f45c830ceae8e491ca1c9320a2d2f0efea3141487c82130aba70dce"}, ] [[package]] @@ -1308,20 +1276,20 @@ files = [ [[package]] name = "deprecated" -version = "1.2.15" +version = "1.2.18" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ - {file = "Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320"}, - {file = "deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d"}, + {file = "Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec"}, + {file = "deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d"}, ] [package.dependencies] wrapt = ">=1.10,<2" [package.extras] -dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "jinja2 (>=3.0.3,<3.1.0)", "setuptools", "sphinx (<2)", "tox"] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"] [[package]] name = "deprecation" @@ -1372,17 +1340,6 @@ asttokens = ">=2.0.0,<3.0.0" executing = ">=1.1.1" pygments = ">=2.15.0" -[[package]] -name = "diskcache" -version = "5.6.3" -description = "Disk Cache -- Disk and file backed persistent cache." 
-optional = false -python-versions = ">=3" -files = [ - {file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"}, - {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"}, -] - [[package]] name = "distlib" version = "0.3.9" @@ -1466,13 +1423,13 @@ testing = ["hatch", "pre-commit", "pytest", "tox"] [[package]] name = "executing" -version = "2.1.0" +version = "2.2.0" description = "Get the currently executing AST node of a frame, and other information" optional = false python-versions = ">=3.8" files = [ - {file = "executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf"}, - {file = "executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab"}, + {file = "executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa"}, + {file = "executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755"}, ] [package.extras] @@ -1480,23 +1437,23 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth [[package]] name = "fastapi" -version = "0.115.6" +version = "0.115.7" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" files = [ - {file = "fastapi-0.115.6-py3-none-any.whl", hash = "sha256:e9240b29e36fa8f4bb7290316988e90c381e5092e0cbe84e7818cc3713bcf305"}, - {file = "fastapi-0.115.6.tar.gz", hash = "sha256:9ec46f7addc14ea472958a96aae5b5de65f39721a46aaf5705c480d9a8b76654"}, + {file = "fastapi-0.115.7-py3-none-any.whl", hash = "sha256:eb6a8c8bf7f26009e8147111ff15b5177a0e19bb4a45bc3486ab14804539d21e"}, + {file = "fastapi-0.115.7.tar.gz", hash = "sha256:0f106da6c01d88a6786b3248fb4d7a940d071f6f488488898ad5d354b25ed015"}, ] [package.dependencies] pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" -starlette = ">=0.40.0,<0.42.0" +starlette = ">=0.40.0,<0.46.0" typing-extensions = ">=4.8.0" [package.extras] -all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] -standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=2.11.2)", "python-multipart (>=0.0.7)", "uvicorn[standard] (>=0.12.0)"] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=3.1.5)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=3.1.5)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"] [[package]] name = "fastapi-offline" @@ -1590,18 +1547,18 @@ lzo = ["python-lzo"] [[package]] name = "filelock" -version = "3.16.1" +version = "3.17.0" description = "A platform independent file lock." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, - {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, + {file = "filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338"}, + {file = "filelock-3.17.0.tar.gz", hash = "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e"}, ] [package.extras] -docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] +docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] typing = ["typing-extensions (>=4.12.2)"] [[package]] @@ -1632,63 +1589,85 @@ mccabe = ">=0.7.0,<0.8.0" pycodestyle = ">=2.12.0,<2.13.0" pyflakes = ">=3.2.0,<3.3.0" +[[package]] +name = "fnllm" +version = "0.0.10" +description = "A function-based LLM protocol and wrapper." +optional = false +python-versions = ">=3.10" +files = [ + {file = "fnllm-0.0.10-py3-none-any.whl", hash = "sha256:e676001d9b0ebbe194590393d427385760adaefcab6a456268e4f13a0e9d2cb6"}, + {file = "fnllm-0.0.10.tar.gz", hash = "sha256:ece859432b83a462dc35db6483f36313ff935b79f437186daa44e3679f4f49cf"}, +] + +[package.dependencies] +aiolimiter = ">=1.1.0" +httpx = ">=0.27.0" +json-repair = ">=0.30.0" +pydantic = ">=2.8.2" +tenacity = ">=8.5.0" + +[package.extras] +azure = ["azure-identity (>=1.17.1)", "azure-storage-blob (>=12.20.0)"] +openai = ["openai (>=1.35.12)", "tiktoken (>=0.7.0)"] + [[package]] name = "fonttools" -version = "4.55.3" +version = "4.55.7" description = "Tools to manipulate font files" optional = false python-versions = ">=3.8" files = [ - {file = "fonttools-4.55.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1dcc07934a2165ccdc3a5a608db56fb3c24b609658a5b340aee4ecf3ba679dc0"}, - {file = "fonttools-4.55.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f7d66c15ba875432a2d2fb419523f5d3d347f91f48f57b8b08a2dfc3c39b8a3f"}, - {file = "fonttools-4.55.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e4ae3592e62eba83cd2c4ccd9462dcfa603ff78e09110680a5444c6925d841"}, - {file = "fonttools-4.55.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62d65a3022c35e404d19ca14f291c89cc5890032ff04f6c17af0bd1927299674"}, - {file = "fonttools-4.55.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d342e88764fb201286d185093781bf6628bbe380a913c24adf772d901baa8276"}, - {file = "fonttools-4.55.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dd68c87a2bfe37c5b33bcda0fba39b65a353876d3b9006fde3adae31f97b3ef5"}, - {file = "fonttools-4.55.3-cp310-cp310-win32.whl", hash = "sha256:1bc7ad24ff98846282eef1cbeac05d013c2154f977a79886bb943015d2b1b261"}, - {file = "fonttools-4.55.3-cp310-cp310-win_amd64.whl", hash = "sha256:b54baf65c52952db65df39fcd4820668d0ef4766c0ccdf32879b77f7c804d5c5"}, - {file = "fonttools-4.55.3-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:8c4491699bad88efe95772543cd49870cf756b019ad56294f6498982408ab03e"}, - {file = "fonttools-4.55.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5323a22eabddf4b24f66d26894f1229261021dacd9d29e89f7872dd8c63f0b8b"}, - {file = "fonttools-4.55.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5480673f599ad410695ca2ddef2dfefe9df779a9a5cda89503881e503c9c7d90"}, - {file = "fonttools-4.55.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da9da6d65cd7aa6b0f806556f4985bcbf603bf0c5c590e61b43aa3e5a0f822d0"}, - {file = "fonttools-4.55.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e894b5bd60d9f473bed7a8f506515549cc194de08064d829464088d23097331b"}, - {file = "fonttools-4.55.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:aee3b57643827e237ff6ec6d28d9ff9766bd8b21e08cd13bff479e13d4b14765"}, - {file = "fonttools-4.55.3-cp311-cp311-win32.whl", hash = "sha256:eb6ca911c4c17eb51853143624d8dc87cdcdf12a711fc38bf5bd21521e79715f"}, - {file = "fonttools-4.55.3-cp311-cp311-win_amd64.whl", hash = "sha256:6314bf82c54c53c71805318fcf6786d986461622dd926d92a465199ff54b1b72"}, - {file = "fonttools-4.55.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f9e736f60f4911061235603a6119e72053073a12c6d7904011df2d8fad2c0e35"}, - {file = "fonttools-4.55.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a8aa2c5e5b8b3bcb2e4538d929f6589a5c6bdb84fd16e2ed92649fb5454f11c"}, - {file = "fonttools-4.55.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07f8288aacf0a38d174445fc78377a97fb0b83cfe352a90c9d9c1400571963c7"}, - {file = "fonttools-4.55.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8d5e8916c0970fbc0f6f1bece0063363bb5857a7f170121a4493e31c3db3314"}, - {file = "fonttools-4.55.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ae3b6600565b2d80b7c05acb8e24d2b26ac407b27a3f2e078229721ba5698427"}, - {file = "fonttools-4.55.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:54153c49913f45065c8d9e6d0c101396725c5621c8aee744719300f79771d75a"}, - {file = "fonttools-4.55.3-cp312-cp312-win32.whl", hash = "sha256:827e95fdbbd3e51f8b459af5ea10ecb4e30af50221ca103bea68218e9615de07"}, - {file = "fonttools-4.55.3-cp312-cp312-win_amd64.whl", hash = "sha256:e6e8766eeeb2de759e862004aa11a9ea3d6f6d5ec710551a88b476192b64fd54"}, - {file = "fonttools-4.55.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a430178ad3e650e695167cb53242dae3477b35c95bef6525b074d87493c4bf29"}, - {file = "fonttools-4.55.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:529cef2ce91dc44f8e407cc567fae6e49a1786f2fefefa73a294704c415322a4"}, - {file = "fonttools-4.55.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e75f12c82127486fac2d8bfbf5bf058202f54bf4f158d367e41647b972342ca"}, - {file = "fonttools-4.55.3-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:859c358ebf41db18fb72342d3080bce67c02b39e86b9fbcf1610cca14984841b"}, - {file = "fonttools-4.55.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:546565028e244a701f73df6d8dd6be489d01617863ec0c6a42fa25bf45d43048"}, - {file = "fonttools-4.55.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:aca318b77f23523309eec4475d1fbbb00a6b133eb766a8bdc401faba91261abe"}, - {file = "fonttools-4.55.3-cp313-cp313-win32.whl", hash = "sha256:8c5ec45428edaa7022f1c949a632a6f298edc7b481312fc7dc258921e9399628"}, - {file = 
"fonttools-4.55.3-cp313-cp313-win_amd64.whl", hash = "sha256:11e5de1ee0d95af4ae23c1a138b184b7f06e0b6abacabf1d0db41c90b03d834b"}, - {file = "fonttools-4.55.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:caf8230f3e10f8f5d7593eb6d252a37caf58c480b19a17e250a63dad63834cf3"}, - {file = "fonttools-4.55.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b586ab5b15b6097f2fb71cafa3c98edfd0dba1ad8027229e7b1e204a58b0e09d"}, - {file = "fonttools-4.55.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8c2794ded89399cc2169c4d0bf7941247b8d5932b2659e09834adfbb01589aa"}, - {file = "fonttools-4.55.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf4fe7c124aa3f4e4c1940880156e13f2f4d98170d35c749e6b4f119a872551e"}, - {file = "fonttools-4.55.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:86721fbc389ef5cc1e2f477019e5069e8e4421e8d9576e9c26f840dbb04678de"}, - {file = "fonttools-4.55.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:89bdc5d88bdeec1b15af790810e267e8332d92561dce4f0748c2b95c9bdf3926"}, - {file = "fonttools-4.55.3-cp38-cp38-win32.whl", hash = "sha256:bc5dbb4685e51235ef487e4bd501ddfc49be5aede5e40f4cefcccabc6e60fb4b"}, - {file = "fonttools-4.55.3-cp38-cp38-win_amd64.whl", hash = "sha256:cd70de1a52a8ee2d1877b6293af8a2484ac82514f10b1c67c1c5762d38073e56"}, - {file = "fonttools-4.55.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bdcc9f04b36c6c20978d3f060e5323a43f6222accc4e7fcbef3f428e216d96af"}, - {file = "fonttools-4.55.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c3ca99e0d460eff46e033cd3992a969658c3169ffcd533e0a39c63a38beb6831"}, - {file = "fonttools-4.55.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22f38464daa6cdb7b6aebd14ab06609328fe1e9705bb0fcc7d1e69de7109ee02"}, - {file = "fonttools-4.55.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed63959d00b61959b035c7d47f9313c2c1ece090ff63afea702fe86de00dbed4"}, - {file = "fonttools-4.55.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5e8d657cd7326eeaba27de2740e847c6b39dde2f8d7cd7cc56f6aad404ddf0bd"}, - {file = "fonttools-4.55.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fb594b5a99943042c702c550d5494bdd7577f6ef19b0bc73877c948a63184a32"}, - {file = "fonttools-4.55.3-cp39-cp39-win32.whl", hash = "sha256:dc5294a3d5c84226e3dbba1b6f61d7ad813a8c0238fceea4e09aa04848c3d851"}, - {file = "fonttools-4.55.3-cp39-cp39-win_amd64.whl", hash = "sha256:aedbeb1db64496d098e6be92b2e63b5fac4e53b1b92032dfc6988e1ea9134a4d"}, - {file = "fonttools-4.55.3-py3-none-any.whl", hash = "sha256:f412604ccbeee81b091b420272841e5ec5ef68967a9790e80bffd0e30b8e2977"}, - {file = "fonttools-4.55.3.tar.gz", hash = "sha256:3983313c2a04d6cc1fe9251f8fc647754cf49a61dac6cb1e7249ae67afaafc45"}, + {file = "fonttools-4.55.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c2680a3e6e2e2d104a7ea81fb89323e1a9122c23b03d6569d0768887d0d76e69"}, + {file = "fonttools-4.55.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a7831d16c95b60866772a15fdcc03772625c4bb6d858e0ad8ef3d6e48709b2ef"}, + {file = "fonttools-4.55.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:833927d089e6585019f2c85e3f8f7d87733e3fe81cd704ebaca7afa27e2e7113"}, + {file = "fonttools-4.55.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7858dc6823296a053d85b831fa8428781c6c6f06fca44582bf7b6b2ff32a9089"}, + {file = "fonttools-4.55.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:05568a66b090ed9d79aefdce2ceb180bb64fc856961deaedc29f5ad51355ce2c"}, + {file = "fonttools-4.55.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2dbc08e227fbeb716776905a7bd3c4fc62c8e37c8ef7d481acd10cb5fde12222"}, + {file = "fonttools-4.55.7-cp310-cp310-win32.whl", hash = "sha256:6eb93cbba484a463b5ee83f7dd3211905f27a3871d20d90fb72de84c6c5056e3"}, + {file = "fonttools-4.55.7-cp310-cp310-win_amd64.whl", hash = "sha256:7ff8e606f905048dc91a55a06d994b68065bf35752ae199df54a9bf30013dcaa"}, + {file = "fonttools-4.55.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:916e1d926823b4b3b3815c59fc79f4ed670696fdd5fd9a5e690a0503eef38f79"}, + {file = "fonttools-4.55.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b89da448e0073408d7b2c44935f9fdae4fdc93644899f99f6102ef883ecf083c"}, + {file = "fonttools-4.55.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:087ace2d06894ccdb03e6975d05da6bb9cec0c689b2a9983c059880e33a1464a"}, + {file = "fonttools-4.55.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:775ed0700ee6f781436641f18a0c61b1846a8c1aecae6da6b395c4417e2cb567"}, + {file = "fonttools-4.55.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9ec71d0cc0242899f87e4c230ed0b22c7b8681f288fb80e3d81c2c54c5bd2c79"}, + {file = "fonttools-4.55.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d4b1c5939c0521525f45522823508e6fad21175bca978583688ea3b3736e6625"}, + {file = "fonttools-4.55.7-cp311-cp311-win32.whl", hash = "sha256:23df0f1003abaf8a435543f59583fc247e7ae1b047ee2263510e0654a5f207e0"}, + {file = "fonttools-4.55.7-cp311-cp311-win_amd64.whl", hash = "sha256:82163d58b43eff6e2025a25c32905fdb9042a163cc1ff82dab393e7ffc77a7d5"}, + {file = "fonttools-4.55.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:12e81d44f762156d28b5c93a6b65d98ed73678be45b22546de8ed29736c3cb96"}, + {file = "fonttools-4.55.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c26445a7be689f8b70df7d5d2e2c85ec4407bdb769902a23dd45ac44f767575d"}, + {file = "fonttools-4.55.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2cbafedb9462be7cf68c66b6ca1d8309842fe36b729f1b1969595f5d660e5c2"}, + {file = "fonttools-4.55.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4bde87985012adbd7559bc363d802fb335e92a07ff86a76cf02bebb0b8566d1"}, + {file = "fonttools-4.55.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:69ed0660750993150f7c4d966c0c1ffaa0385f23ccef85c2ff108062d80dd7ea"}, + {file = "fonttools-4.55.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3098355e7a7b5ac48d5dc29684a65271187b865b85675033958b57c40364ee34"}, + {file = "fonttools-4.55.7-cp312-cp312-win32.whl", hash = "sha256:ee7aa8bb716318e3d835ef473978e22b7a39c0f1b3b08cc0b0ee1bba6f73bc1e"}, + {file = "fonttools-4.55.7-cp312-cp312-win_amd64.whl", hash = "sha256:e696d6e2baf4cc57ded34bb87e5d3a9e4da9732f3d9e8e2c6db0746e57a6dc0b"}, + {file = "fonttools-4.55.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e10c7fb80cdfdc32244514cbea0906e9f53e3cc80d64d3389da09502fd999b55"}, + {file = "fonttools-4.55.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1101976c703ff4008a928fc3fef42caf06d035bfc4614230d7e797cbe356feb0"}, + {file = "fonttools-4.55.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e6dffe9cbcd163ef617fab1f81682e4d1629b7a5b9c5e598274dc2d03e88bcd"}, + {file = 
"fonttools-4.55.7-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77e5115a425d53be6e31cd0fe9210f62a488bccf81eb113ab5dd7f4fa88e4d81"}, + {file = "fonttools-4.55.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f0c45eae32d090763820756b18322a70571dada3f1cbe003debc37a9c35bc260"}, + {file = "fonttools-4.55.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fd4ebc475d43f3de2b26e0cf551eff92c24e22d1aee03dc1b33adb52fc2e6cb2"}, + {file = "fonttools-4.55.7-cp313-cp313-win32.whl", hash = "sha256:371197de1283cc99f5f10eb91496520eb0e2d079312d014fd6cef9e802174c6a"}, + {file = "fonttools-4.55.7-cp313-cp313-win_amd64.whl", hash = "sha256:418ece624fbc04e199f58398ffef3eaad645baba65434871b09eb7350a3a346b"}, + {file = "fonttools-4.55.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3976db357484bf4cb533dfd0d1a444b38ad06062458715ebf21e38c71aff325d"}, + {file = "fonttools-4.55.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:30c3501328363b73a90acc8a722dd199c993f2c4369ea16886128d94e91897ec"}, + {file = "fonttools-4.55.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0899cd23967950e7b902ea75af06cfe5f59ac71eb38e98a774c9e596790e6aa"}, + {file = "fonttools-4.55.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f669910b64d27750398f6c56c651367d4954b05c86ff067af1c9949e109cf1e2"}, + {file = "fonttools-4.55.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:1d4be8354c245c00aecfc90f5d3da8606226f0ac22e1cb0837b39139e4c2df85"}, + {file = "fonttools-4.55.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9074a2848ea5b607377e16998dfcf90cf5eb614d0c388541b9782d5cc038e149"}, + {file = "fonttools-4.55.7-cp38-cp38-win32.whl", hash = "sha256:5ff0daf8b2e0612e5761fed2e4a2f54eff9d9ec0aeb4091c9f3666f9a118325e"}, + {file = "fonttools-4.55.7-cp38-cp38-win_amd64.whl", hash = "sha256:0ed25d7b5fa4ae6a805c2a9cc0e5307d45cbb3b8e155584fe932d0f3b6a997bf"}, + {file = "fonttools-4.55.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8ef5ee98fc320c158e4e459a5ee40d1ac3728d4ce11c3c8dfd854aa0aa5c042f"}, + {file = "fonttools-4.55.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:09740feed51f9ed816aebf5d82071b7fecf693ac3a7e0fc8ea433f5dc3bd92f5"}, + {file = "fonttools-4.55.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3d19ea483b3cd8833e9e2ee8115f3d2044d55d3743d84f9c23b48b52d7516d8"}, + {file = "fonttools-4.55.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c135c91d47351b84893fb6fcbb8f178eba14f7cb195850264c0675c85e4238b6"}, + {file = "fonttools-4.55.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bee4920ebeb540849bc3555d871e2a8487e39ce8263c281f74d5b6d44d2bf1df"}, + {file = "fonttools-4.55.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f3b63648600dd0081bdd6856a86d014a7f1d2d11c3c974542f866478d832e103"}, + {file = "fonttools-4.55.7-cp39-cp39-win32.whl", hash = "sha256:d4bd27f0fa5120aaa39f76de5768959bc97300e0f59a3160d466b51436a38aea"}, + {file = "fonttools-4.55.7-cp39-cp39-win_amd64.whl", hash = "sha256:c665df9c9d99937a5bf807bace1c0c95bd13f55de8c82aaf9856b868dcbfe5d9"}, + {file = "fonttools-4.55.7-py3-none-any.whl", hash = "sha256:3304dfcf9ca204dd0ef691a287bd851ddd8e8250108658c0677c3fdfec853a20"}, + {file = "fonttools-4.55.7.tar.gz", hash = "sha256:6899e3d97225a8218f525e9754da0376e1c62953a0d57a76c5abaada51e0d140"}, ] [package.extras] @@ -1915,13 +1894,13 @@ test-win = ["POT", "pytest", "pytest-cov", "testfixtures"] [[package]] name 
= "google-auth" -version = "2.37.0" +version = "2.38.0" description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google_auth-2.37.0-py2.py3-none-any.whl", hash = "sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0"}, - {file = "google_auth-2.37.0.tar.gz", hash = "sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00"}, + {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"}, + {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"}, ] [package.dependencies] @@ -1939,47 +1918,45 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] name = "graphrag" -version = "0.3.3" -description = "" +version = "1.2.0" +description = "GraphRAG: A graph-based retrieval-augmented generation (RAG) system." optional = false python-versions = "<3.13,>=3.10" files = [ - {file = "graphrag-0.3.3-py3-none-any.whl", hash = "sha256:b48b2bd98f168b5fea182d9db42e61d13496d87c80be90d5e431b56bbe930bc0"}, - {file = "graphrag-0.3.3.tar.gz", hash = "sha256:2a082e6c47ee93b469862d3f46b94e2578d2133c95f1280e07855f119a9279a8"}, + {file = "graphrag-1.2.0-py3-none-any.whl", hash = "sha256:7f7312d57122a3f100e60ff123b7034faaf62eee3fd2d859418e3546118a571d"}, + {file = "graphrag-1.2.0.tar.gz", hash = "sha256:c1396cdd48fd67384bd40ae9aec39a65ecaece4b7d1cf7706a270034d8e87a53"}, ] [package.dependencies] aiofiles = ">=24.1.0,<25.0.0" -aiolimiter = ">=1.1.0,<2.0.0" -azure-identity = ">=1.17.1,<2.0.0" -azure-search-documents = ">=11.4.0,<12.0.0" -azure-storage-blob = ">=12.22.0,<13.0.0" -datashaper = ">=0.0.49,<0.0.50" +azure-cosmos = ">=4.9.0,<5.0.0" +azure-identity = ">=1.19.0,<2.0.0" +azure-search-documents = ">=11.5.2,<12.0.0" +azure-storage-blob = ">=12.24.0,<13.0.0" devtools = ">=0.12.2,<0.13.0" environs = ">=11.0.0,<12.0.0" -fastparquet = ">=2024.2.0,<2025.0.0" +fnllm = ">=0.0.10,<0.0.11" future = ">=1.0.0,<2.0.0" graspologic = ">=3.4.1,<4.0.0" -json-repair = ">=0.28.4,<0.29.0" -lancedb = ">=0.12.0,<0.13.0" -nest-asyncio = {version = ">=1.6.0,<2.0.0", markers = "platform_system == \"Windows\""} -networkx = ">=3,<4" +httpx = ">=0.28.1,<0.29.0" +json-repair = ">=0.30.3,<0.31.0" +lancedb = ">=0.17.0,<0.18.0" +networkx = ">=3.4.2,<4.0.0" nltk = "3.9.1" -numba = "0.60.0" numpy = ">=1.25.2,<2.0.0" -openai = ">=1.37.1,<2.0.0" -pyaml-env = ">=1.2.1,<2.0.0" -pydantic = ">=2,<3" -python-dotenv = ">=1.0.0,<2.0.0" +openai = ">=1.57.0,<2.0.0" +pandas = ">=2.2.3,<3.0.0" +pyarrow = ">=15.0.0,<16.0.0" +pydantic = ">=2.10.3,<3.0.0" +python-dotenv = ">=1.0.1,<2.0.0" pyyaml = ">=6.0.2,<7.0.0" -rich = ">=13.6.0,<14.0.0" -scipy = "1.12.0" -swifter = ">=1.4.0,<2.0.0" +rich = ">=13.9.4,<14.0.0" tenacity = ">=9.0.0,<10.0.0" -textual = ">=0.78.0,<0.79.0" -tiktoken = ">=0.7.0,<0.8.0" +tiktoken = ">=0.8.0,<0.9.0" +tqdm = ">=4.67.1,<5.0.0" +typer = ">=0.15.1,<0.16.0" typing-extensions = ">=4.12.2,<5.0.0" -uvloop = {version = ">=0.20.0,<0.21.0", markers = "platform_system != \"Windows\""} +umap-learn = ">=0.5.6,<0.6.0" [[package]] name = "graspologic" @@ -2012,15 +1989,15 @@ umap-learn = ">=0.5.6,<0.6.0" [[package]] name = "graspologic-native" -version = "1.2.1" +version = "1.2.3" description = "Python native companion module to the graspologic library" optional = false -python-versions = ">=3.6, <3.13" +python-versions = "<3.14,>=3.8" files = [ - {file = 
"graspologic_native-1.2.1-cp36-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:eccb2fa475b604375e34b4ae1d5497a428c34ed65f27888495239f8e120acea1"}, - {file = "graspologic_native-1.2.1-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a44cfdee11718c01c0f6c544750b3ae64e28cc03432a620fe0295704bd0d618d"}, - {file = "graspologic_native-1.2.1-cp36-abi3-win_amd64.whl", hash = "sha256:56b5e66ba003fd38efc0919ce90fa22d379456e177dca65e26626498d2b9b96b"}, - {file = "graspologic_native-1.2.1.tar.gz", hash = "sha256:72b7586028a91e9fef9af0ef314d368f0240c18dca99e6e6c546334359a8610a"}, + {file = "graspologic_native-1.2.3-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b2fe41f24fa826dc0c134c7e3c8781090c3056a0000e74ac927b34caca8b3c6b"}, + {file = "graspologic_native-1.2.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b25c75c31f7650905b75a7fe01f89bf8e89667bf6fcb1c733b0260599df2d00"}, + {file = "graspologic_native-1.2.3-cp38-abi3-win_amd64.whl", hash = "sha256:57ded2c8532878ff662888c0397f4909d70fdf0e98d808de707238c67857ab5c"}, + {file = "graspologic_native-1.2.3.tar.gz", hash = "sha256:7c059f7b580248abc3fee8828b9e97ac48ac9a9554fdeafaa35862871ac5113a"}, ] [[package]] @@ -2098,13 +2075,13 @@ scipy = ">=1.4.0" [[package]] name = "identify" -version = "2.6.4" +version = "2.6.6" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.4-py2.py3-none-any.whl", hash = "sha256:993b0f01b97e0568c179bb9196391ff391bfb88a99099dbf5ce392b68f42d0af"}, - {file = "identify-2.6.4.tar.gz", hash = "sha256:285a7d27e397652e8cafe537a6cc97dd470a970f48fb2e9d979aa38eae5513ac"}, + {file = "identify-2.6.6-py2.py3-none-any.whl", hash = "sha256:cbd1810bce79f8b671ecb20f53ee0ae8e86ae84b557de31d89709dc2a48ba881"}, + {file = "identify-2.6.6.tar.gz", hash = "sha256:7bec12768ed44ea4761efb47806f0a41f86e7c0a5fdf5950d4648c90eca7e251"}, ] [package.extras] @@ -2409,13 +2386,13 @@ files = [ [[package]] name = "json-repair" -version = "0.28.4" +version = "0.30.3" description = "A package to repair broken json strings" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "json_repair-0.28.4-py3-none-any.whl", hash = "sha256:b3e138735b25773a7c02b42498c80aa7d6b7aec168a3ee100473f7c5e22334e5"}, - {file = "json_repair-0.28.4.tar.gz", hash = "sha256:b7f1c48d8bc9e18a24e2b05459e8afb0a82cade5a242382422a9b065a6762578"}, + {file = "json_repair-0.30.3-py3-none-any.whl", hash = "sha256:63bb588162b0958ae93d85356ecbe54c06b8c33f8a4834f93fa2719ea669804e"}, + {file = "json_repair-0.30.3.tar.gz", hash = "sha256:0ac56e7ae9253ee9c507a7e1a3a26799c9b0bbe5e2bec1b2cc5053e90d5b05e3"}, ] [[package]] @@ -2837,13 +2814,13 @@ files = [ [[package]] name = "kubernetes" -version = "31.0.0" +version = "32.0.0" description = "Kubernetes python client" optional = false python-versions = ">=3.6" files = [ - {file = "kubernetes-31.0.0-py2.py3-none-any.whl", hash = "sha256:bf141e2d380c8520eada8b351f4e319ffee9636328c137aa432bc486ca1200e1"}, - {file = "kubernetes-31.0.0.tar.gz", hash = "sha256:28945de906c8c259c1ebe62703b56a03b714049372196f854105afe4e6d014c0"}, + {file = "kubernetes-32.0.0-py2.py3-none-any.whl", hash = "sha256:60fd8c29e8e43d9c553ca4811895a687426717deba9c0a66fb2dcc3f5ef96692"}, + {file = "kubernetes-32.0.0.tar.gz", hash = "sha256:319fa840345a482001ac5d6062222daeb66ec4d1bcb3087402aed685adf0aecb"}, ] [package.dependencies] @@ -2864,30 
+2841,25 @@ adal = ["adal (>=1.0.2)"] [[package]] name = "lancedb" -version = "0.12.0" +version = "0.17.0" description = "lancedb" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "lancedb-0.12.0-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:28df523b1b1d7b3db47ee95baa8dd85b94238e811a1b04de91083b47aa28dcb5"}, - {file = "lancedb-0.12.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:07a92887e470befe5d89ef960afcd516bb9501796458751885499d31c1e448de"}, - {file = "lancedb-0.12.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea79b71f2382a3974447a341e0fbb904b9db70269f32378f4cb45b2b20b90771"}, - {file = "lancedb-0.12.0-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:db9d4f37252075ede29e090517eb15025d85edcd56001e788b7b7a22190b2028"}, - {file = "lancedb-0.12.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:8c52158181b6f63cc24a93c11f4bd038d8932ac8c9d14ef9c6e6aaeefb54fb1f"}, - {file = "lancedb-0.12.0-cp38-abi3-win_amd64.whl", hash = "sha256:c4e63c3b5b3ce40303ec350ba7000bfb7c7c181b7de59c7950f7c2c4bda0421c"}, + {file = "lancedb-0.17.0-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:40aac1583edda390e51189c4e95bdfd4768d23705234e12a7b81957f1143df42"}, + {file = "lancedb-0.17.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:895bed499dae61cac1dbfc40ad71a566e06ab5c8d538aa57873a0cba859f8a7a"}, + {file = "lancedb-0.17.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ea688d0f63796ee912a7cfe6667f36661e36756fa8340b94dd54d666a7db63f"}, + {file = "lancedb-0.17.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:f51a61950ead30a605b5653a81e8362e4aac6fec32705b88b9c9319e9308b2bb"}, + {file = "lancedb-0.17.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:07e6f10b3fcbeb6c737996e5ebd68d04c3ca2656a9b8b970111ecf368245e7f6"}, + {file = "lancedb-0.17.0-cp39-abi3-win_amd64.whl", hash = "sha256:9d7e82f83f430d906c285d3303729258b21b1cc8da634c9f7017e354bcb7318a"}, ] [package.dependencies] -attrs = ">=21.3.0" -cachetools = "*" deprecation = "*" overrides = ">=0.7" packaging = "*" pydantic = ">=1.10" -pylance = "0.16.0" -ratelimiter = ">=1.0,<2.0" -requests = ">=2.31.0" -retry = ">=0.9.2" +pylance = "0.20.0" tqdm = ">=4.27.0" [package.extras] @@ -2895,68 +2867,37 @@ azure = ["adlfs (>=2024.2.0)"] clip = ["open-clip", "pillow", "torch"] dev = ["pre-commit", "ruff"] docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"] -embeddings = ["awscli (>=1.29.57)", "boto3 (>=1.28.57)", "botocore (>=1.31.57)", "cohere", "google-generativeai", "huggingface-hub", "ibm-watsonx-ai (>=1.1.2)", "instructorembedding", "ollama", "open-clip-torch", "openai (>=1.6.1)", "pillow", "sentence-transformers", "torch"] +embeddings = ["awscli (>=1.29.57)", "boto3 (>=1.28.57)", "botocore (>=1.31.57)", "cohere", "google-generativeai", "huggingface-hub", "ibm-watsonx-ai (>=1.1.2)", "instructorembedding", "ollama", "open-clip-torch", "openai (>=1.6.1)", "pillow", "requests (>=2.31.0)", "sentence-transformers", "torch"] tests = ["aiohttp", "boto3", "duckdb", "pandas (>=1.4)", "polars (>=0.19,<=1.3.0)", "pytest", "pytest-asyncio", "pytest-mock", "pytz", "tantivy"] -[[package]] -name = "linkify-it-py" -version = "2.0.3" -description = "Links recognition library with FULL unicode support." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "linkify-it-py-2.0.3.tar.gz", hash = "sha256:68cda27e162e9215c17d786649d1da0021a451bdc436ef9e0fa0ba5234b9b048"}, - {file = "linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79"}, -] - -[package.dependencies] -uc-micro-py = "*" - -[package.extras] -benchmark = ["pytest", "pytest-benchmark"] -dev = ["black", "flake8", "isort", "pre-commit", "pyproject-flake8"] -doc = ["myst-parser", "sphinx", "sphinx-book-theme"] -test = ["coverage", "pytest", "pytest-cov"] - [[package]] name = "llvmlite" -version = "0.43.0" +version = "0.44.0" description = "lightweight wrapper around basic LLVM functionality" optional = false -python-versions = ">=3.9" -files = [ - {file = "llvmlite-0.43.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a289af9a1687c6cf463478f0fa8e8aa3b6fb813317b0d70bf1ed0759eab6f761"}, - {file = "llvmlite-0.43.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d4fd101f571a31acb1559ae1af30f30b1dc4b3186669f92ad780e17c81e91bc"}, - {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d434ec7e2ce3cc8f452d1cd9a28591745de022f931d67be688a737320dfcead"}, - {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6912a87782acdff6eb8bf01675ed01d60ca1f2551f8176a300a886f09e836a6a"}, - {file = "llvmlite-0.43.0-cp310-cp310-win_amd64.whl", hash = "sha256:14f0e4bf2fd2d9a75a3534111e8ebeb08eda2f33e9bdd6dfa13282afacdde0ed"}, - {file = "llvmlite-0.43.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8d0618cb9bfe40ac38a9633f2493d4d4e9fcc2f438d39a4e854f39cc0f5f98"}, - {file = "llvmlite-0.43.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0a9a1a39d4bf3517f2af9d23d479b4175ead205c592ceeb8b89af48a327ea57"}, - {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1da416ab53e4f7f3bc8d4eeba36d801cc1894b9fbfbf2022b29b6bad34a7df2"}, - {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977525a1e5f4059316b183fb4fd34fa858c9eade31f165427a3977c95e3ee749"}, - {file = "llvmlite-0.43.0-cp311-cp311-win_amd64.whl", hash = "sha256:d5bd550001d26450bd90777736c69d68c487d17bf371438f975229b2b8241a91"}, - {file = "llvmlite-0.43.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f99b600aa7f65235a5a05d0b9a9f31150c390f31261f2a0ba678e26823ec38f7"}, - {file = "llvmlite-0.43.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:35d80d61d0cda2d767f72de99450766250560399edc309da16937b93d3b676e7"}, - {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eccce86bba940bae0d8d48ed925f21dbb813519169246e2ab292b5092aba121f"}, - {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df6509e1507ca0760787a199d19439cc887bfd82226f5af746d6977bd9f66844"}, - {file = "llvmlite-0.43.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a2872ee80dcf6b5dbdc838763d26554c2a18aa833d31a2635bff16aafefb9c9"}, - {file = "llvmlite-0.43.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cd2a7376f7b3367019b664c21f0c61766219faa3b03731113ead75107f3b66c"}, - {file = "llvmlite-0.43.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18e9953c748b105668487b7c81a3e97b046d8abf95c4ddc0cd3c94f4e4651ae8"}, - {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:74937acd22dc11b33946b67dca7680e6d103d6e90eeaaaf932603bec6fe7b03a"}, - {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9efc739cc6ed760f795806f67889923f7274276f0eb45092a1473e40d9b867"}, - {file = "llvmlite-0.43.0-cp39-cp39-win_amd64.whl", hash = "sha256:47e147cdda9037f94b399bf03bfd8a6b6b1f2f90be94a454e3386f006455a9b4"}, - {file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"}, -] - -[[package]] -name = "locket" -version = "1.0.0" -description = "File-based locks for Python on Linux and Windows" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.10" files = [ - {file = "locket-1.0.0-py2.py3-none-any.whl", hash = "sha256:b6c819a722f7b6bd955b80781788e4a66a55628b858d347536b7e81325a3a5e3"}, - {file = "locket-1.0.0.tar.gz", hash = "sha256:5c0d4c052a8bbbf750e056a8e65ccd309086f4f0f18a2eac306a8dfa4112a632"}, + {file = "llvmlite-0.44.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9fbadbfba8422123bab5535b293da1cf72f9f478a65645ecd73e781f962ca614"}, + {file = "llvmlite-0.44.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cccf8eb28f24840f2689fb1a45f9c0f7e582dd24e088dcf96e424834af11f791"}, + {file = "llvmlite-0.44.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7202b678cdf904823c764ee0fe2dfe38a76981f4c1e51715b4cb5abb6cf1d9e8"}, + {file = "llvmlite-0.44.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40526fb5e313d7b96bda4cbb2c85cd5374e04d80732dd36a282d72a560bb6408"}, + {file = "llvmlite-0.44.0-cp310-cp310-win_amd64.whl", hash = "sha256:41e3839150db4330e1b2716c0be3b5c4672525b4c9005e17c7597f835f351ce2"}, + {file = "llvmlite-0.44.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:eed7d5f29136bda63b6d7804c279e2b72e08c952b7c5df61f45db408e0ee52f3"}, + {file = "llvmlite-0.44.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ace564d9fa44bb91eb6e6d8e7754977783c68e90a471ea7ce913bff30bd62427"}, + {file = "llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5d22c3bfc842668168a786af4205ec8e3ad29fb1bc03fd11fd48460d0df64c1"}, + {file = "llvmlite-0.44.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f01a394e9c9b7b1d4e63c327b096d10f6f0ed149ef53d38a09b3749dcf8c9610"}, + {file = "llvmlite-0.44.0-cp311-cp311-win_amd64.whl", hash = "sha256:d8489634d43c20cd0ad71330dde1d5bc7b9966937a263ff1ec1cebb90dc50955"}, + {file = "llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad"}, + {file = "llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db"}, + {file = "llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9"}, + {file = "llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d"}, + {file = "llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1"}, + {file = "llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516"}, + {file = "llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e"}, + {file = "llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf"}, + {file = "llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc"}, + {file = "llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930"}, + {file = "llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4"}, ] [[package]] @@ -2971,8 +2912,6 @@ files = [ ] [package.dependencies] -linkify-it-py = {version = ">=1,<3", optional = true, markers = "extra == \"linkify\""} -mdit-py-plugins = {version = "*", optional = true, markers = "extra == \"plugins\""} mdurl = ">=0.1,<1.0" [package.extras] @@ -3057,13 +2996,13 @@ files = [ [[package]] name = "marshmallow" -version = "3.23.2" +version = "3.26.0" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." optional = false python-versions = ">=3.9" files = [ - {file = "marshmallow-3.23.2-py3-none-any.whl", hash = "sha256:bcaf2d6fd74fb1459f8450e85d994997ad3e70036452cbfa4ab685acb19479b3"}, - {file = "marshmallow-3.23.2.tar.gz", hash = "sha256:c448ac6455ca4d794773f00bae22c2f351d62d739929f761dce5eacb5c468d7f"}, + {file = "marshmallow-3.26.0-py3-none-any.whl", hash = "sha256:1287bca04e6a5f4094822ac153c03da5e214a0a60bcd557b140f3e66991b8ca1"}, + {file = "marshmallow-3.26.0.tar.gz", hash = "sha256:eb36762a1cc76d7abf831e18a3a1b26d3d481bbc74581b8e532a3d3a8115e1cb"}, ] [package.dependencies] @@ -3071,7 +3010,7 @@ packaging = ">=17.0" [package.extras] dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"] -docs = ["alabaster (==1.0.0)", "autodocsumm (==0.2.14)", "sphinx (==8.1.3)", "sphinx-issues (==5.0.0)", "sphinx-version-warning (==1.1.2)"] +docs = ["autodocsumm (==0.2.14)", "furo (==2024.8.6)", "sphinx (==8.1.3)", "sphinx-copybutton (==0.5.2)", "sphinx-issues (==5.0.0)", "sphinxext-opengraph (==0.9.1)"] tests = ["pytest", "simplejson"] [[package]] @@ -3156,25 +3095,6 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] -[[package]] -name = "mdit-py-plugins" -version = "0.4.2" -description = "Collection of plugins for markdown-it-py" -optional = false -python-versions = ">=3.8" -files = [ - {file = "mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636"}, - {file = "mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5"}, -] - -[package.dependencies] -markdown-it-py = ">=1.0.0,<4.0.0" - -[package.extras] -code-style = ["pre-commit"] -rtd = ["myst-parser", "sphinx-book-theme"] -testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] - [[package]] name = "mdurl" version = "0.1.2" @@ -3188,13 +3108,13 @@ files = [ [[package]] name = "mistune" -version = "3.1.0" +version = "3.1.1" description = "A sane and fast Markdown parser with useful plugins and renderers" optional = false python-versions = ">=3.8" files = [ - {file = "mistune-3.1.0-py3-none-any.whl", hash = "sha256:b05198cf6d671b3deba6c87ec6cf0d4eb7b72c524636eddb6dbf13823b52cee1"}, - {file = "mistune-3.1.0.tar.gz", hash = 
"sha256:dbcac2f78292b9dc066cd03b7a3a26b62d85f8159f2ea5fd28e55df79908d667"}, + {file = "mistune-3.1.1-py3-none-any.whl", hash = "sha256:02106ac2aa4f66e769debbfa028509a275069dcffce0dfa578edd7b991ee700a"}, + {file = "mistune-3.1.1.tar.gz", hash = "sha256:e0740d635f515119f7d1feb6f9b192ee60f0cc649f80a8f944f905706a21654c"}, ] [package.dependencies] @@ -3383,18 +3303,18 @@ test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>= [[package]] name = "nbconvert" -version = "7.16.4" +version = "7.16.6" description = "Converting Jupyter Notebooks (.ipynb files) to other formats. Output formats include asciidoc, html, latex, markdown, pdf, py, rst, script. nbconvert can be used both as a Python library (`import nbconvert`) or as a command line tool (invoked as `jupyter nbconvert ...`)." optional = false python-versions = ">=3.8" files = [ - {file = "nbconvert-7.16.4-py3-none-any.whl", hash = "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3"}, - {file = "nbconvert-7.16.4.tar.gz", hash = "sha256:86ca91ba266b0a448dc96fa6c5b9d98affabde2867b363258703536807f9f7f4"}, + {file = "nbconvert-7.16.6-py3-none-any.whl", hash = "sha256:1375a7b67e0c2883678c48e506dc320febb57685e5ee67faa51b18a90f3a712b"}, + {file = "nbconvert-7.16.6.tar.gz", hash = "sha256:576a7e37c6480da7b8465eefa66c17844243816ce1ccc372633c6b71c3c0f582"}, ] [package.dependencies] beautifulsoup4 = "*" -bleach = "!=5.0.0" +bleach = {version = "!=5.0.0", extras = ["css"]} defusedxml = "*" jinja2 = ">=3.0" jupyter-core = ">=4.7" @@ -3406,7 +3326,6 @@ nbformat = ">=5.7" packaging = "*" pandocfilters = ">=1.4.1" pygments = ">=2.4.1" -tinycss2 = "*" traitlets = ">=5.1" [package.extras] @@ -3547,37 +3466,37 @@ test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync" [[package]] name = "numba" -version = "0.60.0" +version = "0.61.0" description = "compiling Python code using LLVM" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" files = [ - {file = "numba-0.60.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d761de835cd38fb400d2c26bb103a2726f548dc30368853121d66201672e651"}, - {file = "numba-0.60.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:159e618ef213fba758837f9837fb402bbe65326e60ba0633dbe6c7f274d42c1b"}, - {file = "numba-0.60.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1527dc578b95c7c4ff248792ec33d097ba6bef9eda466c948b68dfc995c25781"}, - {file = "numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe0b28abb8d70f8160798f4de9d486143200f34458d34c4a214114e445d7124e"}, - {file = "numba-0.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:19407ced081d7e2e4b8d8c36aa57b7452e0283871c296e12d798852bc7d7f198"}, - {file = "numba-0.60.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a17b70fc9e380ee29c42717e8cc0bfaa5556c416d94f9aa96ba13acb41bdece8"}, - {file = "numba-0.60.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fb02b344a2a80efa6f677aa5c40cd5dd452e1b35f8d1c2af0dfd9ada9978e4b"}, - {file = "numba-0.60.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f4fde652ea604ea3c86508a3fb31556a6157b2c76c8b51b1d45eb40c8598703"}, - {file = "numba-0.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4142d7ac0210cc86432b818338a2bc368dc773a2f5cf1e32ff7c5b378bd63ee8"}, - {file = "numba-0.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac02c041e9b5bc8cf8f2034ff6f0dbafccd1ae9590dc146b3a02a45e53af4e2"}, - {file = 
"numba-0.60.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7da4098db31182fc5ffe4bc42c6f24cd7d1cb8a14b59fd755bfee32e34b8404"}, - {file = "numba-0.60.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38d6ea4c1f56417076ecf8fc327c831ae793282e0ff51080c5094cb726507b1c"}, - {file = "numba-0.60.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:62908d29fb6a3229c242e981ca27e32a6e606cc253fc9e8faeb0e48760de241e"}, - {file = "numba-0.60.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ebaa91538e996f708f1ab30ef4d3ddc344b64b5227b67a57aa74f401bb68b9d"}, - {file = "numba-0.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:f75262e8fe7fa96db1dca93d53a194a38c46da28b112b8a4aca168f0df860347"}, - {file = "numba-0.60.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:01ef4cd7d83abe087d644eaa3d95831b777aa21d441a23703d649e06b8e06b74"}, - {file = "numba-0.60.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:819a3dfd4630d95fd574036f99e47212a1af41cbcb019bf8afac63ff56834449"}, - {file = "numba-0.60.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b983bd6ad82fe868493012487f34eae8bf7dd94654951404114f23c3466d34b"}, - {file = "numba-0.60.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c151748cd269ddeab66334bd754817ffc0cabd9433acb0f551697e5151917d25"}, - {file = "numba-0.60.0-cp39-cp39-win_amd64.whl", hash = "sha256:3031547a015710140e8c87226b4cfe927cac199835e5bf7d4fe5cb64e814e3ab"}, - {file = "numba-0.60.0.tar.gz", hash = "sha256:5df6158e5584eece5fc83294b949fd30b9f1125df7708862205217e068aabf16"}, -] - -[package.dependencies] -llvmlite = "==0.43.*" -numpy = ">=1.22,<2.1" + {file = "numba-0.61.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9cab9783a700fa428b1a54d65295122bc03b3de1d01fb819a6b9dbbddfdb8c43"}, + {file = "numba-0.61.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:46c5ae094fb3706f5adf9021bfb7fc11e44818d61afee695cdee4eadfed45e98"}, + {file = "numba-0.61.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6fb74e81aa78a2303e30593d8331327dfc0d2522b5db05ac967556a26db3ef87"}, + {file = "numba-0.61.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0ebbd4827091384ab8c4615ba1b3ca8bc639a3a000157d9c37ba85d34cd0da1b"}, + {file = "numba-0.61.0-cp310-cp310-win_amd64.whl", hash = "sha256:43aa4d7d10c542d3c78106b8481e0cbaaec788c39ee8e3d7901682748ffdf0b4"}, + {file = "numba-0.61.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:bf64c2d0f3d161af603de3825172fb83c2600bcb1d53ae8ea568d4c53ba6ac08"}, + {file = "numba-0.61.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de5aa7904741425f28e1028b85850b31f0a245e9eb4f7c38507fb893283a066c"}, + {file = "numba-0.61.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21c2fe25019267a608e2710a6a947f557486b4b0478b02e45a81cf606a05a7d4"}, + {file = "numba-0.61.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:74250b26ed6a1428763e774dc5b2d4e70d93f73795635b5412b8346a4d054574"}, + {file = "numba-0.61.0-cp311-cp311-win_amd64.whl", hash = "sha256:b72bbc8708e98b3741ad0c63f9929c47b623cc4ee86e17030a4f3e301e8401ac"}, + {file = "numba-0.61.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:152146ecdbb8d8176f294e9f755411e6f270103a11c3ff50cecc413f794e52c8"}, + {file = "numba-0.61.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5cafa6095716fcb081618c28a8d27bf7c001e09696f595b41836dec114be2905"}, + {file = "numba-0.61.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:ffe9fe373ed30638d6e20a0269f817b2c75d447141f55a675bfcf2d1fe2e87fb"}, + {file = "numba-0.61.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9f25f7fef0206d55c1cfb796ad833cbbc044e2884751e56e798351280038484c"}, + {file = "numba-0.61.0-cp312-cp312-win_amd64.whl", hash = "sha256:550d389573bc3b895e1ccb18289feea11d937011de4d278b09dc7ed585d1cdcb"}, + {file = "numba-0.61.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:b96fafbdcf6f69b69855273e988696aae4974115a815f6818fef4af7afa1f6b8"}, + {file = "numba-0.61.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f6c452dca1de8e60e593f7066df052dd8da09b243566ecd26d2b796e5d3087d"}, + {file = "numba-0.61.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:44240e694d4aa321430c97b21453e46014fe6c7b8b7d932afa7f6a88cc5d7e5e"}, + {file = "numba-0.61.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:764f0e47004f126f58c3b28e0a02374c420a9d15157b90806d68590f5c20cc89"}, + {file = "numba-0.61.0-cp313-cp313-win_amd64.whl", hash = "sha256:074cd38c5b1f9c65a4319d1f3928165f48975ef0537ad43385b2bd908e6e2e35"}, + {file = "numba-0.61.0.tar.gz", hash = "sha256:888d2e89b8160899e19591467e8fdd4970e07606e1fbc248f239c89818d5f925"}, +] + +[package.dependencies] +llvmlite = "==0.44.*" +numpy = ">=1.24,<2.2" [[package]] name = "numpy" @@ -3642,13 +3561,13 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "openai" -version = "1.58.1" +version = "1.60.2" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.58.1-py3-none-any.whl", hash = "sha256:e2910b1170a6b7f88ef491ac3a42c387f08bd3db533411f7ee391d166571d63c"}, - {file = "openai-1.58.1.tar.gz", hash = "sha256:f5a035fd01e141fc743f4b0e02c41ca49be8fab0866d3b67f5f29b4f4d3c0973"}, + {file = "openai-1.60.2-py3-none-any.whl", hash = "sha256:993bd11b96900b9098179c728026f016b4982ded7ee30dfcf4555eab1171fff9"}, + {file = "openai-1.60.2.tar.gz", hash = "sha256:a8f843e10f2855713007f491d96afb2694b11b5e02cb97c7d01a0be60bc5bb51"}, ] [package.dependencies] @@ -3680,6 +3599,234 @@ files = [ deprecated = ">=1.2.6" importlib-metadata = ">=6.0,<=8.5.0" +[[package]] +name = "opentelemetry-instrumentation" +version = "0.50b0" +description = "Instrumentation Tools & Auto Instrumentation for OpenTelemetry Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation-0.50b0-py3-none-any.whl", hash = "sha256:b8f9fc8812de36e1c6dffa5bfc6224df258841fb387b6dfe5df15099daa10630"}, + {file = "opentelemetry_instrumentation-0.50b0.tar.gz", hash = "sha256:7d98af72de8dec5323e5202e46122e5f908592b22c6d24733aad619f07d82979"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.4,<2.0" +opentelemetry-semantic-conventions = "0.50b0" +packaging = ">=18.0" +wrapt = ">=1.0.0,<2.0.0" + +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.50b0" +description = "ASGI instrumentation for OpenTelemetry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_asgi-0.50b0-py3-none-any.whl", hash = "sha256:2ba1297f746e55dec5a17fe825689da0613662fb25c004c3965a6c54b1d5be22"}, + {file = "opentelemetry_instrumentation_asgi-0.50b0.tar.gz", hash = "sha256:3ca4cb5616ae6a3e8ce86e7d5c360a8d8cc8ed722cf3dc8a5e44300774e87d49"}, +] + +[package.dependencies] +asgiref = ">=3.0,<4.0" +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +opentelemetry-util-http = 
"0.50b0" + +[package.extras] +instruments = ["asgiref (>=3.0,<4.0)"] + +[[package]] +name = "opentelemetry-instrumentation-dbapi" +version = "0.50b0" +description = "OpenTelemetry Database API instrumentation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_dbapi-0.50b0-py3-none-any.whl", hash = "sha256:23a730c3d7372b04b8a9507d2a67c5efbf92ff718eaa002b81ffbaf2b01d270f"}, + {file = "opentelemetry_instrumentation_dbapi-0.50b0.tar.gz", hash = "sha256:2603ca39e216893026c185ca8c44c326c0a9a763d5afff2309bd6195c50b7c49"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +wrapt = ">=1.0.0,<2.0.0" + +[[package]] +name = "opentelemetry-instrumentation-django" +version = "0.50b0" +description = "OpenTelemetry Instrumentation for Django" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_django-0.50b0-py3-none-any.whl", hash = "sha256:ab7b4cd52b8f12420d968823f6bbfbc2a6ddb2af7a05fcb0d5b6755d338f1915"}, + {file = "opentelemetry_instrumentation_django-0.50b0.tar.gz", hash = "sha256:624fd0beb1ac827f2af31709c2da5cb55d8dc899c2449d6e8fcc9fa5538fd56b"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-instrumentation-wsgi = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +opentelemetry-util-http = "0.50b0" + +[package.extras] +asgi = ["opentelemetry-instrumentation-asgi (==0.50b0)"] +instruments = ["django (>=1.10)"] + +[[package]] +name = "opentelemetry-instrumentation-fastapi" +version = "0.50b0" +description = "OpenTelemetry FastAPI Instrumentation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_fastapi-0.50b0-py3-none-any.whl", hash = "sha256:8f03b738495e4705fbae51a2826389c7369629dace89d0f291c06ffefdff5e52"}, + {file = "opentelemetry_instrumentation_fastapi-0.50b0.tar.gz", hash = "sha256:16b9181682136da210295def2bb304a32fb9bdee9a935cdc9da43567f7c1149e"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-instrumentation-asgi = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +opentelemetry-util-http = "0.50b0" + +[package.extras] +instruments = ["fastapi (>=0.58,<1.0)"] + +[[package]] +name = "opentelemetry-instrumentation-flask" +version = "0.50b0" +description = "Flask instrumentation for OpenTelemetry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_flask-0.50b0-py3-none-any.whl", hash = "sha256:db7fb40191145f4356a793922c3fc80a33689e6a7c7c4c6def8aa1eedb0ac42a"}, + {file = "opentelemetry_instrumentation_flask-0.50b0.tar.gz", hash = "sha256:e56a820b1d43fdd5a57f7b481c4d6365210a48a1312c83af4185bc636977755f"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-instrumentation-wsgi = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +opentelemetry-util-http = "0.50b0" +packaging = ">=21.0" + +[package.extras] +instruments = ["flask (>=1.0)"] + +[[package]] +name = "opentelemetry-instrumentation-psycopg2" +version = "0.50b0" +description = "OpenTelemetry psycopg2 instrumentation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_psycopg2-0.50b0-py3-none-any.whl", hash = "sha256:448297e63320711b5571f64bcf5d67ecf4856454c36d3bff6c3d01a4f8a48d18"}, 
+ {file = "opentelemetry_instrumentation_psycopg2-0.50b0.tar.gz", hash = "sha256:86f8e507e98d8824f51bbc3c62121dbd4b8286063362f10b9dfa035a8da49f0b"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-instrumentation-dbapi = "0.50b0" + +[package.extras] +instruments = ["psycopg2 (>=2.7.3.1)"] + +[[package]] +name = "opentelemetry-instrumentation-requests" +version = "0.50b0" +description = "OpenTelemetry requests instrumentation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_requests-0.50b0-py3-none-any.whl", hash = "sha256:2c60a890988d6765de9230004d0af9071b3b2e1ddba4ca3b631cfb8a1722208d"}, + {file = "opentelemetry_instrumentation_requests-0.50b0.tar.gz", hash = "sha256:f8088c76f757985b492aad33331d21aec2f99c197472a57091c2e986a4b7ec8b"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +opentelemetry-util-http = "0.50b0" + +[package.extras] +instruments = ["requests (>=2.0,<3.0)"] + +[[package]] +name = "opentelemetry-instrumentation-urllib" +version = "0.50b0" +description = "OpenTelemetry urllib instrumentation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_urllib-0.50b0-py3-none-any.whl", hash = "sha256:55024940fd41fbdd5a6ab5b6397660900b7a75e23f9ff7f61b4ae1279710a3ec"}, + {file = "opentelemetry_instrumentation_urllib-0.50b0.tar.gz", hash = "sha256:af3e9710635c3f8a5ec38adc772dfef0c1022d0196007baf4b74504e920b5d31"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +opentelemetry-util-http = "0.50b0" + +[[package]] +name = "opentelemetry-instrumentation-urllib3" +version = "0.50b0" +description = "OpenTelemetry urllib3 instrumentation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_urllib3-0.50b0-py3-none-any.whl", hash = "sha256:c679b3908645b7d4d07c36960fe0efef490b403983e314108450146cc89bd675"}, + {file = "opentelemetry_instrumentation_urllib3-0.50b0.tar.gz", hash = "sha256:2c4a1d9f128eaf753871b1d90659c744691d039a6601ba546081347ae192bd0e"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +opentelemetry-util-http = "0.50b0" +wrapt = ">=1.0.0,<2.0.0" + +[package.extras] +instruments = ["urllib3 (>=1.0.0,<3.0.0)"] + +[[package]] +name = "opentelemetry-instrumentation-wsgi" +version = "0.50b0" +description = "WSGI Middleware for OpenTelemetry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_instrumentation_wsgi-0.50b0-py3-none-any.whl", hash = "sha256:4bc0fdf52b603507d6170a25504f0ceea358d7e90a2c0e8794b7b7eca5ea355c"}, + {file = "opentelemetry_instrumentation_wsgi-0.50b0.tar.gz", hash = "sha256:c25b5f1b664d984a41546a34cf2f893dcde6cf56922f88c475864e7df37edf4a"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.50b0" +opentelemetry-semantic-conventions = "0.50b0" +opentelemetry-util-http = "0.50b0" + +[[package]] +name = "opentelemetry-resource-detector-azure" +version = "0.1.5" +description = "Azure Resource Detector for OpenTelemetry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_resource_detector_azure-0.1.5-py3-none-any.whl", hash = 
"sha256:4dcc5d54ab5c3b11226af39509bc98979a8b9e0f8a24c1b888783755d3bf00eb"}, + {file = "opentelemetry_resource_detector_azure-0.1.5.tar.gz", hash = "sha256:e0ba658a87c69eebc806e75398cd0e9f68a8898ea62de99bc1b7083136403710"}, +] + +[package.dependencies] +opentelemetry-sdk = ">=1.21,<2.0" + [[package]] name = "opentelemetry-sdk" version = "1.29.0" @@ -3711,6 +3858,17 @@ files = [ deprecated = ">=1.2.6" opentelemetry-api = "1.29.0" +[[package]] +name = "opentelemetry-util-http" +version = "0.50b0" +description = "Web util for OpenTelemetry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "opentelemetry_util_http-0.50b0-py3-none-any.whl", hash = "sha256:21f8aedac861ffa3b850f8c0a6c373026189eb8630ac6e14a2bf8c55695cc090"}, + {file = "opentelemetry_util_http-0.50b0.tar.gz", hash = "sha256:dc4606027e1bc02aabb9533cc330dd43f874fca492e4175c31d7154f341754af"}, +] + [[package]] name = "overrides" version = "7.7.0" @@ -3841,24 +3999,6 @@ files = [ qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] testing = ["docopt", "pytest"] -[[package]] -name = "partd" -version = "1.4.2" -description = "Appendable key-value storage" -optional = false -python-versions = ">=3.9" -files = [ - {file = "partd-1.4.2-py3-none-any.whl", hash = "sha256:978e4ac767ec4ba5b86c6eaa52e5a2a3bc748a2ca839e8cc798f1cc6ce6efb0f"}, - {file = "partd-1.4.2.tar.gz", hash = "sha256:d022c33afbdc8405c226621b015e8067888173d85f7f5ecebb3cafed9a20f02c"}, -] - -[package.dependencies] -locket = "*" -toolz = "*" - -[package.extras] -complete = ["blosc", "numpy (>=1.20.0)", "pandas (>=1.3)", "pyzmq"] - [[package]] name = "patsy" version = "1.0.1" @@ -3892,93 +4032,89 @@ ptyprocess = ">=0.5" [[package]] name = "pillow" -version = "11.0.0" +version = "11.1.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.9" files = [ - {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"}, - {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"}, - {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a65149d8ada1055029fcb665452b2814fe7d7082fcb0c5bed6db851cb69b2086"}, - {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a58d8ac0cc0e7f3a014509f0455248a76629ca9b604eca7dc5927cc593c5e9"}, - {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c26845094b1af3c91852745ae78e3ea47abf3dbcd1cf962f16b9a5fbe3ee8488"}, - {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1a61b54f87ab5786b8479f81c4b11f4d61702830354520837f8cc791ebba0f5f"}, - {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:674629ff60030d144b7bca2b8330225a9b11c482ed408813924619c6f302fdbb"}, - {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:598b4e238f13276e0008299bd2482003f48158e2b11826862b1eb2ad7c768b97"}, - {file = "pillow-11.0.0-cp310-cp310-win32.whl", hash = "sha256:9a0f748eaa434a41fccf8e1ee7a3eed68af1b690e75328fd7a60af123c193b50"}, - {file = "pillow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:a5629742881bcbc1f42e840af185fd4d83a5edeb96475a575f4da50d6ede337c"}, - {file = "pillow-11.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:ee217c198f2e41f184f3869f3e485557296d505b5195c513b2bfe0062dc537f1"}, - {file = "pillow-11.0.0-cp311-cp311-macosx_10_10_x86_64.whl", 
hash = "sha256:1c1d72714f429a521d8d2d018badc42414c3077eb187a59579f28e4270b4b0fc"}, - {file = "pillow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:499c3a1b0d6fc8213519e193796eb1a86a1be4b1877d678b30f83fd979811d1a"}, - {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8b2351c85d855293a299038e1f89db92a2f35e8d2f783489c6f0b2b5f3fe8a3"}, - {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f4dba50cfa56f910241eb7f883c20f1e7b1d8f7d91c750cd0b318bad443f4d5"}, - {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5ddbfd761ee00c12ee1be86c9c0683ecf5bb14c9772ddbd782085779a63dd55b"}, - {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:45c566eb10b8967d71bf1ab8e4a525e5a93519e29ea071459ce517f6b903d7fa"}, - {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b4fd7bd29610a83a8c9b564d457cf5bd92b4e11e79a4ee4716a63c959699b306"}, - {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cb929ca942d0ec4fac404cbf520ee6cac37bf35be479b970c4ffadf2b6a1cad9"}, - {file = "pillow-11.0.0-cp311-cp311-win32.whl", hash = "sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5"}, - {file = "pillow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:52a2d8323a465f84faaba5236567d212c3668f2ab53e1c74c15583cf507a0291"}, - {file = "pillow-11.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:16095692a253047fe3ec028e951fa4221a1f3ed3d80c397e83541a3037ff67c9"}, - {file = "pillow-11.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923"}, - {file = "pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903"}, - {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4"}, - {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f"}, - {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9"}, - {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7"}, - {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6"}, - {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc"}, - {file = "pillow-11.0.0-cp312-cp312-win32.whl", hash = "sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6"}, - {file = "pillow-11.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47"}, - {file = "pillow-11.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25"}, - {file = "pillow-11.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcd1fb5bb7b07f64c15618c89efcc2cfa3e95f0e3bcdbaf4642509de1942a699"}, - {file = "pillow-11.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0e038b0745997c7dcaae350d35859c9715c71e92ffb7e0f4a8e8a16732150f38"}, - {file = 
"pillow-11.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ae08bd8ffc41aebf578c2af2f9d8749d91f448b3bfd41d7d9ff573d74f2a6b2"}, - {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d69bfd8ec3219ae71bcde1f942b728903cad25fafe3100ba2258b973bd2bc1b2"}, - {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:61b887f9ddba63ddf62fd02a3ba7add935d053b6dd7d58998c630e6dbade8527"}, - {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:c6a660307ca9d4867caa8d9ca2c2658ab685de83792d1876274991adec7b93fa"}, - {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:73e3a0200cdda995c7e43dd47436c1548f87a30bb27fb871f352a22ab8dcf45f"}, - {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fba162b8872d30fea8c52b258a542c5dfd7b235fb5cb352240c8d63b414013eb"}, - {file = "pillow-11.0.0-cp313-cp313-win32.whl", hash = "sha256:f1b82c27e89fffc6da125d5eb0ca6e68017faf5efc078128cfaa42cf5cb38798"}, - {file = "pillow-11.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:8ba470552b48e5835f1d23ecb936bb7f71d206f9dfeee64245f30c3270b994de"}, - {file = "pillow-11.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:846e193e103b41e984ac921b335df59195356ce3f71dcfd155aa79c603873b84"}, - {file = "pillow-11.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4ad70c4214f67d7466bea6a08061eba35c01b1b89eaa098040a35272a8efb22b"}, - {file = "pillow-11.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ec0d5af64f2e3d64a165f490d96368bb5dea8b8f9ad04487f9ab60dc4bb6003"}, - {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c809a70e43c7977c4a42aefd62f0131823ebf7dd73556fa5d5950f5b354087e2"}, - {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4b60c9520f7207aaf2e1d94de026682fc227806c6e1f55bba7606d1c94dd623a"}, - {file = "pillow-11.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1e2688958a840c822279fda0086fec1fdab2f95bf2b717b66871c4ad9859d7e8"}, - {file = "pillow-11.0.0-cp313-cp313t-win32.whl", hash = "sha256:607bbe123c74e272e381a8d1957083a9463401f7bd01287f50521ecb05a313f8"}, - {file = "pillow-11.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c39ed17edea3bc69c743a8dd3e9853b7509625c2462532e62baa0732163a904"}, - {file = "pillow-11.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3"}, - {file = "pillow-11.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2e46773dc9f35a1dd28bd6981332fd7f27bec001a918a72a79b4133cf5291dba"}, - {file = "pillow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2679d2258b7f1192b378e2893a8a0a0ca472234d4c2c0e6bdd3380e8dfa21b6a"}, - {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda2616eb2313cbb3eebbe51f19362eb434b18e3bb599466a1ffa76a033fb916"}, - {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ec184af98a121fb2da42642dea8a29ec80fc3efbaefb86d8fdd2606619045d"}, - {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8594f42df584e5b4bb9281799698403f7af489fba84c34d53d1c4bfb71b7c4e7"}, - {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:c12b5ae868897c7338519c03049a806af85b9b8c237b7d675b8c5e089e4a618e"}, - {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:70fbbdacd1d271b77b7721fe3cdd2d537bbbd75d29e6300c672ec6bb38d9672f"}, - 
{file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5178952973e588b3f1360868847334e9e3bf49d19e169bbbdfaf8398002419ae"}, - {file = "pillow-11.0.0-cp39-cp39-win32.whl", hash = "sha256:8c676b587da5673d3c75bd67dd2a8cdfeb282ca38a30f37950511766b26858c4"}, - {file = "pillow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:94f3e1780abb45062287b4614a5bc0874519c86a777d4a7ad34978e86428b8dd"}, - {file = "pillow-11.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:290f2cc809f9da7d6d622550bbf4c1e57518212da51b6a30fe8e0a270a5b78bd"}, - {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1187739620f2b365de756ce086fdb3604573337cc28a0d3ac4a01ab6b2d2a6d2"}, - {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fbbcb7b57dc9c794843e3d1258c0fbf0f48656d46ffe9e09b63bbd6e8cd5d0a2"}, - {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d203af30149ae339ad1b4f710d9844ed8796e97fda23ffbc4cc472968a47d0b"}, - {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a0d3b115009ebb8ac3d2ebec5c2982cc693da935f4ab7bb5c8ebe2f47d36f2"}, - {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:73853108f56df97baf2bb8b522f3578221e56f646ba345a372c78326710d3830"}, - {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e58876c91f97b0952eb766123bfef372792ab3f4e3e1f1a2267834c2ab131734"}, - {file = "pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316"}, - {file = "pillow-11.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5bd2d3bdb846d757055910f0a59792d33b555800813c3b39ada1829c372ccb06"}, - {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:375b8dd15a1f5d2feafff536d47e22f69625c1aa92f12b339ec0b2ca40263273"}, - {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:daffdf51ee5db69a82dd127eabecce20729e21f7a3680cf7cbb23f0829189790"}, - {file = "pillow-11.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7326a1787e3c7b0429659e0a944725e1b03eeaa10edd945a86dead1913383944"}, - {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, + {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"}, + {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482"}, + {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e"}, + {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269"}, + {file = "pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49"}, + {file = "pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a"}, + {file = "pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65"}, + {file = "pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457"}, + {file = "pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1"}, + {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2"}, + {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96"}, + {file = "pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f"}, + {file = "pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761"}, + {file = "pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71"}, + {file = "pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a"}, + {file = "pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f"}, + {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91"}, + {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c"}, + {file = "pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6"}, + {file = "pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf"}, + {file = "pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5"}, + {file = "pillow-11.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc"}, + {file = "pillow-11.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114"}, + {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352"}, + {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3"}, + {file = "pillow-11.1.0-cp313-cp313-win32.whl", hash = "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9"}, + {file = "pillow-11.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c"}, + {file = "pillow-11.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65"}, + {file = "pillow-11.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861"}, + {file = "pillow-11.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081"}, + {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c"}, + {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547"}, + {file = "pillow-11.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab"}, + {file = "pillow-11.1.0-cp313-cp313t-win32.whl", hash = "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9"}, + {file = "pillow-11.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe"}, + {file = "pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756"}, + {file = "pillow-11.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:bf902d7413c82a1bfa08b06a070876132a5ae6b2388e2712aab3a7cbc02205c6"}, + {file = "pillow-11.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c1eec9d950b6fe688edee07138993e54ee4ae634c51443cfb7c1e7613322718e"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e275ee4cb11c262bd108ab2081f750db2a1c0b8c12c1897f27b160c8bd57bbc"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4db853948ce4e718f2fc775b75c37ba2efb6aaea41a1a5fc57f0af59eee774b2"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:ab8a209b8485d3db694fa97a896d96dd6533d63c22829043fd9de627060beade"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:54251ef02a2309b5eec99d151ebf5c9904b77976c8abdcbce7891ed22df53884"}, + {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5bb94705aea800051a743aa4874bb1397d4695fb0583ba5e425ee0328757f196"}, + {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:89dbdb3e6e9594d512780a5a1c42801879628b38e3efc7038094430844e271d8"}, + {file = "pillow-11.1.0-cp39-cp39-win32.whl", hash = "sha256:e5449ca63da169a2e6068dd0e2fcc8d91f9558aba89ff6d02121ca8ab11e79e5"}, + {file = "pillow-11.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:3362c6ca227e65c54bf71a5f88b3d4565ff1bcbc63ae72c34b07bbb1cc59a43f"}, + {file = "pillow-11.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:b20be51b37a75cc54c2c55def3fa2c65bb94ba859dde241cd0a4fd302de5ae0a"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0"}, + {file = "pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20"}, ] [package.extras] docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] fpx = ["olefile"] mic = ["olefile"] -tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] typing = ["typing-extensions"] xmp = ["defusedxml"] @@ -4098,13 +4234,13 @@ plot = ["matplotlib"] [[package]] name = "pre-commit" -version = "4.0.1" +version = "4.1.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.9" files = [ - {file = "pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878"}, - {file = "pre_commit-4.0.1.tar.gz", hash = "sha256:80905ac375958c0444c65e9cebebd948b3cdb518f335a091a670a89d652139d2"}, + {file = "pre_commit-4.1.0-py2.py3-none-any.whl", hash = "sha256:d29e7cb346295bcc1cc75fc3e92e343495e3ea0196c9ec6ba53f49f10ab6ae7b"}, + {file = "pre_commit-4.1.0.tar.gz", hash = "sha256:ae3f018575a588e30dfddfab9a05448bfbd6b73d78709617b5a2b853549716d4"}, ] [package.dependencies] @@ -4130,13 +4266,13 @@ twisted = ["twisted"] [[package]] name = "prompt-toolkit" -version = "3.0.48" +version = "3.0.50" description = "Library for building powerful interactive command lines in Python" optional = false -python-versions = ">=3.7.0" +python-versions = ">=3.8.0" files = [ - {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, - {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"}, + {file = "prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198"}, + {file = "prompt_toolkit-3.0.50.tar.gz", hash = "sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab"}, ] [package.dependencies] @@ -4286,26 +4422,15 @@ files = [ [package.extras] tests = ["pytest"] -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] - [[package]] name = "pyaml-env" -version = "1.2.1" +version = "1.2.2" description = "Provides yaml file parsing with environment variable resolution" optional = false python-versions = ">=3.6" files = [ - {file = "pyaml_env-1.2.1-py3-none-any.whl", hash = "sha256:2e7da2d4bba0629711ade1a41864e5e200c84ded896a3d27e9f560fae7311c36"}, - {file = "pyaml_env-1.2.1.tar.gz", hash = "sha256:6d5dc98c8c82df743a132c196e79963050c9feb05b0a6f25f3ad77771d3d95b0"}, + {file = "pyaml_env-1.2.2-py3-none-any.whl", hash = "sha256:1c1c852a805a3ac9f9b57ef995520fdeea1c1a7a1edda6471cdf1b7c2ebb13c9"}, + {file = "pyaml_env-1.2.2.tar.gz", hash = "sha256:f83502516b6f1561ba7c2db9ced939a7a9933a66702d8071e98ad07da49a4bb4"}, ] [package.dependencies] @@ -4411,13 +4536,13 @@ files = [ [[package]] name = "pydantic" -version = "2.10.4" +version = "2.10.6" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.10.4-py3-none-any.whl", hash = "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d"}, - {file = "pydantic-2.10.4.tar.gz", hash = "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06"}, + {file = "pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584"}, + {file = "pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"}, ] [package.dependencies] @@ -4554,13 +4679,13 @@ files = [ [[package]] name = "pygments" -version = "2.18.0" +version = "2.19.1" description = "Pygments 
is a syntax highlighting package written in Python." optional = false python-versions = ">=3.8" files = [ - {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, - {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, + {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, + {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, ] [package.extras] @@ -4588,28 +4713,30 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] [[package]] name = "pylance" -version = "0.16.0" +version = "0.20.0" description = "python wrapper for Lance columnar format" optional = false python-versions = ">=3.9" files = [ - {file = "pylance-0.16.0-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:ee0c11d15d6e1d5cac6fb5dbcd443db1dc8a581ea0251b1df6f0dcc80175a006"}, - {file = "pylance-0.16.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1b0ac2c25aed3f3afdbbb36f45ef076b84f7b2f8fc3edcdfb94c4c485b55b22d"}, - {file = "pylance-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9d2daf0fc1f6006c0f0b955cb976f0f25e7fa3813dc60c09446ac7a05172888"}, - {file = "pylance-0.16.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:449d961ee5c7eaff76a87bbfeaa77576747448f0a8b8d646db8ba0f6c282eee2"}, - {file = "pylance-0.16.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:85abe3171995a436adbfa205f751cc82ccef3332740626e0217f2b48e6b22d4e"}, - {file = "pylance-0.16.0-cp39-abi3-win_amd64.whl", hash = "sha256:77d821d8e9bd1bcaefb6a40d5dddf112740cf81b9ab317c9ebc60a7035ae9282"}, + {file = "pylance-0.20.0-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:fbb640b00567ff79d23a5994c0f0bc97587fcf74ece6ca568e77c453f70801c5"}, + {file = "pylance-0.20.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:c8e30f1b6429b843429fde8f3d6fb7e715153174161e3bcf29902e2d32ee471f"}, + {file = "pylance-0.20.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:032242a347ac909db81c0ade6384d82102f4ec61bc892d8caaa04b3d0a7b1613"}, + {file = "pylance-0.20.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:5320f11925524c1a67279afc4638cad60f61c36f11d3d9c2a91651489874be0d"}, + {file = "pylance-0.20.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:fa5acd4488c574f6017145eafd5b45b178d611a5cbcd2ed492e01013fc72f5a2"}, + {file = "pylance-0.20.0-cp39-abi3-win_amd64.whl", hash = "sha256:587850cddd0e669addd9414f378fa30527fc9020010cb73c842f026ea8a9b4ea"}, ] [package.dependencies] -numpy = ">=1.22,<2" -pyarrow = ">=12" +numpy = ">=1.22" +pyarrow = ">=14" [package.extras] benchmarks = ["pytest-benchmark"] +cuvs-cu11 = ["cuvs-cu11", "pylibraft-cu11"] +cuvs-cu12 = ["cuvs-cu12", "pylibraft-cu12"] dev = ["ruff (==0.4.1)"] -ray = ["ray[data]"] -tests = ["boto3", "datasets", "duckdb", "h5py (<3.11)", "ml-dtypes", "pandas", "pillow", "polars[pandas,pyarrow]", "pytest", "tensorflow", "tqdm"] +ray = ["ray[data] (<2.38)"] +tests = ["boto3", "datasets", "duckdb", "ml-dtypes", "pandas", "pillow", "polars[pandas,pyarrow]", "pytest", "tensorflow", "tqdm"] torch = ["torch"] [[package]] @@ -4632,13 +4759,13 @@ scipy = ">=1.0" [[package]] name = "pyparsing" -version = "3.2.0" +version = "3.2.1" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.9" files = [ - {file = 
"pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"}, - {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"}, + {file = "pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1"}, + {file = "pyparsing-3.2.1.tar.gz", hash = "sha256:61980854fd66de3a90028d679a954d5f2623e83144b5afe5ee86f43d762e5f0a"}, ] [package.extras] @@ -4668,13 +4795,13 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments [[package]] name = "pytest-asyncio" -version = "0.25.0" +version = "0.25.3" description = "Pytest support for asyncio" optional = false python-versions = ">=3.9" files = [ - {file = "pytest_asyncio-0.25.0-py3-none-any.whl", hash = "sha256:db5432d18eac6b7e28b46dcd9b69921b55c3b1086e85febfe04e70b18d9e81b3"}, - {file = "pytest_asyncio-0.25.0.tar.gz", hash = "sha256:8c0610303c9e0442a5db8604505fc0f545456ba1528824842b37b4a626cbf609"}, + {file = "pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3"}, + {file = "pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a"}, ] [package.dependencies] @@ -5029,34 +5156,21 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} -[[package]] -name = "ratelimiter" -version = "1.2.0.post0" -description = "Simple python rate limiting object" -optional = false -python-versions = "*" -files = [ - {file = "ratelimiter-1.2.0.post0-py3-none-any.whl", hash = "sha256:a52be07bc0bb0b3674b4b304550f10c769bbb00fead3072e035904474259809f"}, - {file = "ratelimiter-1.2.0.post0.tar.gz", hash = "sha256:5c395dcabdbbde2e5178ef3f89b568a3066454a6ddc223b76473dac22f89b4f7"}, -] - -[package.extras] -test = ["pytest (>=3.0)", "pytest-asyncio"] - [[package]] name = "referencing" -version = "0.35.1" +version = "0.36.2" description = "JSON Referencing + Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, - {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, + {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, + {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, ] [package.dependencies] attrs = ">=22.2.0" rpds-py = ">=0.7.0" +typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} [[package]] name = "regex" @@ -5200,21 +5314,6 @@ requests = ">=2.0.0" [package.extras] rsa = ["oauthlib[signedtoken] (>=3.0.0)"] -[[package]] -name = "retry" -version = "0.9.2" -description = "Easy to use retry decorator." 
-optional = false -python-versions = "*" -files = [ - {file = "retry-0.9.2-py2.py3-none-any.whl", hash = "sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606"}, - {file = "retry-0.9.2.tar.gz", hash = "sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4"}, -] - -[package.dependencies] -decorator = ">=3.4.2" -py = ">=1.4.26,<2.0.0" - [[package]] name = "rfc3339-validator" version = "0.1.4" @@ -5387,68 +5486,68 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.8.4" +version = "0.9.3" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.8.4-py3-none-linux_armv6l.whl", hash = "sha256:58072f0c06080276804c6a4e21a9045a706584a958e644353603d36ca1eb8a60"}, - {file = "ruff-0.8.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ffb60904651c00a1e0b8df594591770018a0f04587f7deeb3838344fe3adabac"}, - {file = "ruff-0.8.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6ddf5d654ac0d44389f6bf05cee4caeefc3132a64b58ea46738111d687352296"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e248b1f0fa2749edd3350a2a342b67b43a2627434c059a063418e3d375cfe643"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bf197b98ed86e417412ee3b6c893f44c8864f816451441483253d5ff22c0e81e"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c41319b85faa3aadd4d30cb1cffdd9ac6b89704ff79f7664b853785b48eccdf3"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:9f8402b7c4f96463f135e936d9ab77b65711fcd5d72e5d67597b543bbb43cf3f"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4e56b3baa9c23d324ead112a4fdf20db9a3f8f29eeabff1355114dd96014604"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:736272574e97157f7edbbb43b1d046125fce9e7d8d583d5d65d0c9bf2c15addf"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fe710ab6061592521f902fca7ebcb9fabd27bc7c57c764298b1c1f15fff720"}, - {file = "ruff-0.8.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:13e9ec6d6b55f6da412d59953d65d66e760d583dd3c1c72bf1f26435b5bfdbae"}, - {file = "ruff-0.8.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:97d9aefef725348ad77d6db98b726cfdb075a40b936c7984088804dfd38268a7"}, - {file = "ruff-0.8.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ab78e33325a6f5374e04c2ab924a3367d69a0da36f8c9cb6b894a62017506111"}, - {file = "ruff-0.8.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8ef06f66f4a05c3ddbc9121a8b0cecccd92c5bf3dd43b5472ffe40b8ca10f0f8"}, - {file = "ruff-0.8.4-py3-none-win32.whl", hash = "sha256:552fb6d861320958ca5e15f28b20a3d071aa83b93caee33a87b471f99a6c0835"}, - {file = "ruff-0.8.4-py3-none-win_amd64.whl", hash = "sha256:f21a1143776f8656d7f364bd264a9d60f01b7f52243fbe90e7670c0dfe0cf65d"}, - {file = "ruff-0.8.4-py3-none-win_arm64.whl", hash = "sha256:9183dd615d8df50defa8b1d9a074053891ba39025cf5ae88e8bcb52edcc4bf08"}, - {file = "ruff-0.8.4.tar.gz", hash = "sha256:0d5f89f254836799af1615798caa5f80b7f935d7a670fad66c5007928e57ace8"}, + {file = "ruff-0.9.3-py3-none-linux_armv6l.whl", hash = "sha256:7f39b879064c7d9670197d91124a75d118d00b0990586549949aae80cdc16624"}, + {file = "ruff-0.9.3-py3-none-macosx_10_12_x86_64.whl", hash = 
"sha256:a187171e7c09efa4b4cc30ee5d0d55a8d6c5311b3e1b74ac5cb96cc89bafc43c"}, + {file = "ruff-0.9.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c59ab92f8e92d6725b7ded9d4a31be3ef42688a115c6d3da9457a5bda140e2b4"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dc153c25e715be41bb228bc651c1e9b1a88d5c6e5ed0194fa0dfea02b026439"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:646909a1e25e0dc28fbc529eab8eb7bb583079628e8cbe738192853dbbe43af5"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a5a46e09355695fbdbb30ed9889d6cf1c61b77b700a9fafc21b41f097bfbba4"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c4bb09d2bbb394e3730d0918c00276e79b2de70ec2a5231cd4ebb51a57df9ba1"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96a87ec31dc1044d8c2da2ebbed1c456d9b561e7d087734336518181b26b3aa5"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb7554aca6f842645022fe2d301c264e6925baa708b392867b7a62645304df4"}, + {file = "ruff-0.9.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cabc332b7075a914ecea912cd1f3d4370489c8018f2c945a30bcc934e3bc06a6"}, + {file = "ruff-0.9.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:33866c3cc2a575cbd546f2cd02bdd466fed65118e4365ee538a3deffd6fcb730"}, + {file = "ruff-0.9.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:006e5de2621304c8810bcd2ee101587712fa93b4f955ed0985907a36c427e0c2"}, + {file = "ruff-0.9.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ba6eea4459dbd6b1be4e6bfc766079fb9b8dd2e5a35aff6baee4d9b1514ea519"}, + {file = "ruff-0.9.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:90230a6b8055ad47d3325e9ee8f8a9ae7e273078a66401ac66df68943ced029b"}, + {file = "ruff-0.9.3-py3-none-win32.whl", hash = "sha256:eabe5eb2c19a42f4808c03b82bd313fc84d4e395133fb3fc1b1516170a31213c"}, + {file = "ruff-0.9.3-py3-none-win_amd64.whl", hash = "sha256:040ceb7f20791dfa0e78b4230ee9dce23da3b64dd5848e40e3bf3ab76468dcf4"}, + {file = "ruff-0.9.3-py3-none-win_arm64.whl", hash = "sha256:800d773f6d4d33b0a3c60e2c6ae8f4c202ea2de056365acfa519aa48acf28e0b"}, + {file = "ruff-0.9.3.tar.gz", hash = "sha256:8293f89985a090ebc3ed1064df31f3b4b56320cdfcec8b60d3295bddb955c22a"}, ] [[package]] name = "scikit-learn" -version = "1.6.0" +version = "1.6.1" description = "A set of python modules for machine learning and data mining" optional = false python-versions = ">=3.9" files = [ - {file = "scikit_learn-1.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:366fb3fa47dce90afed3d6106183f4978d6f24cfd595c2373424171b915ee718"}, - {file = "scikit_learn-1.6.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:59cd96a8d9f8dfd546f5d6e9787e1b989e981388d7803abbc9efdcde61e47460"}, - {file = "scikit_learn-1.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efa7a579606c73a0b3d210e33ea410ea9e1af7933fe324cb7e6fbafae4ea5948"}, - {file = "scikit_learn-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a46d3ca0f11a540b8eaddaf5e38172d8cd65a86cb3e3632161ec96c0cffb774c"}, - {file = "scikit_learn-1.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:5be4577769c5dde6e1b53de8e6520f9b664ab5861dd57acee47ad119fd7405d6"}, - {file = "scikit_learn-1.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:1f50b4f24cf12a81c3c09958ae3b864d7534934ca66ded3822de4996d25d7285"}, - {file = "scikit_learn-1.6.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:eb9ae21f387826da14b0b9cb1034f5048ddb9182da429c689f5f4a87dc96930b"}, - {file = "scikit_learn-1.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0baa91eeb8c32632628874a5c91885eaedd23b71504d24227925080da075837a"}, - {file = "scikit_learn-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c716d13ba0a2f8762d96ff78d3e0cde90bc9c9b5c13d6ab6bb9b2d6ca6705fd"}, - {file = "scikit_learn-1.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:9aafd94bafc841b626681e626be27bf1233d5a0f20f0a6fdb4bee1a1963c6643"}, - {file = "scikit_learn-1.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:04a5ba45c12a5ff81518aa4f1604e826a45d20e53da47b15871526cda4ff5174"}, - {file = "scikit_learn-1.6.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:21fadfc2ad7a1ce8bd1d90f23d17875b84ec765eecbbfc924ff11fb73db582ce"}, - {file = "scikit_learn-1.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30f34bb5fde90e020653bb84dcb38b6c83f90c70680dbd8c38bd9becbad7a127"}, - {file = "scikit_learn-1.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1dad624cffe3062276a0881d4e441bc9e3b19d02d17757cd6ae79a9d192a0027"}, - {file = "scikit_learn-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:2fce7950a3fad85e0a61dc403df0f9345b53432ac0e47c50da210d22c60b6d85"}, - {file = "scikit_learn-1.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e5453b2e87ef8accedc5a8a4e6709f887ca01896cd7cc8a174fe39bd4bb00aef"}, - {file = "scikit_learn-1.6.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5fe11794236fb83bead2af26a87ced5d26e3370b8487430818b915dafab1724e"}, - {file = "scikit_learn-1.6.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61fe3dcec0d82ae280877a818ab652f4988371e32dd5451e75251bece79668b1"}, - {file = "scikit_learn-1.6.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b44e3a51e181933bdf9a4953cc69c6025b40d2b49e238233f149b98849beb4bf"}, - {file = "scikit_learn-1.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:a17860a562bac54384454d40b3f6155200c1c737c9399e6a97962c63fce503ac"}, - {file = "scikit_learn-1.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:98717d3c152f6842d36a70f21e1468fb2f1a2f8f2624d9a3f382211798516426"}, - {file = "scikit_learn-1.6.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:34e20bfac8ff0ebe0ff20fb16a4d6df5dc4cc9ce383e00c2ab67a526a3c67b18"}, - {file = "scikit_learn-1.6.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eba06d75815406091419e06dd650b91ebd1c5f836392a0d833ff36447c2b1bfa"}, - {file = "scikit_learn-1.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b6916d1cec1ff163c7d281e699d7a6a709da2f2c5ec7b10547e08cc788ddd3ae"}, - {file = "scikit_learn-1.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:66b1cf721a9f07f518eb545098226796c399c64abdcbf91c2b95d625068363da"}, - {file = "scikit_learn-1.6.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7b35b60cf4cd6564b636e4a40516b3c61a4fa7a8b1f7a3ce80c38ebe04750bc3"}, - {file = "scikit_learn-1.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a73b1c2038c93bc7f4bf21f6c9828d5116c5d2268f7a20cfbbd41d3074d52083"}, - {file = "scikit_learn-1.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:5c3fa7d3dd5a0ec2d0baba0d644916fa2ab180ee37850c5d536245df916946bd"}, - {file = "scikit_learn-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:df778486a32518cda33818b7e3ce48c78cef1d5f640a6bc9d97c6d2e71449a51"}, - {file = "scikit_learn-1.6.0.tar.gz", hash = "sha256:9d58481f9f7499dff4196927aedd4285a0baec8caa3790efbe205f13de37dd6e"}, + {file = "scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e"}, + {file = "scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36"}, + {file = "scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8634c4bd21a2a813e0a7e3900464e6d593162a29dd35d25bdf0103b3fce60ed5"}, + {file = "scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:775da975a471c4f6f467725dff0ced5c7ac7bda5e9316b260225b48475279a1b"}, + {file = "scikit_learn-1.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:8a600c31592bd7dab31e1c61b9bbd6dea1b3433e67d264d17ce1017dbdce8002"}, + {file = "scikit_learn-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72abc587c75234935e97d09aa4913a82f7b03ee0b74111dcc2881cba3c5a7b33"}, + {file = "scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b3b00cdc8f1317b5f33191df1386c0befd16625f49d979fe77a8d44cae82410d"}, + {file = "scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4765af3386811c3ca21638f63b9cf5ecf66261cc4815c1db3f1e7dc7b79db2"}, + {file = "scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25fc636bdaf1cc2f4a124a116312d837148b5e10872147bdaf4887926b8c03d8"}, + {file = "scikit_learn-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415"}, + {file = "scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b"}, + {file = "scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2"}, + {file = "scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f"}, + {file = "scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86"}, + {file = "scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52"}, + {file = "scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322"}, + {file = "scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1"}, + {file = "scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348"}, + {file = "scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97"}, + {file = "scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb"}, + {file = 
"scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236"}, + {file = "scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35"}, + {file = "scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691"}, + {file = "scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f"}, + {file = "scikit_learn-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6849dd3234e87f55dce1db34c89a810b489ead832aaf4d4550b7ea85628be6c1"}, + {file = "scikit_learn-1.6.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e"}, + {file = "scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44a17798172df1d3c1065e8fcf9019183f06c87609b49a124ebdf57ae6cb0107"}, + {file = "scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8b7a3b86e411e4bce21186e1c180d792f3d99223dcfa3b4f597ecc92fa1a422"}, + {file = "scikit_learn-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:7a73d457070e3318e32bdb3aa79a8d990474f19035464dfd8bede2883ab5dc3b"}, + {file = "scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e"}, ] [package.dependencies] @@ -5547,23 +5646,34 @@ win32 = ["pywin32"] [[package]] name = "setuptools" -version = "75.6.0" +version = "75.8.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" files = [ - {file = "setuptools-75.6.0-py3-none-any.whl", hash = "sha256:ce74b49e8f7110f9bf04883b730f4765b774ef3ef28f722cce7c273d253aaf7d"}, - {file = "setuptools-75.6.0.tar.gz", hash = "sha256:8199222558df7c86216af4f84c30e9b34a61d8ba19366cc914424cdbd28252f6"}, + {file = "setuptools-75.8.0-py3-none-any.whl", hash = "sha256:e3982f444617239225d675215d51f6ba05f845d4eec313da4418fdbb56fb27e3"}, + {file = "setuptools-75.8.0.tar.gz", hash = "sha256:c5afc8f407c626b8313a86e10311dd3f661c6cd9c09d4bf8c15c0e11f9f2b0e6"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.7.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.8.0)"] core = ["importlib_metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel 
(>=0.44.0)"] -type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.14.*)", "pytest-mypy"] + +[[package]] +name = "shellingham" +version = "1.5.4" +description = "Tool to Detect Surrounding Shell" +optional = false +python-versions = ">=3.7" +files = [ + {file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"}, + {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, +] [[package]] name = "six" @@ -5644,20 +5754,20 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "starlette" -version = "0.41.3" +version = "0.45.3" description = "The little ASGI library that shines." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7"}, - {file = "starlette-0.41.3.tar.gz", hash = "sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835"}, + {file = "starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d"}, + {file = "starlette-0.45.3.tar.gz", hash = "sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f"}, ] [package.dependencies] -anyio = ">=3.4.0,<5" +anyio = ">=3.6.2,<5" [package.extras] -full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] +full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.18)", "pyyaml"] [[package]] name = "statsmodels" @@ -5710,26 +5820,6 @@ build = ["cython (>=3.0.10)"] develop = ["colorama", "cython (>=3.0.10)", "cython (>=3.0.10,<4)", "flake8", "isort", "joblib", "matplotlib (>=3)", "pytest (>=7.3.0,<8)", "pytest-cov", "pytest-randomly", "pytest-xdist", "pywinpty", "setuptools-scm[toml] (>=8.0,<9.0)"] docs = ["ipykernel", "jupyter-client", "matplotlib", "nbconvert", "nbformat", "numpydoc", "pandas-datareader", "sphinx"] -[[package]] -name = "swifter" -version = "1.4.0" -description = "A package which efficiently applies any function to a pandas dataframe or series in the fastest available manner" -optional = false -python-versions = "*" -files = [ - {file = "swifter-1.4.0.tar.gz", hash = "sha256:e1bb74476a21b3f07a17aa18c97fdcba8599726bd17da732f09dabcc50e26ba0"}, -] - -[package.dependencies] -dask = {version = ">=2.10.0", extras = ["dataframe"]} -pandas = ">=1.0.0" -psutil = ">=5.6.6" -tqdm = ">=4.33.0" - -[package.extras] -groupby = ["ray (>=1.0.0)"] -notebook = ["ipywidgets (>=7.0.0)"] - [[package]] name = "tenacity" version = "9.0.0" @@ -5766,25 +5856,6 @@ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"] typing = ["mypy (>=1.6,<2.0)", "traitlets (>=5.11.1)"] -[[package]] -name = "textual" -version = "0.78.0" -description = "Modern Text User Interface 
framework" -optional = false -python-versions = "<4.0.0,>=3.8.1" -files = [ - {file = "textual-0.78.0-py3-none-any.whl", hash = "sha256:c9d3c7dc467c37ee2e54a0283ac2c85dac35e4fc949518ed054a65b8e3e9b822"}, - {file = "textual-0.78.0.tar.gz", hash = "sha256:421f508b0d41ea0b8ecf273bf83f0d19376667eb0a87f70575252395d90ab315"}, -] - -[package.dependencies] -markdown-it-py = {version = ">=2.1.0", extras = ["linkify", "plugins"]} -rich = ">=13.3.3" -typing-extensions = ">=4.4.0,<5.0.0" - -[package.extras] -syntax = ["tree-sitter (>=0.20.1,<0.21.0)", "tree-sitter-languages (==1.10.2)"] - [[package]] name = "threadpoolctl" version = "3.5.0" @@ -5798,47 +5869,42 @@ files = [ [[package]] name = "tiktoken" -version = "0.7.0" +version = "0.8.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"}, - {file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"}, - {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79383a6e2c654c6040e5f8506f3750db9ddd71b550c724e673203b4f6b4b4590"}, - {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d4511c52caacf3c4981d1ae2df85908bd31853f33d30b345c8b6830763f769c"}, - {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:13c94efacdd3de9aff824a788353aa5749c0faee1fbe3816df365ea450b82311"}, - {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8e58c7eb29d2ab35a7a8929cbeea60216a4ccdf42efa8974d8e176d50c9a3df5"}, - {file = "tiktoken-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:21a20c3bd1dd3e55b91c1331bf25f4af522c525e771691adbc9a69336fa7f702"}, - {file = "tiktoken-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:10c7674f81e6e350fcbed7c09a65bca9356eaab27fb2dac65a1e440f2bcfe30f"}, - {file = "tiktoken-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:084cec29713bc9d4189a937f8a35dbdfa785bd1235a34c1124fe2323821ee93f"}, - {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:811229fde1652fedcca7c6dfe76724d0908775b353556d8a71ed74d866f73f7b"}, - {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b6e7dc2e7ad1b3757e8a24597415bafcfb454cebf9a33a01f2e6ba2e663992"}, - {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1063c5748be36344c7e18c7913c53e2cca116764c2080177e57d62c7ad4576d1"}, - {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:20295d21419bfcca092644f7e2f2138ff947a6eb8cfc732c09cc7d76988d4a89"}, - {file = "tiktoken-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:959d993749b083acc57a317cbc643fb85c014d055b2119b739487288f4e5d1cb"}, - {file = "tiktoken-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:71c55d066388c55a9c00f61d2c456a6086673ab7dec22dd739c23f77195b1908"}, - {file = "tiktoken-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:09ed925bccaa8043e34c519fbb2f99110bd07c6fd67714793c21ac298e449410"}, - {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03c6c40ff1db0f48a7b4d2dafeae73a5607aacb472fa11f125e7baf9dce73704"}, - {file = 
"tiktoken-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20b5c6af30e621b4aca094ee61777a44118f52d886dbe4f02b70dfe05c15350"}, - {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d427614c3e074004efa2f2411e16c826f9df427d3c70a54725cae860f09e4bf4"}, - {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8c46d7af7b8c6987fac9b9f61041b452afe92eb087d29c9ce54951280f899a97"}, - {file = "tiktoken-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:0bc603c30b9e371e7c4c7935aba02af5994a909fc3c0fe66e7004070858d3f8f"}, - {file = "tiktoken-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2398fecd38c921bcd68418675a6d155fad5f5e14c2e92fcf5fe566fa5485a858"}, - {file = "tiktoken-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8f5f6afb52fb8a7ea1c811e435e4188f2bef81b5e0f7a8635cc79b0eef0193d6"}, - {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:861f9ee616766d736be4147abac500732b505bf7013cfaf019b85892637f235e"}, - {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54031f95c6939f6b78122c0aa03a93273a96365103793a22e1793ee86da31685"}, - {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:fffdcb319b614cf14f04d02a52e26b1d1ae14a570f90e9b55461a72672f7b13d"}, - {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c72baaeaefa03ff9ba9688624143c858d1f6b755bb85d456d59e529e17234769"}, - {file = "tiktoken-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:131b8aeb043a8f112aad9f46011dced25d62629091e51d9dc1adbf4a1cc6aa98"}, - {file = "tiktoken-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cabc6dc77460df44ec5b879e68692c63551ae4fae7460dd4ff17181df75f1db7"}, - {file = "tiktoken-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8d57f29171255f74c0aeacd0651e29aa47dff6f070cb9f35ebc14c82278f3b25"}, - {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ee92776fdbb3efa02a83f968c19d4997a55c8e9ce7be821ceee04a1d1ee149c"}, - {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e215292e99cb41fbc96988ef62ea63bb0ce1e15f2c147a61acc319f8b4cbe5bf"}, - {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a81bac94769cab437dd3ab0b8a4bc4e0f9cf6835bcaa88de71f39af1791727a"}, - {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d6d73ea93e91d5ca771256dfc9d1d29f5a554b83821a1dc0891987636e0ae226"}, - {file = "tiktoken-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:2bcb28ddf79ffa424f171dfeef9a4daff61a94c631ca6813f43967cb263b83b9"}, - {file = "tiktoken-0.7.0.tar.gz", hash = "sha256:1077266e949c24e0291f6c350433c6f0971365ece2b173a23bc3b9f9defef6b6"}, + {file = "tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e"}, + {file = "tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9269348cb650726f44dd3bbb3f9110ac19a8dcc8f54949ad3ef652ca22a38e21"}, + {file = "tiktoken-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e13f37bc4ef2d012731e93e0fef21dc3b7aea5bb9009618de9a4026844e560"}, + {file = "tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d13c981511331eac0d01a59b5df7c0d4060a8be1e378672822213da51e0a2"}, + {file = "tiktoken-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:6b2ddbc79a22621ce8b1166afa9f9a888a664a579350dc7c09346a3b5de837d9"}, + {file = "tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005"}, + {file = "tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1"}, + {file = "tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a"}, + {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d"}, + {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47"}, + {file = "tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419"}, + {file = "tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99"}, + {file = "tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586"}, + {file = "tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b"}, + {file = "tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab"}, + {file = "tiktoken-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2908c0d043a7d03ebd80347266b0e58440bdef5564f84f4d29fb235b5df3b04"}, + {file = "tiktoken-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:294440d21a2a51e12d4238e68a5972095534fe9878be57d905c476017bff99fc"}, + {file = "tiktoken-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:d8f3192733ac4d77977432947d563d7e1b310b96497acd3c196c9bddb36ed9db"}, + {file = "tiktoken-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:02be1666096aff7da6cbd7cdaa8e7917bfed3467cd64b38b1f112e96d3b06a24"}, + {file = "tiktoken-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94ff53c5c74b535b2cbf431d907fc13c678bbd009ee633a2aca269a04389f9a"}, + {file = "tiktoken-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b231f5e8982c245ee3065cd84a4712d64692348bc609d84467c57b4b72dcbc5"}, + {file = "tiktoken-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4177faa809bd55f699e88c96d9bb4635d22e3f59d635ba6fd9ffedf7150b9953"}, + {file = "tiktoken-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5376b6f8dc4753cd81ead935c5f518fa0fbe7e133d9e25f648d8c4dabdd4bad7"}, + {file = "tiktoken-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:18228d624807d66c87acd8f25fc135665617cab220671eb65b50f5d70fa51f69"}, + {file = "tiktoken-0.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e17807445f0cf1f25771c9d86496bd8b5c376f7419912519699f3cc4dc5c12e"}, + {file = "tiktoken-0.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:886f80bd339578bbdba6ed6d0567a0d5c6cfe198d9e587ba6c447654c65b8edc"}, + {file = "tiktoken-0.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6adc8323016d7758d6de7313527f755b0fc6c72985b7d9291be5d96d73ecd1e1"}, + {file = "tiktoken-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b591fb2b30d6a72121a80be24ec7a0e9eb51c5500ddc7e4c2496516dd5e3816b"}, + {file = "tiktoken-0.8.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:845287b9798e476b4d762c3ebda5102be87ca26e5d2c9854002825d60cdb815d"}, + {file = "tiktoken-0.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:1473cfe584252dc3fa62adceb5b1c763c1874e04511b197da4e6de51d6ce5a02"}, + {file = "tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2"}, ] [package.dependencies] @@ -5907,17 +5973,6 @@ files = [ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] -[[package]] -name = "toolz" -version = "1.0.0" -description = "List processing tools and functional utilities" -optional = false -python-versions = ">=3.8" -files = [ - {file = "toolz-1.0.0-py3-none-any.whl", hash = "sha256:292c8f1c4e7516bf9086f8850935c799a874039c8bcf959d47b600e4c44a6236"}, - {file = "toolz-1.0.0.tar.gz", hash = "sha256:2c86e3d9a04798ac556793bced838816296a2f085017664e4995cb40a1047a02"}, -] - [[package]] name = "tornado" version = "6.4.2" @@ -5974,6 +6029,23 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "typer" +version = "0.15.1" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." +optional = false +python-versions = ">=3.7" +files = [ + {file = "typer-0.15.1-py3-none-any.whl", hash = "sha256:7994fb7b8155b64d3402518560648446072864beefd44aa2dc36972a5972e847"}, + {file = "typer-0.15.1.tar.gz", hash = "sha256:a0588c0a7fa68a1978a069818657778f86abe6ff5ea6abf472f940a08bfe4f0a"}, +] + +[package.dependencies] +click = ">=8.0.0" +rich = ">=10.11.0" +shellingham = ">=1.3.0" +typing-extensions = ">=3.7.4.3" + [[package]] name = "types-python-dateutil" version = "2.9.0.20241206" @@ -5998,29 +6070,15 @@ files = [ [[package]] name = "tzdata" -version = "2024.2" +version = "2025.1" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" files = [ - {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, - {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, + {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, + {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, ] -[[package]] -name = "uc-micro-py" -version = "1.0.3" -description = "Micro subset of unicode data files for linkify-it-py projects." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "uc-micro-py-1.0.3.tar.gz", hash = "sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a"}, - {file = "uc_micro_py-1.0.3-py3-none-any.whl", hash = "sha256:db1dffff340817673d7b466ec86114a9dc0e9d4d9b5ba229d9d60e5c12600cd5"}, -] - -[package.extras] -test = ["coverage", "pytest", "pytest-cov"] - [[package]] name = "umap-learn" version = "0.5.7" @@ -6095,59 +6153,15 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] -[[package]] -name = "uvloop" -version = "0.20.0" -description = "Fast implementation of asyncio event loop on top of libuv" -optional = false -python-versions = ">=3.8.0" -files = [ - {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9ebafa0b96c62881d5cafa02d9da2e44c23f9f0cd829f3a32a6aff771449c996"}, - {file = "uvloop-0.20.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:35968fc697b0527a06e134999eef859b4034b37aebca537daeb598b9d45a137b"}, - {file = "uvloop-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b16696f10e59d7580979b420eedf6650010a4a9c3bd8113f24a103dfdb770b10"}, - {file = "uvloop-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b04d96188d365151d1af41fa2d23257b674e7ead68cfd61c725a422764062ae"}, - {file = "uvloop-0.20.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:94707205efbe809dfa3a0d09c08bef1352f5d3d6612a506f10a319933757c006"}, - {file = "uvloop-0.20.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:89e8d33bb88d7263f74dc57d69f0063e06b5a5ce50bb9a6b32f5fcbe655f9e73"}, - {file = "uvloop-0.20.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e50289c101495e0d1bb0bfcb4a60adde56e32f4449a67216a1ab2750aa84f037"}, - {file = "uvloop-0.20.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e237f9c1e8a00e7d9ddaa288e535dc337a39bcbf679f290aee9d26df9e72bce9"}, - {file = "uvloop-0.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:746242cd703dc2b37f9d8b9f173749c15e9a918ddb021575a0205ec29a38d31e"}, - {file = "uvloop-0.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82edbfd3df39fb3d108fc079ebc461330f7c2e33dbd002d146bf7c445ba6e756"}, - {file = "uvloop-0.20.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:80dc1b139516be2077b3e57ce1cb65bfed09149e1d175e0478e7a987863b68f0"}, - {file = "uvloop-0.20.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4f44af67bf39af25db4c1ac27e82e9665717f9c26af2369c404be865c8818dcf"}, - {file = "uvloop-0.20.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4b75f2950ddb6feed85336412b9a0c310a2edbcf4cf931aa5cfe29034829676d"}, - {file = "uvloop-0.20.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:77fbc69c287596880ecec2d4c7a62346bef08b6209749bf6ce8c22bbaca0239e"}, - {file = "uvloop-0.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6462c95f48e2d8d4c993a2950cd3d31ab061864d1c226bbf0ee2f1a8f36674b9"}, - {file = "uvloop-0.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:649c33034979273fa71aa25d0fe120ad1777c551d8c4cd2c0c9851d88fcb13ab"}, - {file = "uvloop-0.20.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3a609780e942d43a275a617c0839d85f95c334bad29c4c0918252085113285b5"}, 
- {file = "uvloop-0.20.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aea15c78e0d9ad6555ed201344ae36db5c63d428818b4b2a42842b3870127c00"}, - {file = "uvloop-0.20.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0e94b221295b5e69de57a1bd4aeb0b3a29f61be6e1b478bb8a69a73377db7ba"}, - {file = "uvloop-0.20.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fee6044b64c965c425b65a4e17719953b96e065c5b7e09b599ff332bb2744bdf"}, - {file = "uvloop-0.20.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:265a99a2ff41a0fd56c19c3838b29bf54d1d177964c300dad388b27e84fd7847"}, - {file = "uvloop-0.20.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b10c2956efcecb981bf9cfb8184d27d5d64b9033f917115a960b83f11bfa0d6b"}, - {file = "uvloop-0.20.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e7d61fe8e8d9335fac1bf8d5d82820b4808dd7a43020c149b63a1ada953d48a6"}, - {file = "uvloop-0.20.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2beee18efd33fa6fdb0976e18475a4042cd31c7433c866e8a09ab604c7c22ff2"}, - {file = "uvloop-0.20.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d8c36fdf3e02cec92aed2d44f63565ad1522a499c654f07935c8f9d04db69e95"}, - {file = "uvloop-0.20.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0fac7be202596c7126146660725157d4813aa29a4cc990fe51346f75ff8fde7"}, - {file = "uvloop-0.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d0fba61846f294bce41eb44d60d58136090ea2b5b99efd21cbdf4e21927c56a"}, - {file = "uvloop-0.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95720bae002ac357202e0d866128eb1ac82545bcf0b549b9abe91b5178d9b541"}, - {file = "uvloop-0.20.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:36c530d8fa03bfa7085af54a48f2ca16ab74df3ec7108a46ba82fd8b411a2315"}, - {file = "uvloop-0.20.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e97152983442b499d7a71e44f29baa75b3b02e65d9c44ba53b10338e98dedb66"}, - {file = "uvloop-0.20.0.tar.gz", hash = "sha256:4603ca714a754fc8d9b197e325db25b2ea045385e8a3ad05d3463de725fdf469"}, -] - -[package.extras] -docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] - [[package]] name = "virtualenv" -version = "20.28.0" +version = "20.29.1" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.28.0-py3-none-any.whl", hash = "sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0"}, - {file = "virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa"}, + {file = "virtualenv-20.29.1-py3-none-any.whl", hash = "sha256:4e4cb403c0b0da39e13b46b1b2476e505cb0046b25f242bee80f62bf990b2779"}, + {file = "virtualenv-20.29.1.tar.gz", hash = "sha256:b8b8970138d32fb606192cb97f6cd4bb644fa486be9308fb9b63f81091b5dc35"}, ] [package.dependencies] @@ -6235,76 +6249,90 @@ requests = ">=2.0.0,<3.0.0" [[package]] name = "wrapt" -version = "1.17.0" +version = "1.17.2" description = "Module for decorators, wrappers and monkey patching." 
optional = false python-versions = ">=3.8" files = [ - {file = "wrapt-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a0c23b8319848426f305f9cb0c98a6e32ee68a36264f45948ccf8e7d2b941f8"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1ca5f060e205f72bec57faae5bd817a1560fcfc4af03f414b08fa29106b7e2d"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e185ec6060e301a7e5f8461c86fb3640a7beb1a0f0208ffde7a65ec4074931df"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb90765dd91aed05b53cd7a87bd7f5c188fcd95960914bae0d32c5e7f899719d"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:879591c2b5ab0a7184258274c42a126b74a2c3d5a329df16d69f9cee07bba6ea"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fce6fee67c318fdfb7f285c29a82d84782ae2579c0e1b385b7f36c6e8074fffb"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0698d3a86f68abc894d537887b9bbf84d29bcfbc759e23f4644be27acf6da301"}, - {file = "wrapt-1.17.0-cp310-cp310-win32.whl", hash = "sha256:69d093792dc34a9c4c8a70e4973a3361c7a7578e9cd86961b2bbf38ca71e4e22"}, - {file = "wrapt-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:f28b29dc158ca5d6ac396c8e0a2ef45c4e97bb7e65522bfc04c989e6fe814575"}, - {file = "wrapt-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74bf625b1b4caaa7bad51d9003f8b07a468a704e0644a700e936c357c17dd45a"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f2a28eb35cf99d5f5bd12f5dd44a0f41d206db226535b37b0c60e9da162c3ed"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:81b1289e99cf4bad07c23393ab447e5e96db0ab50974a280f7954b071d41b489"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2939cd4a2a52ca32bc0b359015718472d7f6de870760342e7ba295be9ebaf9"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a9653131bda68a1f029c52157fd81e11f07d485df55410401f745007bd6d339"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4e4b4385363de9052dac1a67bfb535c376f3d19c238b5f36bddc95efae15e12d"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bdf62d25234290db1837875d4dceb2151e4ea7f9fff2ed41c0fde23ed542eb5b"}, - {file = "wrapt-1.17.0-cp311-cp311-win32.whl", hash = "sha256:5d8fd17635b262448ab8f99230fe4dac991af1dabdbb92f7a70a6afac8a7e346"}, - {file = "wrapt-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:92a3d214d5e53cb1db8b015f30d544bc9d3f7179a05feb8f16df713cecc2620a"}, - {file = "wrapt-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:89fc28495896097622c3fc238915c79365dd0ede02f9a82ce436b13bd0ab7569"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:875d240fdbdbe9e11f9831901fb8719da0bd4e6131f83aa9f69b96d18fae7504"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ed16d95fd142e9c72b6c10b06514ad30e846a0d0917ab406186541fe68b451"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:18b956061b8db634120b58f668592a772e87e2e78bc1f6a906cfcaa0cc7991c1"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:daba396199399ccabafbfc509037ac635a6bc18510ad1add8fd16d4739cdd106"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4d63f4d446e10ad19ed01188d6c1e1bb134cde8c18b0aa2acfd973d41fcc5ada"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8a5e7cc39a45fc430af1aefc4d77ee6bad72c5bcdb1322cfde852c15192b8bd4"}, - {file = "wrapt-1.17.0-cp312-cp312-win32.whl", hash = "sha256:0a0a1a1ec28b641f2a3a2c35cbe86c00051c04fffcfcc577ffcdd707df3f8635"}, - {file = "wrapt-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:3c34f6896a01b84bab196f7119770fd8466c8ae3dfa73c59c0bb281e7b588ce7"}, - {file = "wrapt-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:714c12485aa52efbc0fc0ade1e9ab3a70343db82627f90f2ecbc898fdf0bb181"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da427d311782324a376cacb47c1a4adc43f99fd9d996ffc1b3e8529c4074d393"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba1739fb38441a27a676f4de4123d3e858e494fac05868b7a281c0a383c098f4"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e711fc1acc7468463bc084d1b68561e40d1eaa135d8c509a65dd534403d83d7b"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:140ea00c87fafc42739bd74a94a5a9003f8e72c27c47cd4f61d8e05e6dec8721"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:73a96fd11d2b2e77d623a7f26e004cc31f131a365add1ce1ce9a19e55a1eef90"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0b48554952f0f387984da81ccfa73b62e52817a4386d070c75e4db7d43a28c4a"}, - {file = "wrapt-1.17.0-cp313-cp313-win32.whl", hash = "sha256:498fec8da10e3e62edd1e7368f4b24aa362ac0ad931e678332d1b209aec93045"}, - {file = "wrapt-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd136bb85f4568fffca995bd3c8d52080b1e5b225dbf1c2b17b66b4c5fa02838"}, - {file = "wrapt-1.17.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:17fcf043d0b4724858f25b8826c36e08f9fb2e475410bece0ec44a22d533da9b"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4a557d97f12813dc5e18dad9fa765ae44ddd56a672bb5de4825527c847d6379"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0229b247b0fc7dee0d36176cbb79dbaf2a9eb7ecc50ec3121f40ef443155fb1d"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8425cfce27b8b20c9b89d77fb50e368d8306a90bf2b6eef2cdf5cd5083adf83f"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9c900108df470060174108012de06d45f514aa4ec21a191e7ab42988ff42a86c"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:4e547b447073fc0dbfcbff15154c1be8823d10dab4ad401bdb1575e3fdedff1b"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:914f66f3b6fc7b915d46c1cc424bc2441841083de01b90f9e81109c9759e43ab"}, - {file = "wrapt-1.17.0-cp313-cp313t-win32.whl", hash = "sha256:a4192b45dff127c7d69b3bdfb4d3e47b64179a0b9900b6351859f3001397dabf"}, - {file = "wrapt-1.17.0-cp313-cp313t-win_amd64.whl", hash 
= "sha256:4f643df3d4419ea3f856c5c3f40fec1d65ea2e89ec812c83f7767c8730f9827a"}, - {file = "wrapt-1.17.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:69c40d4655e078ede067a7095544bcec5a963566e17503e75a3a3e0fe2803b13"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f495b6754358979379f84534f8dd7a43ff8cff2558dcdea4a148a6e713a758f"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:baa7ef4e0886a6f482e00d1d5bcd37c201b383f1d314643dfb0367169f94f04c"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8fc931382e56627ec4acb01e09ce66e5c03c384ca52606111cee50d931a342d"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8f8909cdb9f1b237786c09a810e24ee5e15ef17019f7cecb207ce205b9b5fcce"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ad47b095f0bdc5585bced35bd088cbfe4177236c7df9984b3cc46b391cc60627"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:948a9bd0fb2c5120457b07e59c8d7210cbc8703243225dbd78f4dfc13c8d2d1f"}, - {file = "wrapt-1.17.0-cp38-cp38-win32.whl", hash = "sha256:5ae271862b2142f4bc687bdbfcc942e2473a89999a54231aa1c2c676e28f29ea"}, - {file = "wrapt-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:f335579a1b485c834849e9075191c9898e0731af45705c2ebf70e0cd5d58beed"}, - {file = "wrapt-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d751300b94e35b6016d4b1e7d0e7bbc3b5e1751e2405ef908316c2a9024008a1"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7264cbb4a18dc4acfd73b63e4bcfec9c9802614572025bdd44d0721983fc1d9c"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33539c6f5b96cf0b1105a0ff4cf5db9332e773bb521cc804a90e58dc49b10578"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c30970bdee1cad6a8da2044febd824ef6dc4cc0b19e39af3085c763fdec7de33"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bc7f729a72b16ee21795a943f85c6244971724819819a41ddbaeb691b2dd85ad"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6ff02a91c4fc9b6a94e1c9c20f62ea06a7e375f42fe57587f004d1078ac86ca9"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dfb7cff84e72e7bf975b06b4989477873dcf160b2fd89959c629535df53d4e0"}, - {file = "wrapt-1.17.0-cp39-cp39-win32.whl", hash = "sha256:2399408ac33ffd5b200480ee858baa58d77dd30e0dd0cab6a8a9547135f30a88"}, - {file = "wrapt-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:4f763a29ee6a20c529496a20a7bcb16a73de27f5da6a843249c7047daf135977"}, - {file = "wrapt-1.17.0-py3-none-any.whl", hash = "sha256:d2c63b93548eda58abf5188e505ffed0229bf675f7c3090f8e36ad55b8cbc371"}, - {file = "wrapt-1.17.0.tar.gz", hash = "sha256:16187aa2317c731170a88ef35e8937ae0f533c402872c1ee5e6d079fcf320801"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62"}, + {file = "wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563"}, + {file = "wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72"}, + {file = "wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317"}, + {file = "wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40"}, + {file = 
"wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9"}, + {file = "wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9"}, + {file = "wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504"}, + {file = "wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a"}, + {file = "wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f"}, + {file = "wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555"}, + {file = "wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f"}, + {file = "wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7"}, + {file = "wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9"}, + {file = 
"wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9"}, + {file = "wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb"}, + {file = "wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb"}, + {file = "wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8"}, + {file = "wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3"}, ] [[package]] @@ -6425,4 +6453,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "~3.10" -content-hash = "d4e96d771f8a3a2a585eb83d1c8af67a58b4635330baf3b4bf9ce565c217c642" +content-hash = "b835ad38f54ebac53009217c5d97d5e4d9a78a3f7b3327acaeefd7ffbdb2bb7f" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index add88b82..e69c1c13 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -1,7 +1,4 @@ [tool.poetry] -name = "graphrag-solution-accelerator" -version = "0.1.1" -description = "" authors = [ "Josh Bradley ", "Newman Cheng ", @@ -10,10 +7,12 @@ authors = [ "Douglas Orbaker ", "Chris Sanchez ", "Shane Solomon ", - "Kenny Zhang " + "Kenny Zhang ", ] +description = "A web API wrapper around the official GraphRAG library." 
license = "MIT" -package-mode = false +name = "graphrag-app" +version = "1.2.0" [tool.poetry.dependencies] python = "~3.10" @@ -40,27 +39,23 @@ wikipedia = ">=1.4.0" [tool.poetry.group.backend.dependencies] adlfs = ">=2024.7.0" -applicationinsights = ">=0.11.10" attrs = ">=23.2.0" azure-core = ">=1.30.1" azure-cosmos = ">=4.5.1" azure-identity = ">=1.15.0" +azure-monitor-opentelemetry = "^1.6.4" azure-search-documents = ">=11.4.0" azure-storage-blob = ">=12.19.0" -datashaper = ">=0.0.46" environs = ">=9.5.0" fastapi = ">=0.110.0" fastapi-offline = ">=1.7.3" fastparquet = ">=2023.10.1" fsspec = ">=2024.2.0" -graphrag = "==0.3.3" -graspologic = ">=3.3.0" +graphrag = "==1.2.0" httpx = ">=0.25.2" kubernetes = ">=29.0.0" networkx = ">=3.2.1" nltk = "*" -azure-monitor-opentelemetry-exporter = "*" -opentelemetry-sdk = ">=1.27.0" pandas = ">=2.2.1" pyaml-env = ">=1.2.1" pyarrow = ">=15.0.0" @@ -69,23 +64,23 @@ python-multipart = ">=0.0.6" requests = "*" rich = ">=13.7.1" tiktoken = ">=0.6.0" -uvicorn = ">=0.23.2" urllib3 = ">=2.2.2" +uvicorn = ">=0.23.2" [tool.ruff] -target-version = "py310" -line-length = 88 indent-width = 4 +line-length = 88 +target-version = "py310" [tool.ruff.format] preview = true quote-style = "double" [tool.ruff.lint] +ignore = ["E402", "E501", "F821"] preview = true select = ["E", "F", "I"] -ignore = ["E402", "E501", "F821"] [build-system] -requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] diff --git a/backend/run-indexing-job.py b/backend/run-indexing-job.py deleted file mode 100644 index 70f70146..00000000 --- a/backend/run-indexing-job.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import argparse -import asyncio - -from src import main # noqa: F401 -from src.api.index import _start_indexing_pipeline - -parser = argparse.ArgumentParser(description="Kickoff indexing job.") -parser.add_argument("-i", "--index-name", required=True) -args = parser.parse_args() - -asyncio.run( - _start_indexing_pipeline( - index_name=args.index_name, - ) -) diff --git a/backend/scripts/indexer.py b/backend/scripts/indexer.py new file mode 100644 index 00000000..ed8dcfb1 --- /dev/null +++ b/backend/scripts/indexer.py @@ -0,0 +1,180 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +import argparse +import asyncio +import traceback +from pathlib import Path + +import graphrag.api as api +import yaml +from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks +from graphrag.config.create_graphrag_config import create_graphrag_config +from graphrag.index.create_pipeline_config import create_pipeline_config +from graphrag.index.typing import PipelineRunResult + +from graphrag_app.logger import ( + PipelineJobUpdater, + load_pipeline_logger, +) +from graphrag_app.typing.pipeline import PipelineJobState +from graphrag_app.utils.azure_clients import AzureClientManager +from graphrag_app.utils.common import get_cosmos_container_store_client, sanitize_name +from graphrag_app.utils.pipeline import PipelineJob + + +def start_indexing_job(index_name: str): + print("Start indexing job...") + # get sanitized name + sanitized_index_name = sanitize_name(index_name) + + # update or create new item in container-store in cosmosDB + azure_client_manager = AzureClientManager() + blob_service_client = azure_client_manager.get_blob_service_client() + if not blob_service_client.get_container_client(sanitized_index_name).exists(): + blob_service_client.create_container(sanitized_index_name) + + cosmos_container_client = get_cosmos_container_store_client() + cosmos_container_client.upsert_item({ + "id": sanitized_index_name, + "human_readable_name": index_name, + "type": "index", + }) + + print("Initialize pipeline job...") + pipelinejob = PipelineJob() + pipeline_job = pipelinejob.load_item(sanitized_index_name) + sanitized_storage_name = pipeline_job.sanitized_storage_name + storage_name = pipeline_job.human_readable_index_name + + # load custom pipeline settings + SCRIPT_DIR = Path(__file__).resolve().parent + with (SCRIPT_DIR / "settings.yaml").open("r") as f: + data = yaml.safe_load(f) + # dynamically set some values + data["input"]["container_name"] = sanitized_storage_name + data["storage"]["container_name"] = sanitized_index_name + data["reporting"]["container_name"] = sanitized_index_name + data["cache"]["container_name"] = sanitized_index_name + if "vector_store" in data["embeddings"]: + data["embeddings"]["vector_store"]["collection_name"] = ( + f"{sanitized_index_name}_description_embedding" + ) + + # set prompt for entity extraction + if pipeline_job.entity_extraction_prompt: + fname = "entity-extraction-prompt.txt" + with open(fname, "w") as outfile: + outfile.write(pipeline_job.entity_extraction_prompt) + data["entity_extraction"]["prompt"] = fname + else: + data.pop("entity_extraction") + + # set prompt for entity summarization + if pipeline_job.entity_summarization_prompt: + fname = "entity-summarization-prompt.txt" + with open(fname, "w") as outfile: + outfile.write(pipeline_job.entity_summarization_prompt) + data["summarize_descriptions"]["prompt"] = fname + else: + data.pop("summarize_descriptions") + + # set prompt for community summarization + if pipeline_job.community_summarization_prompt: + fname = "community-summarization-prompt.txt" + with open(fname, "w") as outfile: + outfile.write(pipeline_job.community_summarization_prompt) + data["community_reports"]["prompt"] = fname + else: + data.pop("community_reports") + + # generate default graphrag config parameters and override with custom settings + parameters = create_graphrag_config(data, ".") + + # reset pipeline job details + pipeline_job.status = PipelineJobState.RUNNING + pipeline_config = create_pipeline_config(parameters) + pipeline_job.all_workflows = [ + workflow.name for workflow in 
pipeline_config.workflows + ] + pipeline_job.completed_workflows = [] + pipeline_job.failed_workflows = [] + + # create new loggers/callbacks just for this job + print("Creating generic loggers...") + logger: WorkflowCallbacks = load_pipeline_logger( + logging_dir=sanitized_index_name, + index_name=index_name, + num_workflow_steps=len(pipeline_job.all_workflows), + ) + + # create pipeline job updater to monitor job progress + print("Creating pipeline job updater...") + pipeline_job_updater = PipelineJobUpdater(pipeline_job) + + # run the pipeline + try: + print("Building index...") + pipeline_results: list[PipelineRunResult] = asyncio.run( + api.build_index( + config=parameters, + callbacks=[logger, pipeline_job_updater], + ) + ) + + # once indexing job is done, check if any pipeline steps failed + for result in pipeline_results: + if result.errors: + pipeline_job.failed_workflows.append(result.workflow) + print("Indexing complete") + + if len(pipeline_job.failed_workflows) > 0: + print("Indexing pipeline encountered errors.") + pipeline_job.status = PipelineJobState.FAILED + logger.error( + message=f"Indexing pipeline encountered error for index'{index_name}'.", + details={ + "index": index_name, + "storage_name": storage_name, + "status_message": "indexing pipeline encountered error", + }, + ) + else: + print("Indexing pipeline complete.") + # record the pipeline completion + pipeline_job.status = PipelineJobState.COMPLETE + pipeline_job.percent_complete = 100 + logger.log( + message=f"Indexing pipeline complete for index'{index_name}'.", + details={ + "index": index_name, + "storage_name": storage_name, + "status_message": "indexing pipeline complete", + }, + ) + pipeline_job.progress = ( + f"{len(pipeline_job.completed_workflows)} out of " + f"{len(pipeline_job.all_workflows)} workflows completed successfully." 
+ ) + if pipeline_job.status == PipelineJobState.FAILED: + exit(1) # signal to AKS that indexing job failed + except Exception as e: + pipeline_job.status = PipelineJobState.FAILED + error_details = { + "index": index_name, + "storage_name": storage_name, + } + logger.error( + message=f"Indexing pipeline failed for index '{index_name}'.", + cause=e, + stack=traceback.format_exc(), + details=error_details, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Build a graphrag index.") + parser.add_argument("-i", "--index-name", required=True) + args = parser.parse_args() + + start_indexing_job(index_name=args.index_name) diff --git a/backend/manage-indexing-jobs.py b/backend/scripts/job-scheduler.py similarity index 88% rename from backend/manage-indexing-jobs.py rename to backend/scripts/job-scheduler.py index 29167ffa..53b7ca3e 100644 --- a/backend/manage-indexing-jobs.py +++ b/backend/scripts/job-scheduler.py @@ -9,6 +9,8 @@ """ import os +import traceback +from pathlib import Path import pandas as pd import yaml @@ -17,11 +19,11 @@ config, ) -from src.api.azure_clients import AzureClientManager -from src.api.common import sanitize_name -from src.logger.logger_singleton import LoggerSingleton -from src.typing.pipeline import PipelineJobState -from src.utils.pipeline import PipelineJob +from graphrag_app.logger.load_logger import load_pipeline_logger +from graphrag_app.typing.pipeline import PipelineJobState +from graphrag_app.utils.azure_clients import AzureClientManager +from graphrag_app.utils.common import sanitize_name +from graphrag_app.utils.pipeline import PipelineJob def schedule_indexing_job(index_name: str): @@ -46,10 +48,12 @@ def schedule_indexing_job(index_name: str): batch_v1.create_namespaced_job( body=job_manifest, namespace=os.environ["AKS_NAMESPACE"] ) - except Exception: - reporter = LoggerSingleton().get_instance() - reporter.on_error( - "Index job manager encountered error scheduling indexing job", + except Exception as e: + reporter = load_pipeline_logger() + reporter.error( + message="Index job manager encountered error scheduling indexing job", + cause=e, + stack=traceback.format_exc(), ) # In the event of a catastrophic scheduling failure, something in k8s or the job manifest is likely broken. # Set job status to failed to prevent an infinite loop of re-scheduling @@ -67,15 +71,15 @@ def _generate_aks_job_manifest( The manifest must be valid YAML with certain values replaced by the provided arguments. """ - # NOTE: this file location is relative to the WORKDIR set in Dockerfile-backend - with open("indexing-job-template.yaml", "r") as f: + ROOT_DIR = Path(__file__).resolve().parent.parent + with (ROOT_DIR / "manifests/job.yaml").open("r") as f: manifest = yaml.safe_load(f) manifest["metadata"]["name"] = f"indexing-job-{sanitize_name(index_name)}" manifest["spec"]["template"]["spec"]["serviceAccountName"] = service_account_name manifest["spec"]["template"]["spec"]["containers"][0]["image"] = docker_image_name manifest["spec"]["template"]["spec"]["containers"][0]["command"] = [ "python", - "run-indexing-job.py", + "indexer.py", f"-i={index_name}", ] return manifest diff --git a/backend/scripts/settings.yaml b/backend/scripts/settings.yaml new file mode 100644 index 00000000..0ad482e6 --- /dev/null +++ b/backend/scripts/settings.yaml @@ -0,0 +1,137 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
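+#
+# NOTE: values written as $VARIABLE_NAME are supplied through environment variables in
+# the indexing job container, and values marked PLACEHOLDER are filled in per index by
+# scripts/indexer.py before the pipeline runs.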
+ +# this yaml file serves as a configuration template for the graphrag indexing jobs +# some values are hardcoded while others denoted by PLACEHOLDER will be dynamically set + +###################### LLM settings ###################### +encoding_model: cl100k_base # this needs to be matched to your model! + +llm: + type: azure_openai_chat + api_base: $GRAPHRAG_API_BASE + api_version: $GRAPHRAG_API_VERSION + model: $GRAPHRAG_LLM_MODEL + deployment_name: $GRAPHRAG_LLM_DEPLOYMENT_NAME + cognitive_services_endpoint: $COGNITIVE_SERVICES_AUDIENCE + model_supports_json: True + tokens_per_minute: 80_000 + requests_per_minute: 480 + concurrent_requests: 25 + max_retries: 250 + max_retry_wait: 60.0 + sleep_on_rate_limit_recommendation: True + +parallelization: + num_threads: 10 + stagger: 0.25 + +async_mode: threaded # or asyncio + +embeddings: + vector_store: + type: azure_ai_search + collection_name: PLACEHOLDER + title_column: name + overwrite: True + url: $AI_SEARCH_URL + audience: $AI_SEARCH_AUDIENCE + llm: + type: azure_openai_embedding + api_base: $GRAPHRAG_API_BASE + api_version: $GRAPHRAG_API_VERSION + batch_size: 10 + model: $GRAPHRAG_EMBEDDING_MODEL + deployment_name: $GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME + cognitive_services_endpoint: $COGNITIVE_SERVICES_AUDIENCE + tokens_per_minute: 350_000 + requests_per_minute: 2_100 + +###################### Input settings ###################### +input: + type: blob + file_type: text + base_dir: . + file_encoding: utf-8 + file_pattern: .*\.txt$ + storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL + container_name: PLACEHOLDER + +chunks: + size: 1_200 + overlap: 100 + group_by_columns: [id] + +###################### Storage settings ###################### +cache: + type: blob + storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL + container_name: PLACEHOLDER + base_dir: cache + +reporting: + type: blob + storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL + container_name: PLACEHOLDER + base_dir: logs + +storage: + type: blob + storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL + container_name: PLACEHOLDER + base_dir: output + +###################### Workflow settings ###################### +skip_workflows: [] + +entity_extraction: + prompt: PLACEHOLDER + entity_types: [organization, person, geo, event] + max_gleanings: 1 + +summarize_descriptions: + prompt: PLACEHOLDER + max_length: 500 + +claim_extraction: + enabled: false + prompt: "prompts/claim_extraction.txt" + description: "Any claims or facts that could be relevant to information discovery." + max_gleanings: 1 + +community_reports: + prompt: PLACEHOLDER + max_length: 2_000 + max_input_length: 8_000 + +cluster_graph: + max_cluster_size: 10 + +embed_graph: + enabled: false + +umap: + enabled: false + +snapshots: + graphml: True + embeddings: false + transient: false + +###################### Query settings ###################### +## The prompt locations are required here, but each search method has a number of optional knobs that can be tuned. +## See the config docs: https://microsoft.github.io/graphrag/config/yaml/#query +local_search: + prompt: PLACEHOLDER + +global_search: + map_prompt: PLACEHOLDER + reduce_prompt: PLACEHOLDER + knowledge_prompt: PLACEHOLDER + +drift_search: + prompt: PLACEHOLDER + reduce_prompt: PLACEHOLDER + +basic_search: + prompt: PLACEHOLDER diff --git a/backend/src/api/common.py b/backend/src/api/common.py deleted file mode 100644 index bec50933..00000000 --- a/backend/src/api/common.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. - -import hashlib -import os -import re - -from azure.cosmos import exceptions -from azure.identity import DefaultAzureCredential -from fastapi import HTTPException - -from src.api.azure_clients import AzureClientManager - - -def get_pandas_storage_options() -> dict: - """Generate the storage options required by pandas to read parquet files from Storage.""" - # For more information on the options available, see: https://github.com/fsspec/adlfs?tab=readme-ov-file#setting-credentials - azure_client_manager = AzureClientManager() - options = { - "account_name": azure_client_manager.storage_account_name, - "account_host": azure_client_manager.storage_account_hostname, - } - if os.getenv("STORAGE_CONNECTION_STRING"): - options["connection_string"] = os.getenv("STORAGE_CONNECTION_STRING") - else: - options["credential"] = DefaultAzureCredential() - return options - - -def delete_blob_container(container_name: str): - """ - Delete a blob container. If it does not exist, do nothing. - If exception is raised, the calling function should catch it. - """ - azure_client_manager = AzureClientManager() - blob_service_client = azure_client_manager.get_blob_service_client() - if blob_service_client.get_container_client(container_name).exists(): - blob_service_client.delete_container(container_name) - - -def delete_cosmos_container_item(container: str, item_id: str): - """ - Delete an item from a cosmosdb container. If it does not exist, do nothing. - If exception is raised, the calling function should catch it. - """ - azure_client_manager = AzureClientManager() - try: - azure_client_manager.get_cosmos_container_client( - database="graphrag", container=container - ).delete_item(item_id, item_id) - except exceptions.CosmosResourceNotFoundError: - # If item does not exist, do nothing - pass - - -def validate_index_file_exist(index_name: str, file_name: str): - """ - Check if index exists and that the specified blob file exists. - - A "valid" index is defined by having an entry in the container-store table in cosmos db. - Further checks are done to ensure the blob container and file exist. - - Args: - ----- - index_name (str) - Name of the index to validate. - file_name (str) - The blob file to be validated. - - Raises: ValueError - """ - azure_client_manager = AzureClientManager() - try: - cosmos_container_client = azure_client_manager.get_cosmos_container_client( - database="graphrag", container="container-store" - ) - cosmos_container_client.read_item(index_name, index_name) - except Exception: - raise ValueError(f"Container {index_name} is not a valid index.") - # check for file existence - index_container_client = ( - azure_client_manager.get_blob_service_client().get_container_client(index_name) - ) - if not index_container_client.exists(): - raise ValueError(f"Container {index_name} not found.") - if not index_container_client.get_blob_client(file_name).exists(): - raise ValueError(f"File {file_name} in container {index_name} not found.") - - -def validate_blob_container_name(container_name: str): - """ - Check if container name is valid based on Azure resource naming rules. - - - A blob container name must be between 3 and 63 characters in length. - - Start with a letter or number - - All letters used in blob container names must be lowercase. - - Contain only letters, numbers, or the hyphen. - - Consecutive hyphens are not permitted. - - Cannot end with a hyphen. - - Args: - ----- - container_name (str) - The blob container name to be validated. 
- - Raises: ValueError - """ - # Check the length of the name - if len(container_name) < 3 or len(container_name) > 63: - raise ValueError( - f"Container name must be between 3 and 63 characters in length. Name provided was {len(container_name)} characters long." - ) - - # Check if the name starts with a letter or number - if not container_name[0].isalnum(): - raise ValueError( - f"Container name must start with a letter or number. Starting character was {container_name[0]}." - ) - - # Check for valid characters (letters, numbers, hyphen) and lowercase letters - if not re.match("^[a-z0-9-]+$", container_name): - raise ValueError( - f"Container name must only contain:\n- lowercase letters\n- numbers\n- or hyphens\nName provided was {container_name}." - ) - - # Check for consecutive hyphens - if "--" in container_name: - raise ValueError( - f"Container name cannot contain consecutive hyphens. Name provided was {container_name}." - ) - - # Check for hyphens at the end of the name - if container_name[-1] == "-": - raise ValueError( - f"Container name cannot end with a hyphen. Name provided was {container_name}." - ) - - -def sanitize_name(name: str | None) -> str | None: - """ - Sanitize a human-readable name by converting it to a SHA256 hash, then truncate - to 128 bit length to ensure it is within the 63 character limit imposed by Azure Storage. - - The sanitized name will be used to identify container names in both Azure Storage and CosmosDB. - - Args: - ----- - name (str) - The name to be sanitized. - - Returns: str - The sanitized name. - """ - if not name: - return None - name = name.encode() - name_hash = hashlib.sha256(name) - truncated_hash = name_hash.digest()[:16] # get the first 16 bytes (128 bits) - return truncated_hash.hex() - - -def retrieve_original_blob_container_name(sanitized_name: str) -> str | None: - """ - Retrieve the original human-readable container name of a sanitized blob name. - - Args: - ----- - sanitized_name (str) - The sanitized name to be converted back to the original name. - - Returns: str - The original human-readable name. - """ - azure_client_manager = AzureClientManager() - try: - container_store_client = azure_client_manager.get_cosmos_container_client( - database="graphrag", container="container-store" - ) - try: - return container_store_client.read_item(sanitized_name, sanitized_name)[ - "human_readable_name" - ] - except exceptions.CosmosResourceNotFoundError: - return None - except Exception: - raise HTTPException( - status_code=500, detail="Error retrieving original blob name." - ) diff --git a/backend/src/api/index.py b/backend/src/api/index.py deleted file mode 100644 index 3fda4151..00000000 --- a/backend/src/api/index.py +++ /dev/null @@ -1,476 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
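For quick reference, the Azure naming rules that `validate_blob_container_name` enforces above (and that `sanitize_name` avoids entirely by hashing user-supplied names into short hex strings) can be restated as a single predicate. The helper below is an illustration of those documented rules, not the accelerator's implementation:

```python
# Standalone restatement of the blob container naming rules documented above,
# for illustration only.
import re


def is_valid_container_name(name: str) -> bool:
    return (
        3 <= len(name) <= 63                               # length limit
        and name[0].isalnum()                              # must start with a letter or number
        and re.fullmatch(r"[a-z0-9-]+", name) is not None  # lowercase letters, digits, hyphens
        and "--" not in name                               # no consecutive hyphens
        and not name.endswith("-")                         # no trailing hyphen
    )


assert is_valid_container_name("my-data-01")
assert not is_valid_container_name("My-Data")    # uppercase letters are rejected
assert not is_valid_container_name("data--set")  # consecutive hyphens are rejected
assert not is_valid_container_name("dataset-")   # trailing hyphen is rejected
assert not is_valid_container_name("ab")         # shorter than 3 characters
```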
- -import asyncio -import inspect -import os -import traceback -from time import time -from typing import cast - -import yaml -from azure.identity import DefaultAzureCredential -from azure.search.documents.indexes import SearchIndexClient -from datashaper import WorkflowCallbacksManager -from fastapi import ( - APIRouter, - HTTPException, - UploadFile, -) -from graphrag.config import create_graphrag_config -from graphrag.index import create_pipeline_config -from graphrag.index.bootstrap import bootstrap -from graphrag.index.run import run_pipeline_with_config -from kubernetes import ( - client as kubernetes_client, -) -from kubernetes import ( - config as kubernetes_config, -) - -from src.api.azure_clients import AzureClientManager -from src.api.common import ( - delete_blob_container, - sanitize_name, - validate_blob_container_name, -) -from src.logger import ( - LoggerSingleton, - PipelineJobWorkflowCallbacks, - Reporters, - load_pipeline_logger, -) -from src.models import ( - BaseResponse, - IndexNameList, - IndexStatusResponse, -) -from src.typing.pipeline import PipelineJobState -from src.utils.pipeline import PipelineJob - -index_route = APIRouter( - prefix="/index", - tags=["Index Operations"], -) - - -@index_route.post( - "", - summary="Build an index", - response_model=BaseResponse, - responses={200: {"model": BaseResponse}}, -) -async def setup_indexing_pipeline( - storage_name: str, - index_name: str, - entity_extraction_prompt: UploadFile | None = None, - community_report_prompt: UploadFile | None = None, - summarize_descriptions_prompt: UploadFile | None = None, -): - azure_client_manager = AzureClientManager() - blob_service_client = azure_client_manager.get_blob_service_client() - pipelinejob = PipelineJob() - - # validate index name against blob container naming rules - sanitized_index_name = sanitize_name(index_name) - try: - validate_blob_container_name(sanitized_index_name) - except ValueError: - raise HTTPException( - status_code=500, - detail=f"Invalid index name: {index_name}", - ) - - # check for data container existence - sanitized_storage_name = sanitize_name(storage_name) - if not blob_service_client.get_container_client(sanitized_storage_name).exists(): - raise HTTPException( - status_code=500, - detail=f"Storage blob container {storage_name} does not exist", - ) - - # check for prompts - entity_extraction_prompt_content = ( - entity_extraction_prompt.file.read().decode("utf-8") - if entity_extraction_prompt - else None - ) - community_report_prompt_content = ( - community_report_prompt.file.read().decode("utf-8") - if community_report_prompt - else None - ) - summarize_descriptions_prompt_content = ( - summarize_descriptions_prompt.file.read().decode("utf-8") - if summarize_descriptions_prompt - else None - ) - - # check for existing index job - # it is okay if job doesn't exist, but if it does, - # it must not be scheduled or running - if pipelinejob.item_exist(sanitized_index_name): - existing_job = pipelinejob.load_item(sanitized_index_name) - if (PipelineJobState(existing_job.status) == PipelineJobState.SCHEDULED) or ( - PipelineJobState(existing_job.status) == PipelineJobState.RUNNING - ): - raise HTTPException( - status_code=202, # request has been accepted for processing but is not complete. 
- detail=f"Index '{index_name}' already exists and has not finished building.", - ) - # if indexing job is in a failed state, delete the associated K8s job and pod to allow for a new job to be scheduled - if PipelineJobState(existing_job.status) == PipelineJobState.FAILED: - _delete_k8s_job( - f"indexing-job-{sanitized_index_name}", os.environ["AKS_NAMESPACE"] - ) - # reset the pipeline job details - existing_job._status = PipelineJobState.SCHEDULED - existing_job._percent_complete = 0 - existing_job._progress = "" - existing_job._all_workflows = existing_job._completed_workflows = ( - existing_job._failed_workflows - ) = [] - existing_job._entity_extraction_prompt = entity_extraction_prompt_content - existing_job._community_report_prompt = community_report_prompt_content - existing_job._summarize_descriptions_prompt = ( - summarize_descriptions_prompt_content - ) - existing_job._epoch_request_time = int(time()) - existing_job.update_db() - else: - pipelinejob.create_item( - id=sanitized_index_name, - human_readable_index_name=index_name, - human_readable_storage_name=storage_name, - entity_extraction_prompt=entity_extraction_prompt_content, - community_report_prompt=community_report_prompt_content, - summarize_descriptions_prompt=summarize_descriptions_prompt_content, - status=PipelineJobState.SCHEDULED, - ) - - return BaseResponse(status="Indexing job scheduled") - - -async def _start_indexing_pipeline(index_name: str): - # get sanitized name - sanitized_index_name = sanitize_name(index_name) - - # update or create new item in container-store in cosmosDB - azure_client_manager = AzureClientManager() - blob_service_client = azure_client_manager.get_blob_service_client() - if not blob_service_client.get_container_client(sanitized_index_name).exists(): - blob_service_client.create_container(sanitized_index_name) - - cosmos_container_client = azure_client_manager.get_cosmos_container_client( - database="graphrag", container="container-store" - ) - cosmos_container_client.upsert_item({ - "id": sanitized_index_name, - "human_readable_name": index_name, - "type": "index", - }) - - logger = LoggerSingleton().get_instance() - pipelinejob = PipelineJob() - pipeline_job = pipelinejob.load_item(sanitized_index_name) - sanitized_storage_name = pipeline_job.sanitized_storage_name - storage_name = pipeline_job.human_readable_index_name - - # download nltk dependencies - bootstrap() - - # load custom pipeline settings - this_directory = os.path.dirname( - os.path.abspath(inspect.getfile(inspect.currentframe())) - ) - data = yaml.safe_load(open(f"{this_directory}/pipeline-settings.yaml")) - # dynamically set some values - data["input"]["container_name"] = sanitized_storage_name - data["storage"]["container_name"] = sanitized_index_name - data["reporting"]["container_name"] = sanitized_index_name - data["cache"]["container_name"] = sanitized_index_name - if "vector_store" in data["embeddings"]: - data["embeddings"]["vector_store"]["collection_name"] = ( - f"{sanitized_index_name}_description_embedding" - ) - - # set prompts for entity extraction, community report, and summarize descriptions. 
- if pipeline_job.entity_extraction_prompt: - fname = "entity-extraction-prompt.txt" - with open(fname, "w") as outfile: - outfile.write(pipeline_job.entity_extraction_prompt) - data["entity_extraction"]["prompt"] = fname - else: - data.pop("entity_extraction") - if pipeline_job.community_report_prompt: - fname = "community-report-prompt.txt" - with open(fname, "w") as outfile: - outfile.write(pipeline_job.community_report_prompt) - data["community_reports"]["prompt"] = fname - else: - data.pop("community_reports") - if pipeline_job.summarize_descriptions_prompt: - fname = "summarize-descriptions-prompt.txt" - with open(fname, "w") as outfile: - outfile.write(pipeline_job.summarize_descriptions_prompt) - data["summarize_descriptions"]["prompt"] = fname - else: - data.pop("summarize_descriptions") - - # generate the default pipeline and override with custom settings - parameters = create_graphrag_config(data, ".") - pipeline_config = create_pipeline_config(parameters, True) - - # reset pipeline job details - pipeline_job.status = PipelineJobState.RUNNING - pipeline_job.all_workflows = [] - pipeline_job.completed_workflows = [] - pipeline_job.failed_workflows = [] - for workflow in pipeline_config.workflows: - pipeline_job.all_workflows.append(workflow.name) - - # create new loggers/callbacks just for this job - loggers = [] - logger_names = os.getenv("REPORTERS", Reporters.CONSOLE.name.upper()).split(",") - for logger_name in logger_names: - try: - loggers.append(Reporters[logger_name.upper()]) - except KeyError: - raise ValueError(f"Unknown logger type: {logger_name}") - workflow_callbacks = load_pipeline_logger( - index_name=index_name, - num_workflow_steps=len(pipeline_job.all_workflows), - reporting_dir=sanitized_index_name, - reporters=loggers, - ) - - # add pipeline job callback to the callback manager - cast(WorkflowCallbacksManager, workflow_callbacks).register( - PipelineJobWorkflowCallbacks(pipeline_job) - ) - - # run the pipeline - try: - async for workflow_result in run_pipeline_with_config( - config_or_path=pipeline_config, - callbacks=workflow_callbacks, - progress_reporter=None, - ): - await asyncio.sleep(0) - if len(workflow_result.errors or []) > 0: - # if the workflow failed, record the failure - pipeline_job.failed_workflows.append(workflow_result.workflow) - pipeline_job.update_db() - # TODO: exit early if a workflow fails and add more detailed error logging - - # if job is done, check if any workflow steps failed - if len(pipeline_job.failed_workflows) > 0: - pipeline_job.status = PipelineJobState.FAILED - else: - # record the workflow completion - pipeline_job.status = PipelineJobState.COMPLETE - pipeline_job.percent_complete = 100 - - pipeline_job.progress = ( - f"{len(pipeline_job.completed_workflows)} out of " - f"{len(pipeline_job.all_workflows)} workflows completed successfully." 
- ) - - workflow_callbacks.on_log( - message=f"Indexing pipeline complete for index'{index_name}'.", - details={ - "index": index_name, - "storage_name": storage_name, - "status_message": "indexing pipeline complete", - }, - ) - - del workflow_callbacks # garbage collect - if pipeline_job.status == PipelineJobState.FAILED: - exit(1) # signal to AKS that indexing job failed - - except Exception as e: - pipeline_job.status = PipelineJobState.FAILED - - # update failed state in cosmos db - error_details = { - "index": index_name, - "storage_name": storage_name, - } - # log error in local index directory logs - workflow_callbacks.on_error( - message=f"Indexing pipeline failed for index '{index_name}'.", - cause=e, - stack=traceback.format_exc(), - details=error_details, - ) - # log error in global index directory logs - logger.on_error( - message=f"Indexing pipeline failed for index '{index_name}'.", - cause=e, - stack=traceback.format_exc(), - details=error_details, - ) - raise HTTPException( - status_code=500, - detail=f"Error encountered during indexing job for index '{index_name}'.", - ) - - -@index_route.get( - "", - summary="Get all indexes", - response_model=IndexNameList, - responses={200: {"model": IndexNameList}}, -) -async def get_all_indexes(): - """ - Retrieve a list of all index names. - """ - items = [] - try: - azure_client_manager = AzureClientManager() - container_store_client = azure_client_manager.get_cosmos_container_client( - database="graphrag", container="container-store" - ) - for item in container_store_client.read_all_items(): - if item["type"] == "index": - items.append(item["human_readable_name"]) - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error("Error retrieving index names") - return IndexNameList(index_name=items) - - -def _get_pod_name(job_name: str, namespace: str) -> str | None: - """Retrieve the name of a kubernetes pod associated with a given job name.""" - # function should work only when running in AKS - if not os.getenv("KUBERNETES_SERVICE_HOST"): - return None - kubernetes_config.load_incluster_config() - v1 = kubernetes_client.CoreV1Api() - ret = v1.list_namespaced_pod(namespace=namespace) - for i in ret.items: - if job_name in i.metadata.name: - return i.metadata.name - return None - - -def _delete_k8s_job(job_name: str, namespace: str) -> None: - """Delete a kubernetes job. - Must delete K8s job first and then any pods associated with it - """ - # function should only work when running in AKS - if not os.getenv("KUBERNETES_SERVICE_HOST"): - return None - logger = LoggerSingleton().get_instance() - kubernetes_config.load_incluster_config() - try: - batch_v1 = kubernetes_client.BatchV1Api() - batch_v1.delete_namespaced_job(name=job_name, namespace=namespace) - except Exception: - logger.on_error( - message=f"Error deleting k8s job {job_name}.", - details={"container": job_name}, - ) - pass - try: - core_v1 = kubernetes_client.CoreV1Api() - job_pod = _get_pod_name(job_name, os.environ["AKS_NAMESPACE"]) - if job_pod: - core_v1.delete_namespaced_pod(job_pod, namespace=namespace) - except Exception: - logger.on_error( - message=f"Error deleting k8s pod for job {job_name}.", - details={"container": job_name}, - ) - pass - - -@index_route.delete( - "/{index_name}", - summary="Delete a specified index", - response_model=BaseResponse, - responses={200: {"model": BaseResponse}}, -) -async def delete_index(index_name: str): - """ - Delete a specified index. 
- """ - sanitized_index_name = sanitize_name(index_name) - azure_client_manager = AzureClientManager() - try: - # kill indexing job if it is running - if os.getenv("KUBERNETES_SERVICE_HOST"): # only found if in AKS - _delete_k8s_job(f"indexing-job-{sanitized_index_name}", "graphrag") - - # remove blob container and all associated entries in cosmos db - try: - delete_blob_container(sanitized_index_name) - except Exception: - pass - - # update container-store in cosmosDB - try: - container_store_client = azure_client_manager.get_cosmos_container_client( - database="graphrag", container="container-store" - ) - container_store_client.delete_item( - item=sanitized_index_name, partition_key=sanitized_index_name - ) - except Exception: - pass - - # update jobs database in cosmosDB - try: - jobs_container = azure_client_manager.get_cosmos_container_client( - database="graphrag", container="jobs" - ) - jobs_container.delete_item( - item=sanitized_index_name, partition_key=sanitized_index_name - ) - except Exception: - pass - - index_client = SearchIndexClient( - endpoint=os.environ["AI_SEARCH_URL"], - credential=DefaultAzureCredential(), - audience=os.environ["AI_SEARCH_AUDIENCE"], - ) - ai_search_index_name = f"{sanitized_index_name}_description_embedding" - if ai_search_index_name in index_client.list_index_names(): - index_client.delete_index(ai_search_index_name) - - except Exception: - logger = LoggerSingleton().get_instance() - logger.on_error( - message=f"Error encountered while deleting all data for index {index_name}.", - stack=None, - details={"container": index_name}, - ) - raise HTTPException( - status_code=500, detail=f"Error deleting index '{index_name}'." - ) - - return BaseResponse(status="Success") - - -@index_route.get( - "/status/{index_name}", - summary="Track the status of an indexing job", - response_model=IndexStatusResponse, -) -async def get_index_job_status(index_name: str): - pipelinejob = PipelineJob() # TODO: fix class so initiliazation is not required - sanitized_index_name = sanitize_name(index_name) - if pipelinejob.item_exist(sanitized_index_name): - pipeline_job = pipelinejob.load_item(sanitized_index_name) - return IndexStatusResponse( - status_code=200, - index_name=pipeline_job.human_readable_index_name, - storage_name=pipeline_job.human_readable_storage_name, - status=pipeline_job.status.value, - percent_complete=pipeline_job.percent_complete, - progress=pipeline_job.progress, - ) - raise HTTPException(status_code=404, detail=f"Index '{index_name}' does not exist.") diff --git a/backend/src/api/index_configuration.py b/backend/src/api/index_configuration.py deleted file mode 100644 index 43efa34e..00000000 --- a/backend/src/api/index_configuration.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
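For context on the status route above: `IndexStatusResponse` exposes the job's state, percent complete, and progress string, so clients typically poll until the job reaches a terminal state. A hypothetical client sketch follows; the base URL is a placeholder and the exact terminal status strings are an assumption (the authoritative values come from `PipelineJobState`).

```python
# Hypothetical polling client for GET /index/status/{index_name}; response fields
# mirror IndexStatusResponse above. Terminal status strings are an assumption.
import time

import requests


def wait_for_index(base_url: str, index_name: str, poll_seconds: int = 30) -> dict:
    while True:
        resp = requests.get(f"{base_url}/index/status/{index_name}", timeout=30)
        resp.raise_for_status()
        job = resp.json()
        print(f"{job['status']}: {job['percent_complete']}% | {job['progress']}")
        if job["status"].lower() in ("complete", "failed"):  # assumed terminal states
            return job
        time.sleep(poll_seconds)
```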
- -import inspect -import os -import shutil -import traceback - -import yaml -from fastapi import ( - APIRouter, - HTTPException, -) -from fastapi.responses import StreamingResponse -from graphrag.prompt_tune.cli import prompt_tune as generate_fine_tune_prompts - -from src.api.azure_clients import AzureClientManager -from src.api.common import ( - sanitize_name, -) -from src.logger import LoggerSingleton - -index_configuration_route = APIRouter( - prefix="/index/config", tags=["Index Configuration"] -) - - -@index_configuration_route.get( - "/prompts", - summary="Generate graphrag prompts from user-provided data.", - description="Generating custom prompts from user-provided data may take several minutes to run based on the amount of data used.", -) -async def generate_prompts(storage_name: str, limit: int = 5): - """ - Automatically generate custom prompts for entity entraction, - community reports, and summarize descriptions based on a sample of provided data. - """ - # check for storage container existence - azure_client_manager = AzureClientManager() - blob_service_client = azure_client_manager.get_blob_service_client() - sanitized_storage_name = sanitize_name(storage_name) - if not blob_service_client.get_container_client(sanitized_storage_name).exists(): - raise HTTPException( - status_code=500, - detail=f"Data container '{storage_name}' does not exist.", - ) - this_directory = os.path.dirname( - os.path.abspath(inspect.getfile(inspect.currentframe())) - ) - - # write custom settings.yaml to a file and store in a temporary directory - data = yaml.safe_load(open(f"{this_directory}/pipeline-settings.yaml")) - data["input"]["container_name"] = sanitized_storage_name - temp_dir = f"/tmp/{sanitized_storage_name}_prompt_tuning" - shutil.rmtree(temp_dir, ignore_errors=True) - os.makedirs(temp_dir, exist_ok=True) - with open(f"{temp_dir}/settings.yaml", "w") as f: - yaml.dump(data, f, default_flow_style=False) - - # generate prompts - try: - await generate_fine_tune_prompts( - config=f"{temp_dir}/settings.yaml", - root=temp_dir, - domain="", - selection_method="random", - limit=limit, - skip_entity_types=True, - output=f"{temp_dir}/prompts", - ) - except Exception as e: - logger = LoggerSingleton().get_instance() - error_details = { - "storage_name": storage_name, - } - logger.on_error( - message="Auto-prompt generation failed.", - cause=e, - stack=traceback.format_exc(), - details=error_details, - ) - raise HTTPException( - status_code=500, - detail=f"Error generating prompts for data in '{storage_name}'. Please try a lower limit.", - ) - - # zip up the generated prompt files and return the zip file - temp_archive = ( - f"{temp_dir}/prompts" # will become a zip file with the name prompts.zip - ) - shutil.make_archive(temp_archive, "zip", root_dir=temp_dir, base_dir="prompts") - - def iterfile(file_path: str): - with open(file_path, mode="rb") as file_like: - yield from file_like - - return StreamingResponse(iterfile(f"{temp_archive}.zip")) diff --git a/backend/src/api/pipeline-settings.yaml b/backend/src/api/pipeline-settings.yaml deleted file mode 100644 index 92b70fae..00000000 --- a/backend/src/api/pipeline-settings.yaml +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
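The prompt-generation endpoint above streams back a zip archive of the generated prompt files, so a caller only needs to write the response body to disk. A hypothetical client sketch, with the base URL and output path as placeholders and the route shape taken from the removed code above:

```python
# Hypothetical client for GET /index/config/prompts, which streams back a prompts zip.
import requests


def download_prompts(
    base_url: str, storage_name: str, limit: int = 5, out_path: str = "prompts.zip"
) -> str:
    with requests.get(
        f"{base_url}/index/config/prompts",
        params={"storage_name": storage_name, "limit": limit},
        stream=True,
        timeout=600,  # prompt tuning can take several minutes
    ) as resp:
        resp.raise_for_status()
        with open(out_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    return out_path
```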
- -# this yaml file serves as a configuration template for the graphrag indexing jobs -# some values are hardcoded while others denoted by PLACEHOLDER will be dynamically set -input: - type: blob - file_type: text - file_pattern: .*\.txt$ - storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL - container_name: PLACEHOLDER - base_dir: . - -storage: - type: blob - storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL - container_name: PLACEHOLDER - base_dir: output - -reporting: - type: blob - storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL - container_name: PLACEHOLDER - base_dir: logs - -cache: - type: blob - storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL - container_name: PLACEHOLDER - base_dir: cache - -llm: - type: azure_openai_chat - api_base: $GRAPHRAG_API_BASE - api_version: $GRAPHRAG_API_VERSION - model: $GRAPHRAG_LLM_MODEL - deployment_name: $GRAPHRAG_LLM_DEPLOYMENT_NAME - cognitive_services_endpoint: $GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT - model_supports_json: True - tokens_per_minute: 80000 - requests_per_minute: 480 - thread_count: 50 - concurrent_requests: 25 - -parallelization: - stagger: 0.25 - num_threads: 10 - -async_mode: threaded - -embeddings: - async_mode: threaded - llm: - type: azure_openai_embedding - api_base: $GRAPHRAG_API_BASE - api_version: $GRAPHRAG_API_VERSION - batch_size: 16 - model: $GRAPHRAG_EMBEDDING_MODEL - deployment_name: $GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME - cognitive_services_endpoint: $GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT - tokens_per_minute: 350000 - concurrent_requests: 25 - requests_per_minute: 2100 - thread_count: 50 - max_retries: 50 - parallelization: - stagger: 0.25 - num_threads: 10 - vector_store: - type: azure_ai_search - collection_name: PLACEHOLDER - title_column: name - overwrite: True - url: $AI_SEARCH_URL - audience: $AI_SEARCH_AUDIENCE - -entity_extraction: - prompt: PLACEHOLDER - -community_reports: - prompt: PLACEHOLDER - -summarize_descriptions: - prompt: PLACEHOLDER - -# claim extraction is disabled by default in the graphrag library so we enable it for the solution accelerator -claim_extraction: - enabled: True - -snapshots: - graphml: True diff --git a/backend/src/api/query.py b/backend/src/api/query.py deleted file mode 100644 index 134a3203..00000000 --- a/backend/src/api/query.py +++ /dev/null @@ -1,651 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
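Both the removed pipeline-settings.yaml above and the new scripts/settings.yaml earlier in this diff follow the same convention: `$`-prefixed values are resolved from environment variables, while `PLACEHOLDER` fields are filled in per job (the removed index.py code above did exactly this before invoking the pipeline). A minimal sketch of that substitution, assuming the sanitized container names come from `sanitize_name()`:

```python
# Minimal sketch (not the accelerator's exact loader) of filling the PLACEHOLDER
# fields in the settings template before an indexing job runs. Field names mirror
# the templates in this diff; sanitized names are assumed to come from sanitize_name().
import yaml


def load_job_settings(path: str, sanitized_storage: str, sanitized_index: str) -> dict:
    with open(path) as f:
        settings = yaml.safe_load(f)
    # input documents live in the user's (sanitized) storage container
    settings["input"]["container_name"] = sanitized_storage
    # cache, logs, and output all live in the index's own container
    for section in ("cache", "reporting", "storage"):
        settings[section]["container_name"] = sanitized_index
    # one AI Search collection per index
    settings["embeddings"]["vector_store"]["collection_name"] = (
        f"{sanitized_index}_description_embedding"
    )
    return settings
```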
- -import inspect -import json -import os -import traceback -from typing import Any - -import pandas as pd -import yaml -from azure.identity import DefaultAzureCredential -from azure.search.documents import SearchClient -from azure.search.documents.models import VectorizedQuery -from fastapi import ( - APIRouter, - HTTPException, -) -from graphrag.config import create_graphrag_config -from graphrag.model.types import TextEmbedder -from graphrag.query.api import global_search, local_search -from graphrag.vector_stores.base import ( - BaseVectorStore, - VectorStoreDocument, - VectorStoreSearchResult, -) - -from src.api.azure_clients import AzureClientManager -from src.api.common import ( - sanitize_name, - validate_index_file_exist, -) -from src.logger import LoggerSingleton -from src.models import ( - GraphRequest, - GraphResponse, -) -from src.typing.pipeline import PipelineJobState -from src.utils import query as query_helper -from src.utils.pipeline import PipelineJob - -query_route = APIRouter( - prefix="/query", - tags=["Query Operations"], -) - - -@query_route.post( - "/global", - summary="Perform a global search across the knowledge graph index", - description="The global query method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole.", - response_model=GraphResponse, - responses={200: {"model": GraphResponse}}, -) -async def global_query(request: GraphRequest): - # this is a slightly modified version of the graphrag.query.cli.run_global_search method - if isinstance(request.index_name, str): - index_names = [request.index_name] - else: - index_names = request.index_name - sanitized_index_names = [sanitize_name(name) for name in index_names] - sanitized_index_names_link = { - s: i for s, i in zip(sanitized_index_names, index_names) - } - - for index_name in sanitized_index_names: - if not _is_index_complete(index_name): - raise HTTPException( - status_code=500, - detail=f"{index_name} not ready for querying.", - ) - - COMMUNITY_REPORT_TABLE = "output/create_final_community_reports.parquet" - ENTITIES_TABLE = "output/create_final_entities.parquet" - NODES_TABLE = "output/create_final_nodes.parquet" - - for index_name in sanitized_index_names: - validate_index_file_exist(index_name, COMMUNITY_REPORT_TABLE) - validate_index_file_exist(index_name, ENTITIES_TABLE) - validate_index_file_exist(index_name, NODES_TABLE) - - if isinstance(request.community_level, int): - COMMUNITY_LEVEL = request.community_level - else: - # Current investigations show that community level 1 is the most useful for global search. 
Set this as the default value - COMMUNITY_LEVEL = 1 - - try: - links = { - "nodes": {}, - "community": {}, - "entities": {}, - "text_units": {}, - "relationships": {}, - "covariates": {}, - } - max_vals = { - "nodes": -1, - "community": -1, - "entities": -1, - "text_units": -1, - "relationships": -1, - "covariates": -1, - } - - community_dfs = [] - entities_dfs = [] - nodes_dfs = [] - - for index_name in sanitized_index_names: - community_report_table_path = ( - f"abfs://{index_name}/{COMMUNITY_REPORT_TABLE}" - ) - entities_table_path = f"abfs://{index_name}/{ENTITIES_TABLE}" - nodes_table_path = f"abfs://{index_name}/{NODES_TABLE}" - - # read the parquet files into DataFrames and add provenance information - # note that nodes need to be set before communities so that max community id makes sense - nodes_df = query_helper.get_df(nodes_table_path) - for i in nodes_df["human_readable_id"]: - links["nodes"][i + max_vals["nodes"] + 1] = { - "index_name": sanitized_index_names_link[index_name], - "id": i, - } - if max_vals["nodes"] != -1: - nodes_df["human_readable_id"] += max_vals["nodes"] + 1 - nodes_df["community"] = nodes_df["community"].apply( - lambda x: str(int(x) + max_vals["community"] + 1) if x else x - ) - nodes_df["title"] = nodes_df["title"].apply(lambda x: x + f"-{index_name}") - nodes_df["source_id"] = nodes_df["source_id"].apply( - lambda x: ",".join([i + f"-{index_name}" for i in x.split(",")]) - ) - max_vals["nodes"] = nodes_df["human_readable_id"].max() - nodes_dfs.append(nodes_df) - - community_df = query_helper.get_df(community_report_table_path) - for i in community_df["community"].astype(int): - links["community"][i + max_vals["community"] + 1] = { - "index_name": sanitized_index_names_link[index_name], - "id": str(i), - } - if max_vals["community"] != -1: - col = community_df["community"].astype(int) + max_vals["community"] + 1 - community_df["community"] = col.astype(str) - max_vals["community"] = community_df["community"].astype(int).max() - community_dfs.append(community_df) - - entities_df = query_helper.get_df(entities_table_path) - for i in entities_df["human_readable_id"]: - links["entities"][i + max_vals["entities"] + 1] = { - "index_name": sanitized_index_names_link[index_name], - "id": i, - } - if max_vals["entities"] != -1: - entities_df["human_readable_id"] += max_vals["entities"] + 1 - entities_df["name"] = entities_df["name"].apply( - lambda x: x + f"-{index_name}" - ) - entities_df["text_unit_ids"] = entities_df["text_unit_ids"].apply( - lambda x: [i + f"-{index_name}" for i in x] - ) - max_vals["entities"] = entities_df["human_readable_id"].max() - entities_dfs.append(entities_df) - - # merge the dataframes - nodes_combined = pd.concat(nodes_dfs, axis=0, ignore_index=True, sort=False) - community_combined = pd.concat( - community_dfs, axis=0, ignore_index=True, sort=False - ) - entities_combined = pd.concat( - entities_dfs, axis=0, ignore_index=True, sort=False - ) - - # load custom pipeline settings - this_directory = os.path.dirname( - os.path.abspath(inspect.getfile(inspect.currentframe())) - ) - data = yaml.safe_load(open(f"{this_directory}/pipeline-settings.yaml")) - # layer the custom settings on top of the default configuration settings of graphrag - parameters = create_graphrag_config(data, ".") - - # perform async search - result = await global_search( - config=parameters, - nodes=nodes_combined, - entities=entities_combined, - community_reports=community_combined, - community_level=COMMUNITY_LEVEL, - response_type="Multiple Paragraphs", - 
query=request.query, - ) - - # link index provenance to the context data - context_data = _update_context(result[1], links) - - return GraphResponse(result=result[0], context_data=context_data) - except Exception as e: - logger = LoggerSingleton().get_instance() - logger.on_error( - message="Could not perform global search.", - cause=e, - stack=traceback.format_exc(), - ) - raise HTTPException(status_code=500, detail=None) - - -@query_route.post( - "/local", - summary="Perform a local search across the knowledge graph index.", - description="The local query method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).", - response_model=GraphResponse, - responses={200: {"model": GraphResponse}}, -) -async def local_query(request: GraphRequest): - if isinstance(request.index_name, str): - index_names = [request.index_name] - else: - index_names = request.index_name - sanitized_index_names = [sanitize_name(name) for name in index_names] - sanitized_index_names_link = { - s: i for s, i in zip(sanitized_index_names, index_names) - } - - for index_name in sanitized_index_names: - if not _is_index_complete(index_name): - raise HTTPException( - status_code=500, - detail=f"{index_name} not ready for querying.", - ) - - azure_client_manager = AzureClientManager() - blob_service_client = azure_client_manager.get_blob_service_client() - - community_dfs = [] - covariates_dfs = [] - entities_dfs = [] - nodes_dfs = [] - relationships_dfs = [] - text_units_dfs = [] - - links = { - "nodes": {}, - "community": {}, - "entities": {}, - "text_units": {}, - "relationships": {}, - "covariates": {}, - } - max_vals = { - "nodes": -1, - "community": -1, - "entities": -1, - "text_units": -1, - "relationships": -1, - "covariates": -1, - } - - COMMUNITY_REPORT_TABLE = "output/create_final_community_reports.parquet" - COVARIATES_TABLE = "output/create_final_covariates.parquet" - ENTITIES_TABLE = "output/create_final_entities.parquet" - NODES_TABLE = "output/create_final_nodes.parquet" - RELATIONSHIPS_TABLE = "output/create_final_relationships.parquet" - TEXT_UNITS_TABLE = "output/create_final_text_units.parquet" - - if isinstance(request.community_level, int): - COMMUNITY_LEVEL = request.community_level - else: - # Current investigations show that community level 2 is the most useful for local search. 
Set this as the default value - COMMUNITY_LEVEL = 2 - - for index_name in sanitized_index_names: - # check for existence of files the query relies on to validate the index is complete - validate_index_file_exist(index_name, COMMUNITY_REPORT_TABLE) - validate_index_file_exist(index_name, ENTITIES_TABLE) - validate_index_file_exist(index_name, NODES_TABLE) - validate_index_file_exist(index_name, RELATIONSHIPS_TABLE) - validate_index_file_exist(index_name, TEXT_UNITS_TABLE) - - community_report_table_path = f"abfs://{index_name}/{COMMUNITY_REPORT_TABLE}" - covariates_table_path = f"abfs://{index_name}/{COVARIATES_TABLE}" - entities_table_path = f"abfs://{index_name}/{ENTITIES_TABLE}" - nodes_table_path = f"abfs://{index_name}/{NODES_TABLE}" - relationships_table_path = f"abfs://{index_name}/{RELATIONSHIPS_TABLE}" - text_units_table_path = f"abfs://{index_name}/{TEXT_UNITS_TABLE}" - - # read the parquet files into DataFrames and add provenance information - - # note that nodes need to set before communities to that max community id makes sense - nodes_df = query_helper.get_df(nodes_table_path) - for i in nodes_df["human_readable_id"]: - links["nodes"][i + max_vals["nodes"] + 1] = { - "index_name": sanitized_index_names_link[index_name], - "id": i, - } - if max_vals["nodes"] != -1: - nodes_df["human_readable_id"] += max_vals["nodes"] + 1 - nodes_df["community"] = nodes_df["community"].apply( - lambda x: str(int(x) + max_vals["community"] + 1) if x else x - ) - nodes_df["id"] = nodes_df["id"].apply(lambda x: x + f"-{index_name}") - nodes_df["title"] = nodes_df["title"].apply(lambda x: x + f"-{index_name}") - nodes_df["source_id"] = nodes_df["source_id"].apply( - lambda x: ",".join([i + f"-{index_name}" for i in x.split(",")]) - ) - max_vals["nodes"] = nodes_df["human_readable_id"].max() - nodes_dfs.append(nodes_df) - - community_df = query_helper.get_df(community_report_table_path) - for i in community_df["community"].astype(int): - links["community"][i + max_vals["community"] + 1] = { - "index_name": sanitized_index_names_link[index_name], - "id": str(i), - } - if max_vals["community"] != -1: - col = community_df["community"].astype(int) + max_vals["community"] + 1 - community_df["community"] = col.astype(str) - max_vals["community"] = community_df["community"].astype(int).max() - community_dfs.append(community_df) - - entities_df = query_helper.get_df(entities_table_path) - for i in entities_df["human_readable_id"]: - links["entities"][i + max_vals["entities"] + 1] = { - "index_name": sanitized_index_names_link[index_name], - "id": i, - } - if max_vals["entities"] != -1: - entities_df["human_readable_id"] += max_vals["entities"] + 1 - entities_df["id"] = entities_df["id"].apply(lambda x: x + f"-{index_name}") - entities_df["name"] = entities_df["name"].apply(lambda x: x + f"-{index_name}") - entities_df["text_unit_ids"] = entities_df["text_unit_ids"].apply( - lambda x: [i + f"-{index_name}" for i in x] - ) - max_vals["entities"] = entities_df["human_readable_id"].max() - entities_dfs.append(entities_df) - - relationships_df = query_helper.get_df(relationships_table_path) - for i in relationships_df["human_readable_id"].astype(int): - links["relationships"][i + max_vals["relationships"] + 1] = { - "index_name": sanitized_index_names_link[index_name], - "id": i, - } - if max_vals["relationships"] != -1: - col = ( - relationships_df["human_readable_id"].astype(int) - + max_vals["relationships"] - + 1 - ) - relationships_df["human_readable_id"] = col.astype(str) - relationships_df["source"] = 
relationships_df["source"].apply( - lambda x: x + f"-{index_name}" - ) - relationships_df["target"] = relationships_df["target"].apply( - lambda x: x + f"-{index_name}" - ) - relationships_df["text_unit_ids"] = relationships_df["text_unit_ids"].apply( - lambda x: [i + f"-{index_name}" for i in x] - ) - max_vals["relationships"] = ( - relationships_df["human_readable_id"].astype(int).max() - ) - relationships_dfs.append(relationships_df) - - text_units_df = query_helper.get_df(text_units_table_path) - text_units_df["id"] = text_units_df["id"].apply(lambda x: f"{x}-{index_name}") - text_units_dfs.append(text_units_df) - - index_container_client = blob_service_client.get_container_client(index_name) - if index_container_client.get_blob_client(COVARIATES_TABLE).exists(): - covariates_df = query_helper.get_df(covariates_table_path) - if i in covariates_df["human_readable_id"].astype(int): - links["covariates"][i + max_vals["covariates"] + 1] = { - "index_name": sanitized_index_names_link[index_name], - "id": i, - } - if max_vals["covariates"] != -1: - col = ( - covariates_df["human_readable_id"].astype(int) - + max_vals["covariates"] - + 1 - ) - covariates_df["human_readable_id"] = col.astype(str) - max_vals["covariates"] = ( - covariates_df["human_readable_id"].astype(int).max() - ) - covariates_dfs.append(covariates_df) - - nodes_combined = pd.concat(nodes_dfs, axis=0, ignore_index=True) - community_combined = pd.concat(community_dfs, axis=0, ignore_index=True) - entities_combined = pd.concat(entities_dfs, axis=0, ignore_index=True) - text_units_combined = pd.concat(text_units_dfs, axis=0, ignore_index=True) - relationships_combined = pd.concat(relationships_dfs, axis=0, ignore_index=True) - covariates_combined = ( - pd.concat(covariates_dfs, axis=0, ignore_index=True) - if covariates_dfs != [] - else None - ) - - # load custom pipeline settings - this_directory = os.path.dirname( - os.path.abspath(inspect.getfile(inspect.currentframe())) - ) - data = yaml.safe_load(open(f"{this_directory}/pipeline-settings.yaml")) - # layer the custom settings on top of the default configuration settings of graphrag - parameters = create_graphrag_config(data, ".") - - # add index_names to vector_store args - parameters.embeddings.vector_store["index_names"] = sanitized_index_names - # internally write over the get_embedding_description_store - # method to use the multi-index collection. - import graphrag.query.api - - graphrag.query.api._get_embedding_description_store = ( - _get_embedding_description_store - ) - # perform async search - result = await local_search( - config=parameters, - nodes=nodes_combined, - entities=entities_combined, - community_reports=community_combined, - text_units=text_units_combined, - relationships=relationships_combined, - covariates=covariates_combined, - community_level=COMMUNITY_LEVEL, - response_type="Multiple Paragraphs", - query=request.query, - ) - - # link index provenance to the context data - context_data = _update_context(result[1], links) - - return GraphResponse(result=result[0], context_data=context_data) - - -def _is_index_complete(index_name: str) -> bool: - """ - Check if an index is ready for querying. - - An index is ready for use only if it exists in the jobs table in cosmos db and - the indexing build job has finished (i.e. 100 percent). Otherwise it is not ready. - - Args: - ----- - index_name (str) - Name of the index to check. - - Returns: bool - True if the index is ready for use, False otherwise. 
- """ - if PipelineJob.item_exist(index_name): - pipeline_job = PipelineJob.load_item(index_name) - if PipelineJobState(pipeline_job.status) == PipelineJobState.COMPLETE: - return True - return False - - -def _update_context(context, links): - """ - Update context data. - context_keys = ['reports', 'entities', 'relationships', 'claims', 'sources'] - """ - updated_context = {} - for key in context: - updated_entry = [] - if key == "reports": - updated_entry = [ - dict( - {k: entry[k] for k in entry}, - **{ - "index_name": links["community"][int(entry["id"])][ - "index_name" - ], - "index_id": links["community"][int(entry["id"])]["id"], - }, - ) - for entry in context[key] - ] - if key == "entities": - updated_entry = [ - dict( - {k: entry[k] for k in entry}, - **{ - "entity": entry["entity"].split("-")[0], - "index_name": links["entities"][int(entry["id"])]["index_name"], - "index_id": links["entities"][int(entry["id"])]["id"], - }, - ) - for entry in context[key] - ] - if key == "relationships": - updated_entry = [ - dict( - {k: entry[k] for k in entry}, - **{ - "source": entry["source"].split("-")[0], - "target": entry["target"].split("-")[0], - "index_name": links["relationships"][int(entry["id"])][ - "index_name" - ], - "index_id": links["relationships"][int(entry["id"])]["id"], - }, - ) - for entry in context[key] - ] - if key == "claims": - updated_entry = [ - dict( - {k: entry[k] for k in entry}, - **{ - "index_name": links["claims"][int(entry["id"])]["index_name"], - "index_id": links["claims"][int(entry["id"])]["id"], - }, - ) - for entry in context[key] - ] - if key == "sources": - updated_entry = context[key] - updated_context[key] = updated_entry - return updated_context - - -def _get_embedding_description_store( - entities: Any, - vector_store_type: str = Any, - config_args: dict | None = None, -): - collection_names = [ - f"{index_name}_description_embedding" - for index_name in config_args.get("index_names", []) - ] - ai_search_url = os.environ["AI_SEARCH_URL"] - description_embedding_store = MultiAzureAISearch( - collection_name="multi", - document_collection=None, - db_connection=None, - ) - description_embedding_store.connect(url=ai_search_url) - for collection_name in collection_names: - description_embedding_store.add_collection(collection_name) - return description_embedding_store - - -class MultiAzureAISearch(BaseVectorStore): - """The Azure AI Search vector storage implementation.""" - - def __init__( - self, - collection_name: str, - db_connection: Any, - document_collection: Any, - query_filter: Any | None = None, - **kwargs: Any, - ): - self.collection_name = collection_name - self.db_connection = db_connection - self.document_collection = document_collection - self.query_filter = query_filter - self.kwargs = kwargs - self.collections = [] - - def add_collection(self, collection_name: str): - self.collections.append(collection_name) - - def connect(self, **kwargs: Any) -> Any: - """Connect to the AzureAI vector store.""" - self.url = kwargs.get("url", None) - self.vector_size = kwargs.get("vector_size", 1536) - - self.vector_search_profile_name = kwargs.get( - "vector_search_profile_name", "vectorSearchProfile" - ) - - if self.url: - pass - else: - not_supported_error = ( - "Azure AI Search client is not supported on local host." 
- ) - raise ValueError(not_supported_error) - - def load_documents( - self, documents: list[VectorStoreDocument], overwrite: bool = True - ) -> None: - raise NotImplementedError("load_documents() method not implemented") - - def filter_by_id(self, include_ids: list[str] | list[int]) -> Any: - """Build a query filter to filter documents by a list of ids.""" - if include_ids is None or len(include_ids) == 0: - self.query_filter = None - # returning to keep consistency with other methods, but not needed - return self.query_filter - - # more info about odata filtering here: https://learn.microsoft.com/en-us/azure/search/search-query-odata-search-in-function - # search.in is faster that joined and/or conditions - id_filter = ",".join([f"{id!s}" for id in include_ids]) - self.query_filter = f"search.in(id, '{id_filter}', ',')" - - # returning to keep consistency with other methods, but not needed - # TODO: Refactor on a future PR - return self.query_filter - - def similarity_search_by_vector( - self, query_embedding: list[float], k: int = 10, **kwargs: Any - ) -> list[VectorStoreSearchResult]: - """Perform a vector-based similarity search.""" - vectorized_query = VectorizedQuery( - vector=query_embedding, k_nearest_neighbors=k, fields="vector" - ) - - docs = [] - for collection_name in self.collections: - add_on = "-" + str(collection_name.split("_")[0]) - audience = os.environ["AI_SEARCH_AUDIENCE"] - db_connection = SearchClient( - self.url, - collection_name, - DefaultAzureCredential(), - audience=audience, - ) - response = db_connection.search( - vector_queries=[vectorized_query], - ) - mod_response = [] - for r in response: - r["id"] = r.get("id", "") + add_on - mod_response += [r] - docs += mod_response - return [ - VectorStoreSearchResult( - document=VectorStoreDocument( - id=doc.get("id", ""), - text=doc.get("text", ""), - vector=doc.get("vector", []), - attributes=(json.loads(doc.get("attributes", "{}"))), - ), - score=abs(doc["@search.score"]), - ) - for doc in docs - ] - - def similarity_search_by_text( - self, text: str, text_embedder: TextEmbedder, k: int = 10, **kwargs: Any - ) -> list[VectorStoreSearchResult]: - """Perform a text-based similarity search.""" - query_embedding = text_embedder(text) - if query_embedding: - return self.similarity_search_by_vector( - query_embedding=query_embedding, k=k - ) - return [] diff --git a/backend/src/logger/__init__.py b/backend/src/logger/__init__.py deleted file mode 100644 index 1eea5f43..00000000 --- a/backend/src/logger/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
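The multi-index bookkeeping in the global and local query handlers above is dense, but the core trick is small: shift each index's `human_readable_id` values by a running offset so they remain unique after concatenation, and record in a `links` map which index (and original id) each shifted id came from so that `_update_context` can restore provenance in the response. A toy illustration of the pattern (not the accelerator's exact code):

```python
# Toy illustration of the id-offset + provenance-links pattern used when querying
# across multiple indexes. Column and key names mirror the removed code above.
import pandas as pd


def merge_with_provenance(
    frames: dict[str, pd.DataFrame],
) -> tuple[pd.DataFrame, dict[int, dict]]:
    links: dict[int, dict] = {}
    merged = []
    offset = 0
    for index_name, df in frames.items():
        df = df.copy()
        # remember where each shifted id originally came from
        links.update({
            int(i) + offset: {"index_name": index_name, "id": int(i)}
            for i in df["human_readable_id"]
        })
        df["human_readable_id"] = df["human_readable_id"] + offset
        offset = int(df["human_readable_id"].max()) + 1
        merged.append(df)
    return pd.concat(merged, ignore_index=True), links


a = pd.DataFrame({"human_readable_id": [0, 1], "name": ["alpha", "beta"]})
b = pd.DataFrame({"human_readable_id": [0, 1], "name": ["gamma", "delta"]})
combined, links = merge_with_provenance({"index-a": a, "index-b": b})
assert list(combined["human_readable_id"]) == [0, 1, 2, 3]
assert links[2] == {"index_name": "index-b", "id": 0}
```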
- -from src.logger.application_insights_workflow_callbacks import ( - ApplicationInsightsWorkflowCallbacks, -) -from src.logger.console_workflow_callbacks import ConsoleWorkflowCallbacks -from src.logger.load_logger import load_pipeline_logger -from src.logger.logger_singleton import LoggerSingleton -from src.logger.pipeline_job_workflow_callbacks import PipelineJobWorkflowCallbacks -from src.logger.typing import ( - PipelineAppInsightsReportingConfig, - PipelineReportingConfigTypes, - Reporters, -) - -__all__ = [ - "Reporters", - "ApplicationInsightsWorkflowCallbacks", - "ConsoleWorkflowCallbacks", - "LoggerSingleton", - "PipelineAppInsightsReportingConfig", - "PipelineJobWorkflowCallbacks", - "PipelineReportingConfigTypes", - "load_pipeline_logger", -] diff --git a/backend/src/logger/load_logger.py b/backend/src/logger/load_logger.py deleted file mode 100644 index cd0383db..00000000 --- a/backend/src/logger/load_logger.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from pathlib import Path -from typing import List - -from datashaper import WorkflowCallbacks, WorkflowCallbacksManager -from graphrag.index.reporting import FileWorkflowCallbacks - -from src.api.azure_clients import AzureClientManager -from src.logger.application_insights_workflow_callbacks import ( - ApplicationInsightsWorkflowCallbacks, -) -from src.logger.blob_workflow_callbacks import BlobWorkflowCallbacks -from src.logger.console_workflow_callbacks import ConsoleWorkflowCallbacks -from src.logger.typing import Reporters - - -def load_pipeline_logger( - reporting_dir: str | None, - reporters: List[Reporters] | None = [], - index_name: str = "", - num_workflow_steps: int = 0, -) -> WorkflowCallbacks: - """Create a callback manager and register a list of loggers. - - Loggers may be configured as generic loggers or associated with a specified indexing job. 
- """ - # always register the console logger if no loggers are specified - if Reporters.CONSOLE not in reporters: - reporters.append(Reporters.CONSOLE) - - azure_client_manager = AzureClientManager() - callback_manager = WorkflowCallbacksManager() - for reporter in reporters: - match reporter: - case Reporters.BLOB: - # create a dedicated container for logs - container_name = "logs" - if reporting_dir is not None: - container_name = os.path.join(reporting_dir, container_name) - # ensure the root directory exists; if not, create it - blob_service_client = azure_client_manager.get_blob_service_client() - container_root = Path(container_name).parts[0] - if not blob_service_client.get_container_client( - container_root - ).exists(): - blob_service_client.create_container(container_root) - # register the blob reporter - callback_manager.register( - BlobWorkflowCallbacks( - blob_service_client=blob_service_client, - container_name=container_name, - index_name=index_name, - num_workflow_steps=num_workflow_steps, - ) - ) - case Reporters.FILE: - callback_manager.register(FileWorkflowCallbacks(dir=reporting_dir)) - case Reporters.APP_INSIGHTS: - if os.getenv("APP_INSIGHTS_CONNECTION_STRING"): - callback_manager.register( - ApplicationInsightsWorkflowCallbacks( - connection_string=os.environ[ - "APP_INSIGHTS_CONNECTION_STRING" - ], - index_name=index_name, - num_workflow_steps=num_workflow_steps, - ) - ) - case Reporters.CONSOLE: - callback_manager.register( - ConsoleWorkflowCallbacks( - index_name=index_name, num_workflow_steps=num_workflow_steps - ) - ) - case _: - print(f"WARNING: unknown reporter type: {reporter}. Skipping.") - return callback_manager diff --git a/backend/src/logger/logger_singleton.py b/backend/src/logger/logger_singleton.py deleted file mode 100644 index 7fb8366a..00000000 --- a/backend/src/logger/logger_singleton.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import os -from urllib.parse import urlparse - -from datashaper import WorkflowCallbacks - -from src.logger.load_logger import load_pipeline_logger -from src.logger.typing import Reporters - - -class LoggerSingleton: - _instance: WorkflowCallbacks = None - - @classmethod - def get_instance(cls) -> WorkflowCallbacks: - if cls._instance is None: - # Set up reporters based on environment variable or defaults - reporters = [] - for reporter_name in os.getenv( - "REPORTERS", Reporters.CONSOLE.name.upper() - ).split(","): - try: - reporters.append(Reporters[reporter_name.upper()]) - except KeyError: - raise ValueError(f"Found unknown reporter: {reporter_name}") - cls._instance = load_pipeline_logger(reporting_dir="", reporters=reporters) - return cls._instance - - -def _is_valid_url(url: str) -> bool: - try: - result = urlparse(url) - return all([result.scheme, result.netloc]) - except ValueError: - return False diff --git a/backend/src/utils/__init__.py b/backend/src/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/src/utils/query.py b/backend/src/utils/query.py deleted file mode 100644 index 46c0e9f9..00000000 --- a/backend/src/utils/query.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
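As the updated call sites in this diff show (for example scripts/job-scheduler.py above), the removed `LoggerSingleton().get_instance()` pattern is replaced by calling `load_pipeline_logger()` from `graphrag_app.logger.load_logger` and reporting failures through `error(message=..., cause=..., stack=...)`. A minimal usage sketch, assuming the `graphrag_app` package is installed:

```python
# Minimal usage sketch of the new logger entry point; argument names mirror the
# call sites elsewhere in this diff.
import traceback

from graphrag_app.logger.load_logger import load_pipeline_logger

logger = load_pipeline_logger()
try:
    raise RuntimeError("boom")  # stand-in for real work that fails
except Exception as e:
    logger.error(
        message="Work failed.",
        cause=e,
        stack=traceback.format_exc(),
    )
```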
- -import pandas as pd -from graphrag.query.indexer_adapters import ( - read_indexer_covariates, - read_indexer_entities, - read_indexer_relationships, - read_indexer_reports, - read_indexer_text_units, -) - -from src.api.common import get_pandas_storage_options - - -def get_entities( - entity_table_path: str, - entity_embedding_table_path: str, - community_level: int = 0, -) -> pd.DataFrame: - storage_options = get_pandas_storage_options() - entity_df = pd.read_parquet( - entity_table_path, - storage_options=storage_options, - ) - entity_embedding_df = pd.read_parquet( - entity_embedding_table_path, - storage_options=storage_options, - ) - return pd.DataFrame( - read_indexer_entities(entity_df, entity_embedding_df, community_level) - ) - - -def get_reports( - entity_table_path: str, community_report_table_path: str, community_level: int -) -> pd.DataFrame: - storage_options = get_pandas_storage_options() - entity_df = pd.read_parquet( - entity_table_path, - storage_options=storage_options(), - ) - report_df = pd.read_parquet( - community_report_table_path, - storage_options=storage_options(), - ) - return pd.DataFrame(read_indexer_reports(report_df, entity_df, community_level)) - - -def get_relationships(relationships_table_path: str) -> pd.DataFrame: - relationship_df = pd.read_parquet( - relationships_table_path, - storage_options=get_pandas_storage_options(), - ) - return pd.DataFrame(read_indexer_relationships(relationship_df)) - - -def get_covariates(covariate_table_path: str) -> pd.DataFrame: - covariate_df = pd.read_parquet( - covariate_table_path, - storage_options=get_pandas_storage_options(), - ) - return pd.DataFrame(read_indexer_covariates(covariate_df)) - - -def get_text_units(text_unit_table_path: str) -> pd.DataFrame: - text_unit_df = pd.read_parquet( - text_unit_table_path, - storage_options=get_pandas_storage_options(), - ) - return pd.DataFrame(read_indexer_text_units(text_unit_df)) - - -def get_df( - table_path: str, -) -> pd.DataFrame: - df = pd.read_parquet( - table_path, - storage_options=get_pandas_storage_options(), - ) - return df diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index b7881591..41883c09 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -1,8 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
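The removed query helpers above all follow one pattern: read an index's parquet output directly from Blob Storage with pandas, using adlfs-style `storage_options` and an `abfs://<container>/<path>` URL. A minimal sketch of that pattern; the account and container names are placeholders, and the removed `get_pandas_storage_options()` built the options from `AzureClientManager` instead of hardcoding them:

```python
# Sketch of reading index output parquet files directly from Azure Blob Storage.
# Requires pandas + adlfs; account and container names below are placeholders.
import pandas as pd
from azure.identity import DefaultAzureCredential

storage_options = {
    "account_name": "mystorageaccount",  # placeholder storage account
    "credential": DefaultAzureCredential(),
}

nodes_df = pd.read_parquet(
    "abfs://my-sanitized-index/output/create_final_nodes.parquet",  # placeholder container
    storage_options=storage_options,
)
print(nodes_df.head())
```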
-import inspect import os +from pathlib import Path from typing import Generator import pytest @@ -10,20 +10,21 @@ from azure.storage.blob import BlobServiceClient from fastapi.testclient import TestClient -from src.api.common import sanitize_name -from src.main import app +from graphrag_app.main import app +from graphrag_app.utils.common import sanitize_name @pytest.fixture(scope="session") def blob_with_data_container_name(blob_service_client: BlobServiceClient): # create a storage container and upload some data container_name = "container-with-data" - blob_service_client.create_container(container_name) - blob_client = blob_service_client.get_blob_client(container_name, "data.txt") + sanitized_name = sanitize_name(container_name) + blob_service_client.create_container(sanitized_name) + blob_client = blob_service_client.get_blob_client(sanitized_name, "data.txt") blob_client.upload_blob(data="Hello, World!", overwrite=True) yield container_name # cleanup - blob_service_client.delete_container(container_name) + blob_service_client.delete_container(sanitized_name) @pytest.fixture(scope="session") @@ -62,7 +63,7 @@ def container_with_graphml_file( if not blob_service_client.get_container_client(sanitized_name).exists(): blob_service_client.create_container(sanitized_name) blob_client = blob_service_client.get_blob_client( - sanitized_name, "output/summarized_graph.graphml" + sanitized_name, "output/graph.graphml" ) blob_client.upload_blob(data="a fake graphml file", overwrite=True) # add an entry to the container-store table in cosmos db @@ -90,22 +91,14 @@ def container_with_index_files( if not blob_service_client.get_container_client(sanitized_name).exists(): blob_service_client.create_container(sanitized_name) - # upload data/aliens-dataset/output folder to the container - this_directory = os.path.dirname( - os.path.abspath(inspect.getfile(inspect.currentframe())) - ) - data_root = f"{this_directory}/data/synthetic-dataset/output" - for file in [ - "create_base_documents.parquet", - "create_final_entities.parquet", - "create_final_relationships.parquet", - "create_final_community_reports.parquet", - "create_base_text_units.parquet", - ]: + # upload synthetic index to a container + data_root = Path(__file__).parent / "data/synthetic-dataset/output" + for file in data_root.iterdir(): + # upload each file in the output folder blob_client = blob_service_client.get_blob_client( - sanitized_name, f"output/{file}" + sanitized_name, f"output/{file.name}" ) - local_file = f"{data_root}/{file}" + local_file = f"{data_root}/{file.name}" with open(local_file, "rb") as data: blob_client.upload_blob(data, overwrite=True) diff --git a/backend/tests/data/synthetic-dataset/output/create_final_communities.parquet b/backend/tests/data/synthetic-dataset/output/create_final_communities.parquet new file mode 100644 index 00000000..a53af771 Binary files /dev/null and b/backend/tests/data/synthetic-dataset/output/create_final_communities.parquet differ diff --git a/backend/tests/data/synthetic-dataset/output/create_final_community_reports.parquet b/backend/tests/data/synthetic-dataset/output/create_final_community_reports.parquet index fc59983d..6dc5304b 100644 Binary files a/backend/tests/data/synthetic-dataset/output/create_final_community_reports.parquet and b/backend/tests/data/synthetic-dataset/output/create_final_community_reports.parquet differ diff --git a/backend/tests/data/synthetic-dataset/output/create_final_covariates.parquet 
b/backend/tests/data/synthetic-dataset/output/create_final_covariates.parquet new file mode 100644 index 00000000..0c6e171e Binary files /dev/null and b/backend/tests/data/synthetic-dataset/output/create_final_covariates.parquet differ diff --git a/backend/tests/data/synthetic-dataset/output/create_base_documents.parquet b/backend/tests/data/synthetic-dataset/output/create_final_documents.parquet similarity index 88% rename from backend/tests/data/synthetic-dataset/output/create_base_documents.parquet rename to backend/tests/data/synthetic-dataset/output/create_final_documents.parquet index 74a3cb04..19d2b0a4 100644 Binary files a/backend/tests/data/synthetic-dataset/output/create_base_documents.parquet and b/backend/tests/data/synthetic-dataset/output/create_final_documents.parquet differ diff --git a/backend/tests/data/synthetic-dataset/output/create_final_entities.parquet b/backend/tests/data/synthetic-dataset/output/create_final_entities.parquet index f14ff5c4..b4da1122 100644 Binary files a/backend/tests/data/synthetic-dataset/output/create_final_entities.parquet and b/backend/tests/data/synthetic-dataset/output/create_final_entities.parquet differ diff --git a/backend/tests/data/synthetic-dataset/output/create_final_nodes.parquet b/backend/tests/data/synthetic-dataset/output/create_final_nodes.parquet new file mode 100644 index 00000000..0e1774d0 Binary files /dev/null and b/backend/tests/data/synthetic-dataset/output/create_final_nodes.parquet differ diff --git a/backend/tests/data/synthetic-dataset/output/create_final_relationships.parquet b/backend/tests/data/synthetic-dataset/output/create_final_relationships.parquet index 28784f8e..174d28d0 100644 Binary files a/backend/tests/data/synthetic-dataset/output/create_final_relationships.parquet and b/backend/tests/data/synthetic-dataset/output/create_final_relationships.parquet differ diff --git a/backend/tests/data/synthetic-dataset/output/create_base_text_units.parquet b/backend/tests/data/synthetic-dataset/output/create_final_text_units.parquet similarity index 70% rename from backend/tests/data/synthetic-dataset/output/create_base_text_units.parquet rename to backend/tests/data/synthetic-dataset/output/create_final_text_units.parquet index c743f12f..76930e00 100644 Binary files a/backend/tests/data/synthetic-dataset/output/create_base_text_units.parquet and b/backend/tests/data/synthetic-dataset/output/create_final_text_units.parquet differ diff --git a/backend/tests/data/synthetic-dataset/output/graph.graphml b/backend/tests/data/synthetic-dataset/output/graph.graphml new file mode 100644 index 00000000..fc1a58a3 --- /dev/null +++ b/backend/tests/data/synthetic-dataset/output/graph.graphml @@ -0,0 +1,323 @@ + [323 added lines of graph.graphml XML markup omitted] \ No newline at end of file diff --git a/backend/tests/data/synthetic-dataset/output/stats.json b/backend/tests/data/synthetic-dataset/output/stats.json new file mode 100644 index
00000000..16b22bda --- /dev/null +++ b/backend/tests/data/synthetic-dataset/output/stats.json @@ -0,0 +1,43 @@ +{ + "total_runtime": 358.0534498691559, + "num_documents": 1, + "input_load_time": 0, + "workflows": { + "create_base_text_units": { + "overall": 2.060708999633789 + }, + "create_final_documents": { + "overall": 0.043251991271972656 + }, + "extract_graph": { + "overall": 162.8238878250122 + }, + "compute_communities": { + "overall": 14.345926284790039 + }, + "create_final_entities": { + "overall": 0.04870915412902832 + }, + "create_final_relationships": { + "overall": 0.05901288986206055 + }, + "create_final_nodes": { + "overall": 0.07453203201293945 + }, + "create_final_communities": { + "overall": 0.127485990524292 + }, + "create_final_covariates": { + "overall": 142.99078702926636 + }, + "create_final_text_units": { + "overall": 0.12473607063293457 + }, + "create_final_community_reports": { + "overall": 31.13183307647705 + }, + "generate_text_embeddings": { + "overall": 3.978173017501831 + } + } +} \ No newline at end of file diff --git a/backend/tests/integration/test_api_data.py b/backend/tests/integration/test_api_data.py index 054c51f5..8b72c416 100644 --- a/backend/tests/integration/test_api_data.py +++ b/backend/tests/integration/test_api_data.py @@ -19,7 +19,7 @@ def test_upload_files(cosmos_client: CosmosClient, client): response = client.post( "/data", files={"files": ("test.txt", f)}, - params={"storage_name": "testContainer"}, + params={"container_name": "testContainer"}, ) # check the response assert response.status_code == 200 diff --git a/backend/tests/integration/test_api_index.py b/backend/tests/integration/test_api_index.py index b0619be5..82d69389 100644 --- a/backend/tests/integration/test_api_index.py +++ b/backend/tests/integration/test_api_index.py @@ -17,13 +17,16 @@ def test_schedule_index_without_data(client, cosmos_client: CosmosClient): """Test scheduling an index job with a non-existent data blob container.""" response = client.post( "/index", - params={"index_name": "myindex", "storage_name": "nonexistent-data-container"}, + params={ + "index_container_name": "myindex", + "storage_container_name": "nonexistent-data-container", + }, ) assert response.status_code == 500 # def test_schedule_index_with_data(client, cosmos_client, blob_with_data_container_name): # """Test scheduling an index job with real data.""" -# response = client.post("/index", files=None, params={"storage_name": blob_with_data_container_name, "index_name": "myindex"}) +# response = client.post("/index", files=None, params={"storage_container_name": blob_with_data_container_name, "index_container_name": "myindex"}) # print(response.json()) # assert response.status_code == 200 diff --git a/backend/tests/integration/test_api_prompt_tuning.py b/backend/tests/integration/test_api_prompt_tuning.py new file mode 100644 index 00000000..b054020e --- /dev/null +++ b/backend/tests/integration/test_api_prompt_tuning.py @@ -0,0 +1,33 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +""" +Integration tests for the /index/config API endpoints. 
+""" + +from unittest.mock import AsyncMock, patch + +import pytest_asyncio + + +@pytest_asyncio.fixture +def mock_generate_indexing_prompts(): + with patch( + "graphrag.api.generate_indexing_prompts", new_callable=AsyncMock + ) as mock: + mock.return_value = ( + "synthetic-prompt1", + "synthetic-prompt2", + "synthetic-prompt3", + ) + yield mock + + +def test_generate_prompts( + blob_with_data_container_name, mock_generate_indexing_prompts, client +): + """Test generating prompts.""" + response = client.get( + "/index/config/prompts", + params={"container_name": blob_with_data_container_name}, + ) + assert response.status_code == 200 diff --git a/backend/tests/integration/test_api_source.py b/backend/tests/integration/test_api_source.py index 902bebba..00a11059 100644 --- a/backend/tests/integration/test_api_source.py +++ b/backend/tests/integration/test_api_source.py @@ -8,7 +8,7 @@ def test_get_report(container_with_index_files: str, client: TestClient): - """Test retrieving a report via the src.api.source.get_report_info() function.""" + """Test retrieving a report via the graphrag_app.api.source.get_report_info() function.""" # retrieve a report that exists response = client.get(f"/source/report/{container_with_index_files}/1") assert response.status_code == 200 @@ -20,7 +20,7 @@ def test_get_report(container_with_index_files: str, client: TestClient): def test_get_chunk_info(container_with_index_files: str, client: TestClient): """Test retrieving a text chunk.""" response = client.get( - f"/source/text/{container_with_index_files}/5b2d21ec6fc171c30bdda343f128f5a6" + f"/source/text/{container_with_index_files}/c4197a012ea9e7d2618450cbb197852dec47c40883d4a69e0ea473a8111319c80d608ae5fa66acc2d3f95cd845277b3acd8186d7fa326803dde09681da29790c" ) assert response.status_code == 200 diff --git a/backend/tests/integration/test_utils_pipeline.py b/backend/tests/integration/test_utils_pipeline.py new file mode 100644 index 00000000..abc4b822 --- /dev/null +++ b/backend/tests/integration/test_utils_pipeline.py @@ -0,0 +1,109 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +""" +Integration tests for the PipelineJob class. 
+""" + +from typing import Generator + +import pytest + +from graphrag_app.typing.pipeline import PipelineJobState +from graphrag_app.utils.pipeline import PipelineJob + + +@pytest.fixture() +def cosmos_index_job_entry(cosmos_client) -> Generator[str, None, None]: + """Create an entry for an indexing job in the appropriate CosmosDB database and container + that graphrag expects when first scheduling an indexing job.""" + + db_client = cosmos_client.get_database_client("graphrag") + container_client = db_client.get_container_client("jobs") + synthetic_job_entry = { + "id": "testID", + "epoch_request_time": 0, + "human_readable_index_name": "test_human_readable_index_name", + "sanitized_index_name": "test_sanitized_index_name", + "human_readable_storage_name": "test_human_readable_storage_name", + "sanitized_storage_name": "test_sanitized_storage_name", + "all_workflows": ["workflow1", "workflow2"], + "completed_workflows": ["workflow1"], + "failed_workflows": ["workflow2"], + "status": PipelineJobState.COMPLETE, + "percent_complete": 50.0, + "progress": "some progress", + } + container_client.upsert_item(synthetic_job_entry) + yield synthetic_job_entry["id"] + # teardown + container_client.delete_item( + synthetic_job_entry["id"], partition_key=synthetic_job_entry["id"] + ) + + +def test_pipeline_job_interface(cosmos_index_job_entry): + """Test the graphrag_app.utils.pipeline.PipelineJob class interface.""" + pipeline_job = PipelineJob() + + # test creating a new entry + pipeline_job.create_item( + id="synthetic_id", + human_readable_index_name="test_human_readable_index_name", + human_readable_storage_name="test_human_readable_storage_name", + entity_extraction_prompt="fake entity extraction prompt", + community_report_prompt="fake community report prompt", + summarize_descriptions_prompt="fake summarize descriptions prompt", + ) + assert pipeline_job.item_exist("synthetic_id") + + # test loading an existing entry + pipeline_job = pipeline_job.load_item(cosmos_index_job_entry) + assert pipeline_job.id == "testID" + assert pipeline_job.human_readable_index_name == "test_human_readable_index_name" + assert pipeline_job.sanitized_index_name == "test_sanitized_index_name" + assert ( + pipeline_job.human_readable_storage_name == "test_human_readable_storage_name" + ) + assert pipeline_job.sanitized_storage_name == "test_sanitized_storage_name" + assert pipeline_job.all_workflows == ["workflow1", "workflow2"] + assert pipeline_job.completed_workflows == ["workflow1"] + assert pipeline_job.failed_workflows == ["workflow2"] + assert pipeline_job.status == PipelineJobState.COMPLETE + assert pipeline_job.percent_complete == 50.0 + assert pipeline_job.progress == "some progress" + assert pipeline_job.calculate_percent_complete() == 50.0 + + # test setters and getters + pipeline_job.id = "newID" + assert pipeline_job.id == "newID" + pipeline_job.epoch_request_time = 1 + assert pipeline_job.epoch_request_time == 1 + + pipeline_job.human_readable_index_name = "new_human_readable_index_name" + assert pipeline_job.human_readable_index_name == "new_human_readable_index_name" + pipeline_job.sanitized_index_name = "new_sanitized_index_name" + assert pipeline_job.sanitized_index_name == "new_sanitized_index_name" + + pipeline_job.human_readable_storage_name = "new_human_readable_storage_name" + assert pipeline_job.human_readable_storage_name == "new_human_readable_storage_name" + pipeline_job.sanitized_storage_name = "new_sanitized_storage_name" + assert pipeline_job.sanitized_storage_name == 
"new_sanitized_storage_name" + + pipeline_job.entity_extraction_prompt = "new_entity_extraction_prompt" + assert pipeline_job.entity_extraction_prompt == "new_entity_extraction_prompt" + pipeline_job.community_report_prompt = "new_community_report_prompt" + assert pipeline_job.community_report_prompt == "new_community_report_prompt" + pipeline_job.summarize_descriptions_prompt = "new_summarize_descriptions_prompt" + assert ( + pipeline_job.summarize_descriptions_prompt + == "new_summarize_descriptions_prompt" + ) + + pipeline_job.all_workflows = ["new_workflow1", "new_workflow2", "new_workflow3"] + assert len(pipeline_job.all_workflows) == 3 + + pipeline_job.completed_workflows = ["new_workflow1", "new_workflow2"] + assert len(pipeline_job.completed_workflows) == 2 + + pipeline_job.failed_workflows = ["new_workflow3"] + assert len(pipeline_job.failed_workflows) == 1 diff --git a/backend/tests/unit/test_azure_clients.py b/backend/tests/unit/test_azure_clients.py index e69d4a3a..e6109df5 100644 --- a/backend/tests/unit/test_azure_clients.py +++ b/backend/tests/unit/test_azure_clients.py @@ -5,7 +5,7 @@ from azure.storage.blob import BlobServiceClient from azure.storage.blob.aio import BlobServiceClient as BlobServiceClientAsync -from src.api.azure_clients import ( +from graphrag_app.utils.azure_clients import ( AzureClientManager, _BlobServiceClientSingleton, _BlobServiceClientSingletonAsync, diff --git a/backend/tests/unit/test_common.py b/backend/tests/unit/test_common.py index 4b6b078e..fbf230bc 100644 --- a/backend/tests/unit/test_common.py +++ b/backend/tests/unit/test_common.py @@ -3,59 +3,32 @@ import pytest -from src.api.common import ( - retrieve_original_blob_container_name, +from graphrag_app.utils.common import ( + desanitize_name, sanitize_name, - validate_blob_container_name, validate_index_file_exist, ) -def test_validate_blob_container_name(): - """Test the src.api.common.validate_blob_container_name function.""" - # test valid container name - assert validate_blob_container_name("validcontainername") is None - # test invalid container name - with pytest.raises(ValueError): - validate_blob_container_name("invalidContainerName") - with pytest.raises(ValueError): - validate_blob_container_name( - "invalidcontainernameinvalidcontainernameinvalidcontainerinvalids" - ) - with pytest.raises(ValueError): - validate_blob_container_name("*invalidContainerName") - with pytest.raises(ValueError): - validate_blob_container_name("invalid+ContainerName") - with pytest.raises(ValueError): - validate_blob_container_name("invalid--containername") - with pytest.raises(ValueError): - validate_blob_container_name("invalidcontainername-") - - -def test_retrieve_original_blob_container_name(container_with_graphml_file): - """Test the src.api.common.retrieve_original_blob_container_name function.""" +def test_desanitize_name(container_with_graphml_file): + """Test the graphrag_app.utils.common.desanitize_name function.""" # test retrieving a valid container name original_name = container_with_graphml_file sanitized_name = sanitize_name(original_name) - assert retrieve_original_blob_container_name(sanitized_name) == original_name + assert desanitize_name(sanitized_name) == original_name # test retrieving an invalid container name - assert retrieve_original_blob_container_name("nonexistent-container") is None + assert desanitize_name("nonexistent-container") is None def test_validate_index_file_exist(container_with_graphml_file): - """Test the src.api.common.validate_index_file_exist function.""" + 
"""Test the graphrag_app.utils.common.validate_index_file_exist function.""" original_name = container_with_graphml_file sanitized_name = sanitize_name(original_name) # test with a valid index and valid file - assert ( - validate_index_file_exist(sanitized_name, "output/summarized_graph.graphml") - is None - ) + assert validate_index_file_exist(sanitized_name, "output/graph.graphml") is None # test with a valid index and non-existent file with pytest.raises(ValueError): validate_index_file_exist(sanitized_name, "non-existent-file") # test non-existent index and valid file with pytest.raises(ValueError): - validate_index_file_exist( - "nonexistent-index", "output/summarized_graph.graphml" - ) + validate_index_file_exist("nonexistent-index", "output/graph.graphml") diff --git a/backend/tests/unit/test_load_logger.py b/backend/tests/unit/test_load_logger.py index 4924bbd6..ee616308 100644 --- a/backend/tests/unit/test_load_logger.py +++ b/backend/tests/unit/test_load_logger.py @@ -2,13 +2,13 @@ import pytest -from src.logger.load_logger import load_pipeline_logger +from graphrag_app.logger.load_logger import load_pipeline_logger @pytest.fixture def mock_app_insights_workflow_callbacks(): with patch( - "src.logger.application_insights_workflow_callbacks.ApplicationInsightsWorkflowCallbacks" + "graphrag_app.logger.application_insights_workflow_callbacks.ApplicationInsightsWorkflowCallbacks" ) as mock_app_insights_workflow_callbacks: yield mock_app_insights_workflow_callbacks @@ -24,7 +24,7 @@ def mock_file_workflow_callbacks(): @pytest.fixture def mock_blob_workflow_callbacks(): with patch( - "src.logger.blob_workflow_callbacks.BlobWorkflowCallbacks" + "graphrag_app.logger.blob_workflow_callbacks.BlobWorkflowCallbacks" ) as mock_blob_workflow_callbacks: yield mock_blob_workflow_callbacks @@ -32,7 +32,7 @@ def mock_blob_workflow_callbacks(): @pytest.fixture def mock_console_workflow_callbacks(): with patch( - "src.logger.console_workflow_callbacks.ConsoleWorkflowCallbacks" + "graphrag_app.logger.console_workflow_callbacks.ConsoleWorkflowCallbacks" ) as mock_console_workflow_callbacks: yield mock_console_workflow_callbacks @@ -46,8 +46,8 @@ def test_load_pipeline_logger_with_console( ): """Test load_pipeline_logger.""" loggers = load_pipeline_logger( - reporting_dir="logs", - reporters=["app_insights", "blob", "console", "file"], + logging_dir="logs", + loggers=["app_insights", "blob", "console", "file"], index_name="test-index", num_workflow_steps=4, ) diff --git a/backend/tests/unit/test_logger_app_insights_callbacks.py b/backend/tests/unit/test_logger_app_insights_callbacks.py index 0e114b8b..58b8bf09 100644 --- a/backend/tests/unit/test_logger_app_insights_callbacks.py +++ b/backend/tests/unit/test_logger_app_insights_callbacks.py @@ -6,7 +6,7 @@ import pytest -from src.logger.application_insights_workflow_callbacks import ( +from graphrag_app.logger.application_insights_workflow_callbacks import ( ApplicationInsightsWorkflowCallbacks, ) @@ -14,7 +14,7 @@ @pytest.fixture def mock_logger(): with patch( - "src.logger.application_insights_workflow_callbacks.logging.getLogger" + "graphrag_app.logger.application_insights_workflow_callbacks.logging.getLogger" ) as mock_get_logger: mock_logger_instance = MagicMock(spec=logging.Logger) mock_get_logger.return_value = mock_logger_instance @@ -24,7 +24,7 @@ def mock_logger(): @pytest.fixture def workflow_callbacks(mock_logger): with patch( - "src.logger.application_insights_workflow_callbacks.ApplicationInsightsWorkflowCallbacks.__init__", + 
"graphrag_app.logger.application_insights_workflow_callbacks.ApplicationInsightsWorkflowCallbacks.__init__", return_value=None, ): instance = ApplicationInsightsWorkflowCallbacks() @@ -37,26 +37,26 @@ def workflow_callbacks(mock_logger): yield instance -def test_on_workflow_start(workflow_callbacks, mock_logger): - workflow_callbacks.on_workflow_start("test_workflow", object()) +def test_workflow_start(workflow_callbacks, mock_logger): + workflow_callbacks.workflow_start("test_workflow", object()) assert mock_logger.info.called -def test_on_workflow_end(workflow_callbacks, mock_logger): - workflow_callbacks.on_workflow_end("test_workflow", object()) +def test_workflow_end(workflow_callbacks, mock_logger): + workflow_callbacks.workflow_end("test_workflow", object()) assert mock_logger.info.called -def test_on_log(workflow_callbacks, mock_logger): - workflow_callbacks.on_log("test_log_message") +def test_log(workflow_callbacks, mock_logger): + workflow_callbacks.log("test_log_message") assert mock_logger.info.called -def test_on_warning(workflow_callbacks, mock_logger): - workflow_callbacks.on_warning("test_warning") +def test_warning(workflow_callbacks, mock_logger): + workflow_callbacks.warning("test_warning") assert mock_logger.warning.called -def test_on_error(workflow_callbacks, mock_logger): - workflow_callbacks.on_error("test_error", Exception("test_exception")) +def test_error(workflow_callbacks, mock_logger): + workflow_callbacks.error("test_error", Exception("test_exception")) assert mock_logger.error.called diff --git a/backend/tests/unit/test_logger_blob_callbacks.py b/backend/tests/unit/test_logger_blob_callbacks.py index c1d935b2..865ea744 100644 --- a/backend/tests/unit/test_logger_blob_callbacks.py +++ b/backend/tests/unit/test_logger_blob_callbacks.py @@ -5,13 +5,13 @@ import pytest -from src.logger.blob_workflow_callbacks import BlobWorkflowCallbacks +from graphrag_app.logger.blob_workflow_callbacks import BlobWorkflowCallbacks @pytest.fixture def mock_blob_service_client(): with patch( - "src.logger.blob_workflow_callbacks.BlobServiceClient" + "graphrag_app.logger.blob_workflow_callbacks.BlobServiceClient" ) as mock_blob_service_client: yield mock_blob_service_client @@ -19,7 +19,7 @@ def mock_blob_service_client(): @pytest.fixture def workflow_callbacks(mock_blob_service_client): with patch( - "src.logger.blob_workflow_callbacks.BlobWorkflowCallbacks.__init__", + "graphrag_app.logger.blob_workflow_callbacks.BlobWorkflowCallbacks.__init__", return_value=None, ): instance = BlobWorkflowCallbacks() @@ -34,26 +34,16 @@ def workflow_callbacks(mock_blob_service_client): def test_on_workflow_start(workflow_callbacks): - workflow_callbacks.on_workflow_start("test_workflow", object()) + workflow_callbacks.workflow_start("test_workflow", object()) # check if blob workflow callbacks _write_log() method was called assert workflow_callbacks._blob_service_client.get_blob_client().append_block.called def test_on_workflow_end(workflow_callbacks): - workflow_callbacks.on_workflow_end("test_workflow", object()) + workflow_callbacks.workflow_end("test_workflow", object()) assert workflow_callbacks._blob_service_client.get_blob_client().append_block.called -# def test_on_workflow_step_start(workflow_callbacks): -# workflow_callbacks.on_workflow_step_start("test_step", object()) -# assert workflow_callbacks._blob_service_client.get_blob_client().append_block.called - - -# def test_on_workflow_step_end(workflow_callbacks): -# workflow_callbacks.on_workflow_step_end("test_step", object()) -# 
assert workflow_callbacks._blob_service_client.get_blob_client().append_block.called - - def test_on_error(workflow_callbacks): - workflow_callbacks.on_error("test_error", Exception("test_exception")) + workflow_callbacks.error("test_error", Exception("test_exception")) assert workflow_callbacks._blob_service_client.get_blob_client().append_block.called diff --git a/backend/tests/unit/test_logger_console_callbacks.py b/backend/tests/unit/test_logger_console_callbacks.py index 5e847025..5d941b19 100644 --- a/backend/tests/unit/test_logger_console_callbacks.py +++ b/backend/tests/unit/test_logger_console_callbacks.py @@ -6,13 +6,13 @@ import pytest -from src.logger.console_workflow_callbacks import ConsoleWorkflowCallbacks +from graphrag_app.logger.console_workflow_callbacks import ConsoleWorkflowCallbacks @pytest.fixture def mock_logger(): with patch( - "src.logger.console_workflow_callbacks.logging.getLogger" + "graphrag_app.logger.console_workflow_callbacks.logging.getLogger" ) as mock_get_logger: mock_logger_instance = MagicMock(spec=logging.Logger) mock_get_logger.return_value = mock_logger_instance @@ -22,7 +22,7 @@ def mock_logger(): @pytest.fixture def workflow_callbacks(mock_logger): with patch( - "src.logger.console_workflow_callbacks.ConsoleWorkflowCallbacks.__init__", + "graphrag_app.logger.console_workflow_callbacks.ConsoleWorkflowCallbacks.__init__", return_value=None, ): instance = ConsoleWorkflowCallbacks() @@ -34,26 +34,26 @@ def workflow_callbacks(mock_logger): yield instance -def test_on_workflow_start(workflow_callbacks, mock_logger): - workflow_callbacks.on_workflow_start("test_workflow", object()) +def test_workflow_start(workflow_callbacks, mock_logger): + workflow_callbacks.workflow_start("test_workflow", object()) assert mock_logger.info.called -def test_on_workflow_end(workflow_callbacks, mock_logger): - workflow_callbacks.on_workflow_end("test_workflow", object()) +def test_workflow_end(workflow_callbacks, mock_logger): + workflow_callbacks.workflow_end("test_workflow", object()) assert mock_logger.info.called -def test_on_log(workflow_callbacks, mock_logger): - workflow_callbacks.on_log("test_log_message") +def test_log(workflow_callbacks, mock_logger): + workflow_callbacks.log("test_log_message") assert mock_logger.info.called -def test_on_warning(workflow_callbacks, mock_logger): - workflow_callbacks.on_warning("test_warning") +def test_warning(workflow_callbacks, mock_logger): + workflow_callbacks.warning("test_warning") assert mock_logger.warning.called -def test_on_error(workflow_callbacks, mock_logger): - workflow_callbacks.on_error("test_error", Exception("test_exception")) +def test_error(workflow_callbacks, mock_logger): + workflow_callbacks.error("test_error", Exception("test_exception")) assert mock_logger.error.called diff --git a/docker/Dockerfile-backend b/docker/Dockerfile-backend index 62c97bbf..ce0bb3d1 100644 --- a/docker/Dockerfile-backend +++ b/docker/Dockerfile-backend @@ -1,8 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-# For more information about the base image visit: -# https://mcr.microsoft.com/en-us/artifact/mar/devcontainers/python/about +# For more information about the base image: https://mcr.microsoft.com/en-us/artifact/mar/devcontainers/python/about FROM mcr.microsoft.com/devcontainers/python:3.10-bookworm # default graphrag version will be 0.0.0 unless overridden by --build-arg @@ -11,7 +10,6 @@ ENV GRAPHRAG_VERSION=v${GRAPHRAG_VERSION} ENV PIP_ROOT_USER_ACTION=ignore ENV PIP_DISABLE_PIP_VERSION_CHECK=1 ENV SETUPTOOLS_USE_DISTUTILS=stdlib -ENV PYTHONPATH=/backend ENV TIKTOKEN_CACHE_DIR=/opt/tiktoken_cache/ COPY backend /backend @@ -22,10 +20,9 @@ RUN cd backend \ # download all nltk data that graphrag requires RUN python -c "import nltk;nltk.download(['punkt','averaged_perceptron_tagger','maxent_ne_chunker','words','wordnet'])" - # download tiktoken model encodings RUN python -c "import tiktoken; tiktoken.encoding_for_model('gpt-3.5-turbo'); tiktoken.encoding_for_model('gpt-4'); tiktoken.encoding_for_model('gpt-4o');" WORKDIR /backend EXPOSE 80 -CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "80"] +CMD ["uvicorn", "graphrag_app.main:app", "--host", "0.0.0.0", "--port", "80"] diff --git a/docs/DEPLOYMENT-GUIDE.md b/docs/DEPLOYMENT-GUIDE.md index 8100648a..20af001c 100644 --- a/docs/DEPLOYMENT-GUIDE.md +++ b/docs/DEPLOYMENT-GUIDE.md @@ -89,13 +89,12 @@ In the `deploy.parameters.json` file, provide values for the following required `RESOURCE_GROUP` | | Yes | The resource group that GraphRAG will be deployed in. Will get created automatically if the resource group does not exist. `GRAPHRAG_IMAGE` | graphrag:backend | No | The name and tag of the graphrag docker image in the container registry. Will default to `graphrag:backend` and be hosted at `<my_container_registry_name>.azurecr.io/graphrag:backend`. `CONTAINER_REGISTRY_NAME` | | No | Name of an Azure Container Registry where the `graphrag` backend docker image will be hosted. Leave off `.azurecr.io` from the name. If not provided, a unique name will be generated (recommended). -`GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT` | | No | Endpoint for cognitive services identity authorization. Will default to `https://cognitiveservices.azure.com/.default` for Azure Commercial cloud but should be defined for deployments in other Azure clouds. +`COGNITIVE_SERVICES_AUDIENCE` | | No | Endpoint for cognitive services identity authorization. Will default to `https://cognitiveservices.azure.com/.default` for Azure Commercial cloud but should be defined for deployments in other Azure clouds. `APIM_NAME` | | No | Hostname of the API. Must be a globally unique name. The API will be accessible at `https://<APIM_NAME>.azure-api.net`. If not provided a unique name will be generated. `APIM_TIER` | | No | The [APIM tier](https://azure.microsoft.com/en-us/pricing/details/api-management) to use. Must be either `Developer` or `StandardV2`. Will default to `Developer` for cost savings. `RESOURCE_BASE_NAME` | | No | Suffix to apply to all azure resource names. If not provided a unique suffix will be generated. `AISEARCH_ENDPOINT_SUFFIX` | | No | Suffix to apply to AI search endpoint. Will default to `search.windows.net` for Azure Commercial cloud but should be overridden for deployments in other Azure clouds. `AISEARCH_AUDIENCE` | | No | Audience for AAD for AI Search. Will default to `https://search.azure.com/` for Azure Commercial cloud but should be overridden for deployments in other Azure clouds.
-`REPORTERS` | blob,console,app_insights | No | The type of logging to enable. A comma separated string containing any of the following values: `[blob,console,file,app_insights]`. Will default to `"blob,console,app_insights"`. ### 5. Deploy solution accelerator to the resource group ```shell diff --git a/infra/core/acr/acr.bicep b/infra/core/acr/acr.bicep index 51e2821a..41ec2c6b 100644 --- a/infra/core/acr/acr.bicep +++ b/infra/core/acr/acr.bicep @@ -1,12 +1,12 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + @description('The name of the Container Registry resource. Will be automatically generated if not provided.') param registryName string @description('The location of the Container Registry resource.') param location string = resourceGroup().location -@description('Array of objects with fields principalId, principalType, roleDefinitionId') -param roleAssignments array = [] - resource registry 'Microsoft.ContainerRegistry/registries@2023-11-01-preview' = { name: registryName location: location @@ -27,14 +27,6 @@ resource registry 'Microsoft.ContainerRegistry/registries@2023-11-01-preview' = } } -resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for role in roleAssignments: { - name: guid('${role.principalId}-${role.principalType}-${role.roleDefinitionId}') - scope: registry - properties: role - } -] - output name string = registry.name output id string = registry.id output loginServer string = registry.properties.loginServer diff --git a/infra/core/ai-search/ai-search.bicep b/infra/core/ai-search/ai-search.bicep index 020aed3c..69e18a73 100644 --- a/infra/core/ai-search/ai-search.bicep +++ b/infra/core/ai-search/ai-search.bicep @@ -7,10 +7,7 @@ param name string @description('The location of the Managed Cluster resource.') param location string = resourceGroup().location -@description('Array of objects with fields principalId, principalType, roleDefinitionId') -param roleAssignments array = [] - -@allowed([ 'enabled', 'disabled' ]) +@allowed(['enabled', 'disabled']) param publicNetworkAccess string = 'enabled' resource aiSearch 'Microsoft.Search/searchServices@2024-03-01-preview' = { @@ -28,13 +25,5 @@ resource aiSearch 'Microsoft.Search/searchServices@2024-03-01-preview' = { } } -resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for role in roleAssignments: { - name: guid('${role.principalId}-${role.principalType}-${role.roleDefinitionId}') - scope: aiSearch - properties: role - } -] - output name string = aiSearch.name output id string = aiSearch.id diff --git a/infra/core/aks/aks.bicep b/infra/core/aks/aks.bicep index 5d817bbc..8d349b73 100644 --- a/infra/core/aks/aks.bicep +++ b/infra/core/aks/aks.bicep @@ -43,12 +43,6 @@ param graphragVMSize string = 'standard_d8s_v5' // 8 vcpu, 32 GB memory @description('The VM size of nodes running GraphRAG indexing jobs.') param graphragIndexingVMSize string = 'standard_e8s_v5' // 8 vcpus, 64 GB memory -@description('User name for the Linux Virtual Machines.') -param linuxAdminUsername string = 'azureuser' - -@description('Configure all linux machines with the SSH RSA public key string. 
Your key should include three parts, for example \'ssh-rsa AAAAB...snip...UcyupgH azureuser@linuxvm\'') -param sshRSAPublicKey string - @description('Enable encryption at host') param enableEncryptionAtHost bool = false @@ -56,12 +50,6 @@ param subnetId string param privateDnsZoneName string -@description('Array of objects with fields principalType, roleDefinitionId') -param ingressRoleAssignments array = [] - -@description('Array of objects with fields principalType, roleDefinitionId') -param systemRoleAssignments array = [] - @description('Array of object ids that will have admin role of the cluster') param clusterAdmins array = [] @@ -69,7 +57,7 @@ resource privateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' existing name: privateDnsZoneName } -resource aks 'Microsoft.ContainerService/managedClusters@2024-02-01' = { +resource aks 'Microsoft.ContainerService/managedClusters@2024-09-02-preview' = { name: clusterName location: location identity: { @@ -77,6 +65,7 @@ resource aks 'Microsoft.ContainerService/managedClusters@2024-02-01' = { } properties: { enableRBAC: true + disableLocalAccounts: true dnsPrefix: !empty(dnsPrefix) ? dnsPrefix : toLower(clusterName) aadProfile: { managed: true @@ -121,20 +110,10 @@ resource aks 'Microsoft.ContainerService/managedClusters@2024-02-01' = { ] } } - linuxProfile: { - adminUsername: linuxAdminUsername - ssh: { - publicKeys: [ - { - keyData: sshRSAPublicKey - } - ] - } - } networkProfile: { - serviceCidr: '10.3.0.0/16' // must not overlap with any subnet IP ranges - dnsServiceIP: '10.3.0.10' // must be within the range specified in serviceCidr - podCidr: '10.244.0.0/16' // IP range from which to assign pod IPs + serviceCidr: '10.3.0.0/16' // must not overlap with any subnet IP ranges + dnsServiceIP: '10.3.0.10' // must be within the range specified in serviceCidr + podCidr: '10.244.0.0/16' // IP range from which to assign pod IPs } autoUpgradeProfile: autoUpgradeProfile oidcIssuerProfile: { @@ -200,7 +179,7 @@ resource aks 'Microsoft.ContainerService/managedClusters@2024-02-01' = { } } -resource aksManagedAutoUpgradeSchedule 'Microsoft.ContainerService/managedClusters/maintenanceConfigurations@2024-03-02-preview' = { +resource aksManagedAutoUpgradeSchedule 'Microsoft.ContainerService/managedClusters/maintenanceConfigurations@2024-09-02-preview' = { parent: aks name: 'aksManagedAutoUpgradeSchedule' properties: { @@ -218,7 +197,7 @@ resource aksManagedAutoUpgradeSchedule 'Microsoft.ContainerService/managedCluste } } -resource aksManagedNodeOSUpgradeSchedule 'Microsoft.ContainerService/managedClusters/maintenanceConfigurations@2024-03-02-preview' = { +resource aksManagedNodeOSUpgradeSchedule 'Microsoft.ContainerService/managedClusters/maintenanceConfigurations@2024-09-02-preview' = { parent: aks name: 'aksManagedNodeOSUpgradeSchedule' properties: { @@ -236,35 +215,11 @@ resource aksManagedNodeOSUpgradeSchedule 'Microsoft.ContainerService/managedClus } } -// role assignment to ingress identity -resource webAppRoutingPrivateDnsContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for role in ingressRoleAssignments: { - name: guid('${role.roleDefinitionId}-${privateDnsZone.id}') - scope: privateDnsZone - properties: { - principalId: aks.properties.ingressProfile.webAppRouting.identity.objectId - principalType: role.principalType - roleDefinitionId: role.roleDefinitionId - } - } -] - -// role assignment to AKS system identity -resource systemRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for role in 
systemRoleAssignments: { - name: guid('${role.roleDefinitionId}-${aks.id}') - scope: resourceGroup() - properties: { - principalId: aks.identity.principalId - principalType: role.principalType - roleDefinitionId: role.roleDefinitionId - } - } -] - output name string = aks.name output id string = aks.id output managedResourceGroup string = aks.properties.nodeResourceGroup output controlPlaneFqdn string = aks.properties.fqdn output kubeletPrincipalId string = aks.properties.identityProfile.kubeletidentity.objectId +output ingressWebAppIdentity string = aks.properties.ingressProfile.webAppRouting.identity.objectId +output systemIdentity string = aks.identity.principalId output issuer string = aks.properties.oidcIssuerProfile.issuerURL diff --git a/infra/core/apim/apim.bicep b/infra/core/apim/apim.bicep index 64a87ce7..17b2574c 100644 --- a/infra/core/apim/apim.bicep +++ b/infra/core/apim/apim.bicep @@ -22,11 +22,11 @@ param sku string = 'Developer' @description('The instance size of this API Management service. This should be a multiple of the number of availability zones getting deployed.') param skuCount int = 1 -@description('Application Insights resource name') -param appInsightsName string = 'apim-appi' +@description('Application Insights resource ID') +param appInsightsId string -@description('Application Insights public network access for ingestion') -param appInsightsPublicNetworkAccessForIngestion string = 'Disabled' +@description('Application Insights instrumentation key') +param appInsightsInstrumentationKey string @description('Azure region where the resources will be deployed') param location string = resourceGroup().location @@ -55,9 +55,6 @@ param publicIPAllocationMethod string = 'Static' @description('Unique DNS name for the public IP address used to access the API management service.') param dnsLabelPrefix string = toLower('${publicIpName}-${uniqueString(resourceGroup().id)}') -@description('The workspace id of the Log Analytics resource.') -param logAnalyticsWorkspaceId string - param restoreAPIM bool = false param subnetId string @@ -113,16 +110,16 @@ resource apiManagementService 'Microsoft.ApiManagement/service@2023-09-01-previe } } -resource apimLogger 'Microsoft.ApiManagement/service/loggers@2023-09-01-preview' = { - name: appInsights.name +resource apimLogger 'Microsoft.ApiManagement/service/loggers@2024-06-01-preview' = { + name: 'apimLogger' parent: apiManagementService properties: { - resourceId: appInsights.id - description: 'Application Insights for APIM' - loggerType: 'applicationInsights' credentials: { - instrumentationKey: appInsights.properties.InstrumentationKey + instrumentationKey: appInsightsInstrumentationKey } + description: 'Application Insights for APIM' + loggerType: 'applicationInsights' + resourceId: appInsightsId } } @@ -140,20 +137,6 @@ resource apimDiagnostics 'Microsoft.ApiManagement/service/diagnostics@2023-09-01 } } -resource appInsights 'Microsoft.Insights/components@2020-02-02' = { - name: appInsightsName - location: location - kind: 'web' - properties: { - Application_Type: 'web' - WorkspaceResourceId: logAnalyticsWorkspaceId - publicNetworkAccessForIngestion: appInsightsPublicNetworkAccessForIngestion - publicNetworkAccessForQuery: 'Enabled' - } -} - output name string = apiManagementService.name output id string = apiManagementService.id output apimGatewayUrl string = apiManagementService.properties.gatewayUrl -output appInsightsId string = appInsights.id -output appInsightsConnectionString string = 
appInsights.properties.ConnectionString diff --git a/infra/core/cosmosdb/cosmosdb.bicep b/infra/core/cosmosdb/cosmosdb.bicep index b64f02ae..e6ebc213 100644 --- a/infra/core/cosmosdb/cosmosdb.bicep +++ b/infra/core/cosmosdb/cosmosdb.bicep @@ -7,20 +7,10 @@ param cosmosDbName string @description('The location of the CosmosDB resource.') param location string = resourceGroup().location -@allowed([ 'Enabled', 'Disabled' ]) +@allowed(['Enabled', 'Disabled']) param publicNetworkAccess string = 'Disabled' -@description('Role definition id to assign to the principal. Learn more: https://learn.microsoft.com/en-us/azure/cosmos-db/how-to-setup-rbac') -@allowed([ - '00000000-0000-0000-0000-000000000001' // 'Cosmos DB Built-in Data Reader' role - '00000000-0000-0000-0000-000000000002' // 'Cosmos DB Built-in Data Contributor' role -]) -param roleDefinitionId string = '00000000-0000-0000-0000-000000000002' - -param principalId string - - -resource cosmosDb 'Microsoft.DocumentDB/databaseAccounts@2022-11-15' = { +resource cosmosDb 'Microsoft.DocumentDB/databaseAccounts@2024-11-15' = { name: cosmosDbName location: location tags: { @@ -79,102 +69,6 @@ resource cosmosDb 'Microsoft.DocumentDB/databaseAccounts@2022-11-15' = { } } -resource graphragDatabase 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2022-11-15' = { - parent: cosmosDb - name: 'graphrag' - properties: { - resource: { - id: 'graphrag' - } - } -} - -resource jobsContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2022-11-15' = { - parent: graphragDatabase - name: 'jobs' - properties: { - resource: { - id: 'jobs' - indexingPolicy: { - indexingMode: 'consistent' - automatic: true - includedPaths: [ - { - path: '/*' - } - ] - excludedPaths: [ - { - path: '/"_etag"/?' - } - ] - } - partitionKey: { - paths: [ - '/id' - ] - kind: 'Hash' - version: 2 - } - uniqueKeyPolicy: { - uniqueKeys: [] - } - conflictResolutionPolicy: { - mode: 'LastWriterWins' - conflictResolutionPath: '/_ts' - } - } - } -} - -resource containerStoreContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2022-11-15' = { - parent: graphragDatabase - name: 'container-store' - properties: { - resource: { - id: 'container-store' - indexingPolicy: { - indexingMode: 'consistent' - automatic: true - includedPaths: [ - { - path: '/*' - } - ] - excludedPaths: [ - { - path: '/"_etag"/?' - } - ] - } - partitionKey: { - paths: [ - '/id' - ] - kind: 'Hash' - version: 2 - } - uniqueKeyPolicy: { - uniqueKeys: [] - } - conflictResolutionPolicy: { - mode: 'LastWriterWins' - conflictResolutionPath: '/_ts' - } - } - } -} - -resource sqlRoleAssignment 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2023-11-15' = { - name: guid('${roleDefinitionId}-${principalId}-${cosmosDb.id}') - parent: cosmosDb - properties: { - roleDefinitionId: '/${subscription().id}/resourceGroups/${resourceGroup().name}/providers/Microsoft.DocumentDB/databaseAccounts/${cosmosDb.name}/sqlRoleDefinitions/${roleDefinitionId}' - principalId: principalId - scope: cosmosDb.id - } -} - output name string = cosmosDb.name output id string = cosmosDb.id output endpoint string = cosmosDb.properties.documentEndpoint diff --git a/infra/core/identity/identity.bicep b/infra/core/identity/identity.bicep index b6f8e56a..0cb0fb57 100644 --- a/infra/core/identity/identity.bicep +++ b/infra/core/identity/identity.bicep @@ -10,7 +10,6 @@ param location string = resourceGroup().location @description('federated name: FederatedIdentityCredentialProperties. 
See https://learn.microsoft.com/en-us/azure/templates/microsoft.managedidentity/userassignedidentities/federatedidentitycredentials?pivots=deployment-language-bicep#federatedidentitycredentialproperties') param federatedCredentials object = {} - resource identity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' = { name: name location: location diff --git a/infra/core/log-analytics/log.bicep b/infra/core/log-analytics/log.bicep index cd33261a..0c85dbfa 100644 --- a/infra/core/log-analytics/log.bicep +++ b/infra/core/log-analytics/log.bicep @@ -10,7 +10,6 @@ param location string = resourceGroup().location @description('The public network access for ingestion.') param publicNetworkAccessForIngestion string = 'Disabled' - resource logAnalyticsWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' = { name: name location: location diff --git a/infra/core/monitor/app-insights.bicep b/infra/core/monitor/app-insights.bicep new file mode 100644 index 00000000..c9a19df6 --- /dev/null +++ b/infra/core/monitor/app-insights.bicep @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +@description('Application Insights resource name') +param appInsightsName string = 'appi' + +@description('Azure region where the resources will be deployed') +param location string = resourceGroup().location + +@description('Application Insights public network access for ingestion') +param appInsightsPublicNetworkAccessForIngestion string = 'Disabled' + +@description('Workspace id of a Log Analytics resource.') +param logAnalyticsWorkspaceId string + +resource appInsights 'Microsoft.Insights/components@2020-02-02' = { + name: appInsightsName + location: location + kind: 'web' + properties: { + Application_Type: 'web' + WorkspaceResourceId: logAnalyticsWorkspaceId + publicNetworkAccessForIngestion: appInsightsPublicNetworkAccessForIngestion + publicNetworkAccessForQuery: 'Enabled' + } +} + +output id string = appInsights.id +output connectionString string = appInsights.properties.ConnectionString +output instrumentationKey string = appInsights.properties.InstrumentationKey diff --git a/infra/core/monitor/private-link-scope.bicep b/infra/core/monitor/private-link-scope.bicep index 54e18c17..76050c8e 100644 --- a/infra/core/monitor/private-link-scope.bicep +++ b/infra/core/monitor/private-link-scope.bicep @@ -6,7 +6,6 @@ param privateLinkScopedResources array = [] param queryAccessMode string = 'Open' param ingestionAccessMode string = 'PrivateOnly' - resource privateLinkScope 'microsoft.insights/privateLinkScopes@2021-07-01-preview' = { name: privateLinkScopeName location: 'global' diff --git a/infra/core/rbac/aks-rbac.bicep b/infra/core/rbac/aks-rbac.bicep new file mode 100644 index 00000000..9124c4fc --- /dev/null +++ b/infra/core/rbac/aks-rbac.bicep @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +@description('Array of objects with fields principalId, principalType, roleDefinitionId') +param roleAssignments array = [] + +resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for role in roleAssignments: { + // note: the guid must be globally unique and deterministic (reproducible) across Azure + name: guid( + subscription().subscriptionId, + resourceGroup().name, + role.principalId, + role.principalType, + role.roleDefinitionId + ) + scope: resourceGroup() + properties: role + } +] diff --git a/infra/core/rbac/workload-identity-rbac.bicep b/infra/core/rbac/workload-identity-rbac.bicep new file mode 100644 index 00000000..92feaefb --- /dev/null +++ b/infra/core/rbac/workload-identity-rbac.bicep @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +@description('ID of the service principal to assign the RBAC roles to.') +param principalId string + +@description('Type of principal to assign the RBAC roles to.') +@allowed(['ServicePrincipal', 'User', 'Group', 'Device', 'ForeignGroup']) +param principalType string + +@description('Name of an existing CosmosDB resource.') +param cosmosDbName string + +@description('Role definitions for various roles that will be assigned at deployment time. Learn more: https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles') +var roleDefinitions = [ + { + id: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' // Storage Blob Data Contributor Role + } + { + id: 'b24988ac-6180-42a0-ab88-20f7382dd24c' // AI Search Contributor Role + } + { + id: '8ebe5a00-799e-43f5-93ac-243d3dce84a7' // AI Search Index Data Contributor Role + } + { + id: '1407120a-92aa-4202-b7e9-c0e197c71c8f' // AI Search Index Data Reader Role + } + { + id: 'a001fd3d-188f-4b5d-821b-7da978bf7442' // Cognitive Services OpenAI Contributor + } + { + id: '3913510d-42f4-4e42-8a64-420c390055eb' // Monitoring Metrics Publisher Role + } +] + +resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ + for roleDef in roleDefinitions: { + // note: the guid must be globally unique and deterministic (reproducible) across Azure + name: guid(subscription().subscriptionId, resourceGroup().name, principalId, principalType, roleDef.id) + scope: resourceGroup() + properties: { + principalId: principalId + principalType: principalType + roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', roleDef.id) + } + } +] + +resource cosmosDb 'Microsoft.DocumentDB/databaseAccounts@2024-12-01-preview' existing = { + name: cosmosDbName +} + +var customRoleName = 'Custom cosmosDB role for graphrag - adds read/write permissions at the database and container level' +resource customCosmosRoleDefinition 'Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions@2024-12-01-preview' = { + // note: the guid must be globally unique and deterministic (reproducible) across Azure + name: guid(subscription().subscriptionId, resourceGroup().name, cosmosDb.id, customRoleName) // guid is used to ensure uniqueness + parent: cosmosDb + properties: { + roleName: customRoleName + type: 'CustomRole' + assignableScopes: [ + cosmosDb.id + ] + permissions: [ + { + dataActions: [ + 'Microsoft.DocumentDB/databaseAccounts/readMetadata' + 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/*' + 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/items/*' + 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/write' + ] + } + ] + } +} + +resource assignment 
'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2024-12-01-preview' = { + // note: the guid must be globally unique and deterministic (reproducible) across Azure + name: guid( + subscription().subscriptionId, + resourceGroup().name, + cosmosDb.id, + customCosmosRoleDefinition.id, + principalId + ) + parent: cosmosDb + properties: { + principalId: principalId + roleDefinitionId: customCosmosRoleDefinition.id + scope: cosmosDb.id + } +} diff --git a/infra/core/storage/storage.bicep b/infra/core/storage/storage.bicep index 21099044..4f23978f 100644 --- a/infra/core/storage/storage.bicep +++ b/infra/core/storage/storage.bicep @@ -7,18 +7,15 @@ param name string @description('The location of the Storage Account resource.') param location string = resourceGroup().location -@allowed([ 'Hot', 'Cool', 'Premium' ]) +@allowed(['Hot', 'Cool', 'Premium']) param accessTier string = 'Hot' -@allowed([ 'AzureDnsZone', 'Standard' ]) +@allowed(['AzureDnsZone', 'Standard']) param dnsEndpointType string = 'Standard' -@allowed([ 'Enabled', 'Disabled' ]) +@allowed(['Enabled', 'Disabled']) param publicNetworkAccess string = 'Disabled' -@description('Array of objects with fields principalId, principalType, roleDefinitionId') -param roleAssignments array = [] - param tags object = {} param allowBlobPublicAccess bool = false param allowCrossTenantReplication bool = true @@ -29,7 +26,6 @@ param kind string = 'StorageV2' param minimumTlsVersion string = 'TLS1_2' param containers array = [] - resource storage 'Microsoft.Storage/storageAccounts@2023-01-01' = { name: name location: location @@ -68,14 +64,6 @@ resource storage 'Microsoft.Storage/storageAccounts@2023-01-01' = { } } -resource storageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ - for role in roleAssignments: { - name: guid('${role.principalId}-${role.principalType}-${role.roleDefinitionId}') - scope: storage - properties: role - } -] - output name string = storage.name output id string = storage.id output primaryEndpoints object = storage.properties.primaryEndpoints diff --git a/infra/core/vnet/private-dns-vnet-link.bicep b/infra/core/vnet/private-dns-vnet-link.bicep index fcd29d00..9546a86f 100644 --- a/infra/core/vnet/private-dns-vnet-link.bicep +++ b/infra/core/vnet/private-dns-vnet-link.bicep @@ -5,7 +5,6 @@ param vnetId string param privateDnsZoneName string var vnet_id_hash = uniqueString(vnetId) - resource dnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = { name: privateDnsZoneName location: 'global' diff --git a/infra/core/vnet/private-dns-zone-a-record.bicep b/infra/core/vnet/private-dns-zone-a-record.bicep index c1e1739e..04356590 100644 --- a/infra/core/vnet/private-dns-zone-a-record.bicep +++ b/infra/core/vnet/private-dns-zone-a-record.bicep @@ -13,7 +13,6 @@ param ttl int = 900 @description('The IP address') param ipv4Address string - resource dnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' existing = { name: dnsZoneName } diff --git a/infra/core/vnet/private-dns-zone.bicep b/infra/core/vnet/private-dns-zone.bicep index 35d7f525..431c1015 100644 --- a/infra/core/vnet/private-dns-zone.bicep +++ b/infra/core/vnet/private-dns-zone.bicep @@ -7,7 +7,6 @@ param name string @description('The name of the virtual networks the DNS zone should be associated with.') param vnetNames string[] - resource dnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = { name: name location: 'global' diff --git a/infra/core/vnet/private-endpoint.bicep b/infra/core/vnet/private-endpoint.bicep index 0d0eb32c..6d7b6c02 
100644 --- a/infra/core/vnet/private-endpoint.bicep +++ b/infra/core/vnet/private-endpoint.bicep @@ -14,7 +14,6 @@ param privateEndpointName string param groupId string param location string = resourceGroup().location - resource privateEndpoint 'Microsoft.Network/privateEndpoints@2021-05-01' = { name: privateEndpointName location: location diff --git a/infra/core/vnet/privatelink-private-dns-zones.bicep b/infra/core/vnet/privatelink-private-dns-zones.bicep index a6c57f1a..89f15e21 100644 --- a/infra/core/vnet/privatelink-private-dns-zones.bicep +++ b/infra/core/vnet/privatelink-private-dns-zones.bicep @@ -11,8 +11,12 @@ var storagePrivateDnsZoneNames = [blobStoragePrivateDnsZoneName] var privateDnsZoneData = loadJsonContent('private-dns-zone-groups.json') var cloudName = toLower(environment().name) var azureMonitorPrivateDnsZones = privateDnsZoneData[cloudName].azureMonitor -var privateDnsZones = union(azureMonitorPrivateDnsZones, storagePrivateDnsZoneNames, [cosmosDbPrivateDnsZoneName], [aiSearchPrivateDnsZoneName]) - +var privateDnsZones = union( + azureMonitorPrivateDnsZones, + storagePrivateDnsZoneNames, + [cosmosDbPrivateDnsZoneName], + [aiSearchPrivateDnsZoneName] +) resource privateDnsZoneResources 'Microsoft.Network/privateDnsZones@2020-06-01' = [ for name in privateDnsZones: { diff --git a/infra/core/vnet/vnet-dns-link.bicep b/infra/core/vnet/vnet-dns-link.bicep index 27448b8e..5ce16f1e 100644 --- a/infra/core/vnet/vnet-dns-link.bicep +++ b/infra/core/vnet/vnet-dns-link.bicep @@ -4,7 +4,6 @@ param privateDnsZoneName string param vnetIds array - resource privateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' existing = { name: privateDnsZoneName } diff --git a/infra/core/vnet/vnet.bicep b/infra/core/vnet/vnet.bicep new file mode 100644 index 00000000..88e9abe9 --- /dev/null +++ b/infra/core/vnet/vnet.bicep @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +@description('Name of the vnet resource.') +param vnetName string + +@description('Azure region where the resource will be deployed.') +param location string = resourceGroup().location + +@description('Optional prefix to prepend to subnet names.') +param subnetPrefix string = 'snet-' + +@description('APIM tier - used to determine if subnet delegations are required.') +@allowed(['Developer', 'StandardV2']) +param apimTier string + +@description('NSG resource ID.') +param nsgID string + +resource vnet 'Microsoft.Network/virtualNetworks@2024-01-01' = { + name: vnetName + location: location + properties: { + addressSpace: { + addressPrefixes: [ + '10.1.0.0/16' + ] + } + subnets: [ + { + name: '${subnetPrefix}apim' + properties: { + addressPrefix: '10.1.0.0/24' + networkSecurityGroup: { + id: nsgID + } + delegations: (apimTier == 'Developer') + ? 
[] + : [ + { + name: 'Microsoft.Web/serverFarms' + properties: { + serviceName: 'Microsoft.Web/serverFarms' + } + } + ] + } + } + { + name: '${subnetPrefix}aks' + properties: { + addressPrefix: '10.1.1.0/24' + serviceEndpoints: [ + { + service: 'Microsoft.Storage' + } + { + service: 'Microsoft.Sql' + } + { + service: 'Microsoft.EventHub' + } + ] + } + } + ] + } +} + +output vnetId string = vnet.id +output vnetName string = vnet.name +output apimSubnetId string = vnet.properties.subnets[0].id +output aksSubnetId string = vnet.properties.subnets[1].id diff --git a/infra/deploy.sh b/infra/deploy.sh index 888c7589..a749d8f7 100755 --- a/infra/deploy.sh +++ b/infra/deploy.sh @@ -16,8 +16,7 @@ GRAPHRAG_IMAGE="" PUBLISHER_EMAIL="" PUBLISHER_NAME="" RESOURCE_BASE_NAME="" -REPORTERS="" -GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT="" +COGNITIVE_SERVICES_AUDIENCE="" CONTAINER_REGISTRY_NAME="" requiredParams=( @@ -240,13 +239,9 @@ populateOptionalParams () { if [ ! -z "$RESOURCE_BASE_NAME" ]; then printf "\tsetting RESOURCE_BASE_NAME=$RESOURCE_BASE_NAME\n" fi - if [ -z "$REPORTERS" ]; then - REPORTERS="blob,console,app_insights" - printf "\tsetting REPORTERS=blob,console,app_insights\n" - fi - if [ -z "$GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT" ]; then - GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT="https://cognitiveservices.azure.com/.default" - printf "\tsetting GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT=$GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT\n" + if [ -z "$COGNITIVE_SERVICES_AUDIENCE" ]; then + COGNITIVE_SERVICES_AUDIENCE="https://cognitiveservices.azure.com/.default" + printf "\tsetting COGNITIVE_SERVICES_AUDIENCE=$COGNITIVE_SERVICES_AUDIENCE\n" fi if [ -z "$GRAPHRAG_IMAGE" ]; then GRAPHRAG_IMAGE="graphrag:backend" @@ -264,38 +259,22 @@ createResourceGroupIfNotExists () { local location=$1 local rg=$2 printf "Checking if resource group $rg exists... " - az group show -n $rg -o json >/dev/null 2>&1 + az group show -n $rg -o json > /dev/null 2>&1 if [ $? -ne 0 ]; then printf "No.\n" printf "Creating resource group... " - az group create -l $location -n $rg >/dev/null 2>&1 + az group create -l $location -n $rg > /dev/null 2>&1 printf "Done.\n" else printf "Yes.\n" fi } -createSshkeyIfNotExists () { - local rg=$1 - local keyName="aks-publickey" - printf "Checking if sshkey exists... " - local keyDetails=$(az sshkey show -g $rg --name $keyName -o json 2> /dev/null) - if [ -z "$keyDetails" ]; then - printf "No.\n" - printf "Creating sshkey... " - local keyDetails=$(az sshkey create -g $rg --name $keyName -o json) - exitIfCommandFailed $? "Error creating sshkey." - else - printf "Yes.\n" - fi - SSHKEY_DETAILS=$keyDetails -} - getAksCredentials () { local rg=$1 local aks=$2 printf "Getting AKS credentials... " - az aks get-credentials -g $rg -n $aks --overwrite-existing 2>&1 + az aks get-credentials -g $rg -n $aks --overwrite-existing > /dev/null 2>&1 exitIfCommandFailed $? "Error getting AKS credentials, exiting..." kubelogin convert-kubeconfig -l azurecli exitIfCommandFailed $? "Error logging into AKS, exiting..." @@ -334,8 +313,6 @@ checkForApimSoftDelete () { deployAzureResources () { echo "Deploying Azure resources..." - local SSH_PUBLICKEY=$(jq -r .publicKey <<< $SSHKEY_DETAILS) - exitIfValueEmpty "$SSH_PUBLICKEY" "Unable to read ssh publickey, exiting..." 
# get principal/object id of the signed in user local deployerPrincipalId=$(az ad signed-in-user show --output json | jq -r .id) exitIfValueEmpty $deployerPrincipalId "Principal ID of deployer not found" @@ -347,12 +324,11 @@ deployAzureResources () { --resource-group $RESOURCE_GROUP \ --template-file ./main.bicep \ --parameters "resourceBaseName=$RESOURCE_BASE_NAME" \ - --parameters "graphRagName=$RESOURCE_GROUP" \ + --parameters "resourceGroup=$RESOURCE_GROUP" \ --parameters "apimName=$APIM_NAME" \ --parameters "apimTier=$APIM_TIER" \ - --parameters "publisherName=$PUBLISHER_NAME" \ - --parameters "aksSshRsaPublicKey=$SSH_PUBLICKEY" \ - --parameters "publisherEmail=$PUBLISHER_EMAIL" \ + --parameters "apiPublisherName=$PUBLISHER_NAME" \ + --parameters "apiPublisherEmail=$PUBLISHER_EMAIL" \ --parameters "enablePrivateEndpoints=$ENABLE_PRIVATE_ENDPOINTS" \ --parameters "acrName=$CONTAINER_REGISTRY_NAME" \ --parameters "deployerPrincipalId=$deployerPrincipalId" \ @@ -445,7 +421,7 @@ installGraphRAGHelmChart () { local cosmosEndpoint=$(jq -r .azure_cosmosdb_endpoint.value <<< $AZURE_OUTPUTS) exitIfValueEmpty "$cosmosEndpoint" "Unable to parse CosmosDB endpoint from Azure outputs, exiting..." - local graphragHostname=$(jq -r .azure_graphrag_hostname.value <<< $AZURE_OUTPUTS) + local graphragHostname=$(jq -r .azure_app_hostname.value <<< $AZURE_OUTPUTS) exitIfValueEmpty "$graphragHostname" "Unable to parse graphrag hostname from deployment outputs, exiting..." local storageAccountBlobUrl=$(jq -r .azure_storage_account_blob_url.value <<< $AZURE_OUTPUTS) @@ -459,7 +435,6 @@ installGraphRAGHelmChart () { exitIfValueEmpty "$graphragImageName" "Unable to parse graphrag image name, exiting..." exitIfValueEmpty "$graphragImageVersion" "Unable to parse graphrag image version, exiting..." - local escapedReporters=$(sed "s/,/\\\,/g" <<< "$REPORTERS") reset_x=true if ! [ -o xtrace ]; then set -x @@ -474,18 +449,17 @@ installGraphRAGHelmChart () { --set "master.image.repository=$containerRegistryName/$graphragImageName" \ --set "master.image.tag=$graphragImageVersion" \ --set "ingress.host=$graphragHostname" \ - --set "graphragConfig.APP_INSIGHTS_CONNECTION_STRING=$appInsightsConnectionString" \ + --set "graphragConfig.APPLICATIONINSIGHTS_CONNECTION_STRING=$appInsightsConnectionString" \ --set "graphragConfig.AI_SEARCH_URL=https://$aiSearchName.$AISEARCH_ENDPOINT_SUFFIX" \ --set "graphragConfig.AI_SEARCH_AUDIENCE=$AISEARCH_AUDIENCE" \ --set "graphragConfig.COSMOS_URI_ENDPOINT=$cosmosEndpoint" \ --set "graphragConfig.GRAPHRAG_API_BASE=$GRAPHRAG_API_BASE" \ --set "graphragConfig.GRAPHRAG_API_VERSION=$GRAPHRAG_API_VERSION" \ - --set "graphragConfig.GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT=$GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT" \ + --set "graphragConfig.COGNITIVE_SERVICES_AUDIENCE=$COGNITIVE_SERVICES_AUDIENCE" \ --set "graphragConfig.GRAPHRAG_LLM_MODEL=$GRAPHRAG_LLM_MODEL" \ --set "graphragConfig.GRAPHRAG_LLM_DEPLOYMENT_NAME=$GRAPHRAG_LLM_DEPLOYMENT_NAME" \ --set "graphragConfig.GRAPHRAG_EMBEDDING_MODEL=$GRAPHRAG_EMBEDDING_MODEL" \ --set "graphragConfig.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME=$GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME" \ - --set "graphragConfig.REPORTERS=$escapedReporters" \ --set "graphragConfig.STORAGE_ACCOUNT_BLOB_URL=$storageAccountBlobUrl" local helmResult=$? @@ -559,7 +533,7 @@ deployGraphragAPI () { local apimName=$(jq -r .azure_apim_name.value <<< $AZURE_OUTPUTS) exitIfValueEmpty "$apimName" "Error parsing apim name from azure outputs, exiting..." 
local backendSwaggerUrl="$apimGatewayUrl/manpage/openapi.json" - local graphragUrl=$(jq -r .azure_graphrag_url.value <<< $AZURE_OUTPUTS) + local graphragUrl=$(jq -r .azure_app_url.value <<< $AZURE_OUTPUTS) exitIfValueEmpty "$graphragUrl" "Error parsing GraphRAG URL from azure outputs, exiting..." waitForGraphragBackend $backendSwaggerUrl @@ -707,9 +681,6 @@ validateSKUs $LOCATION $VALIDATE_SKUS_FLAG # Create resource group createResourceGroupIfNotExists $LOCATION $RESOURCE_GROUP -# Generate ssh key for AKS -createSshkeyIfNotExists $RESOURCE_GROUP - # Deploy Azure resources checkForApimSoftDelete deployAzureResources diff --git a/infra/helm/graphrag/values.yaml b/infra/helm/graphrag/values.yaml index 6ccdeddf..f93d4816 100644 --- a/infra/helm/graphrag/values.yaml +++ b/infra/helm/graphrag/values.yaml @@ -32,16 +32,17 @@ ingress: graphragConfig: AI_SEARCH_AUDIENCE: "" AI_SEARCH_URL: "" - APP_INSIGHTS_CONNECTION_STRING: "" + APPLICATIONINSIGHTS_CONNECTION_STRING: "" + # Must set hidden env variable to true to disable statsbeat. For more information: https://github.com/Azure/azure-sdk-for-python/issues/34804 + APPLICATIONINSIGHTS_STATSBEAT_DISABLED_ALL: "True" COSMOS_URI_ENDPOINT: "" GRAPHRAG_API_BASE: "" GRAPHRAG_API_VERSION: "" - GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT: "https://cognitiveservices.azure.com/.default" + COGNITIVE_SERVICES_AUDIENCE: "https://cognitiveservices.azure.com/.default" GRAPHRAG_LLM_MODEL: "" GRAPHRAG_LLM_DEPLOYMENT_NAME: "" GRAPHRAG_EMBEDDING_MODEL: "" GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: "" - REPORTERS: "blob,console,app_insights" STORAGE_ACCOUNT_BLOB_URL: "" master: @@ -54,10 +55,12 @@ master: tag: "" podAnnotations: {} podLabels: {} - podSecurityContext: {} + podSecurityContext: + {} # fsGroup: 2000 - securityContext: {} + securityContext: + {} # capabilities: # drop: # - ALL @@ -125,8 +128,8 @@ master: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - - matchExpressions: - - key: workload - operator: In - values: - - graphrag + - matchExpressions: + - key: workload + operator: In + values: + - graphrag diff --git a/infra/main.bicep b/infra/main.bicep index a312ffdf..8a1b019c 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -17,96 +17,112 @@ Private Endpoints Managed Identity */ -@description('Unique name to append to each resource') -param resourceBaseName string = '' -var resourceBaseNameFinal = !empty(resourceBaseName) ? resourceBaseName : toLower(uniqueString('${subscription().id}/resourceGroups/${graphRagName}')) - @minLength(1) @maxLength(64) @description('Name of the resource group that GraphRAG will be deployed in.') -param graphRagName string +param resourceGroup string + +@description('Unique name to append to each resource') +param resourceBaseName string = '' +var resourceBaseNameFinal = !empty(resourceBaseName) + ? resourceBaseName + : toLower(uniqueString('${subscription().id}/resourceGroups/${resourceGroup}')) @description('Cloud region for all resources') -param location string = resourceGroup().location +param location string = az.resourceGroup().location @description('Principal/Object ID of the deployer. 
Will be used to assign admin roles to the AKS cluster.') param deployerPrincipalId string @minLength(1) @description('Name of the publisher of the API Management instance.') -param publisherName string +param apiPublisherName string = 'Microsoft' @minLength(1) @description('Email address of the publisher of the API Management instance.') -param publisherEmail string +param apiPublisherEmail string = 'publisher@microsoft.com' -@description('The AKS namespace the workload identity service account will be created in.') +@description('The AKS namespace to install GraphRAG in.') param aksNamespace string = 'graphrag' -@description('Public key to allow access to AKS Linux nodes.') -param aksSshRsaPublicKey string - @description('Whether to enable private endpoints.') param enablePrivateEndpoints bool = true @description('Whether to restore the API Management instance.') param restoreAPIM bool = false -param acrName string = '' -param apimName string = '' +// optional parameters that will default to a generated name if not provided param apimTier string = 'Developer' +param apimName string = '' +param acrName string = '' param storageAccountName string = '' param cosmosDbName string = '' param aiSearchName string = '' -var graphRagDnsLabel = 'graphrag' -var dnsDomain = 'graphrag.io' -var graphRagHostname = '${graphRagDnsLabel}.${dnsDomain}' -var graphRagUrl = 'http://${graphRagHostname}' + var abbrs = loadJsonContent('abbreviations.json') -var tags = { 'azd-env-name': graphRagName } +var tags = { 'azd-env-name': resourceGroup } var workloadIdentityName = '${abbrs.managedIdentityUserAssignedIdentities}${resourceBaseNameFinal}' var aksServiceAccountName = '${aksNamespace}-workload-sa' var workloadIdentitySubject = 'system:serviceaccount:${aksNamespace}:${aksServiceAccountName}' -@description('Role definitions for various roles that will be assigned at deployment time. Learn more: https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles') + +// endpoint configuration +var dnsDomain = 'graphrag.io' +var appHostname = 'graphrag.${dnsDomain}' +var appUrl = 'http://${appHostname}' + +@description('Role definitions for various RBAC roles that will be assigned at deployment time. 
Learn more: https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles') var roles = { - storageBlobDataContributor: resourceId( - 'Microsoft.Authorization/roleDefinitions', - 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' - ) - aiSearchContributor: resourceId( - 'Microsoft.Authorization/roleDefinitions', - 'b24988ac-6180-42a0-ab88-20f7382dd24c' // AI Search Contributor Role - ) - aiSearchIndexDataContributor: resourceId( - 'Microsoft.Authorization/roleDefinitions', - '8ebe5a00-799e-43f5-93ac-243d3dce84a7' // AI Search Index Data Contributor Role - ) - aiSearchIndexDataReader: resourceId ( - 'Microsoft.Authorization/roleDefinitions', - '1407120a-92aa-4202-b7e9-c0e197c71c8f' // AI Search Index Data Reader Role - ) - privateDnsZoneContributor: resourceId ( - 'Microsoft.Authorization/roleDefinitions', - 'b12aa53e-6015-4669-85d0-8515ebb3ae7f' // Private DNS Zone Contributor Role - ) - networkContributor: resourceId ( + privateDnsZoneContributor: resourceId( 'Microsoft.Authorization/roleDefinitions', - '4d97b98b-1d4f-4787-a291-c67834d212e7' // Network Contributor Role + 'b12aa53e-6015-4669-85d0-8515ebb3ae7f' // Private DNS Zone Contributor Role ) - cognitiveServicesOpenaiContributor: resourceId ( + networkContributor: resourceId( 'Microsoft.Authorization/roleDefinitions', - 'a001fd3d-188f-4b5d-821b-7da978bf7442' // Cognitive Services OpenAI Contributor + '4d97b98b-1d4f-4787-a291-c67834d212e7' // Network Contributor Role ) - acrPull: resourceId ( + acrPull: resourceId( 'Microsoft.Authorization/roleDefinitions', - '7f951dda-4ed3-4680-a7ca-43fe172d538d' // ACR Pull Role + '7f951dda-4ed3-4680-a7ca-43fe172d538d' // ACR Pull Role ) } +// apply RBAC role assignments to the AKS workload identity +module aksWorkloadIdentityRBAC 'core/rbac/workload-identity-rbac.bicep' = { + name: 'aks-workload-identity-rbac-assignments' + params: { + principalId: workloadIdentity.outputs.principalId + principalType: 'ServicePrincipal' + cosmosDbName: cosmosdb.outputs.name + } +} + +// apply necessary RBAC role assignments to the AKS service +module aksRBAC 'core/rbac/aks-rbac.bicep' = { + name: 'aks-rbac-assignments' + params: { + roleAssignments: [ + { + principalId: aks.outputs.kubeletPrincipalId + principalType: 'ServicePrincipal' + roleDefinitionId: roles.acrPull + } + { + principalId: aks.outputs.ingressWebAppIdentity + principalType: 'ServicePrincipal' + roleDefinitionId: roles.privateDnsZoneContributor + } + { + principalId: aks.outputs.systemIdentity + principalType: 'ServicePrincipal' + roleDefinitionId: roles.networkContributor + } + ] + } +} module log 'core/log-analytics/log.bicep' = { - name: 'log-analytics' - params:{ + name: 'log-analytics-deployment' + params: { name: '${abbrs.operationalInsightsWorkspaces}${resourceBaseNameFinal}' location: location publicNetworkAccessForIngestion: enablePrivateEndpoints ? 
'Disabled' : 'Enabled' @@ -114,153 +130,73 @@ module log 'core/log-analytics/log.bicep' = { } module nsg 'core/vnet/nsg.bicep' = { - name: 'nsg' + name: 'nsg-deployment' params: { nsgName: '${abbrs.networkNetworkSecurityGroups}${resourceBaseNameFinal}' location: location } } -resource vnet 'Microsoft.Network/virtualNetworks@2024-01-01' = { - name: '${abbrs.networkVirtualNetworks}${resourceBaseNameFinal}' - location: location - properties: { - addressSpace: { - addressPrefixes: [ - '10.1.0.0/16' - ] - } - subnets: [ - { - name: '${abbrs.networkVirtualNetworksSubnets}apim' - properties: { - addressPrefix: '10.1.0.0/24' - networkSecurityGroup: { - id: nsg.outputs.id - } - delegations: (apimTier=='Developer') ? [] : [ - { - name: 'Microsoft.Web/serverFarms' - properties: { - serviceName: 'Microsoft.Web/serverFarms' - } - } - ] - } - } - { - name: '${abbrs.networkVirtualNetworksSubnets}aks' - properties: { - addressPrefix: '10.1.1.0/24' - serviceEndpoints: [ - { - service: 'Microsoft.Storage' - } - { - service: 'Microsoft.Sql' - } - { - service: 'Microsoft.EventHub' - } - ] - } - } - ] +module vnet 'core/vnet/vnet.bicep' = { + name: 'vnet-deployment' + params: { + vnetName: '${abbrs.networkVirtualNetworks}${resourceBaseNameFinal}' + location: location + subnetPrefix: abbrs.networkVirtualNetworksSubnets + apimTier: apimTier + nsgID: nsg.outputs.id } } module acr 'core/acr/acr.bicep' = { - name: 'acr' + name: 'acr-deployment' params: { registryName: !empty(acrName) ? acrName : '${abbrs.containerRegistryRegistries}${resourceBaseNameFinal}' location: location - roleAssignments: [ - { - principalId: aks.outputs.kubeletPrincipalId - principalType: 'ServicePrincipal' - roleDefinitionId: roles.acrPull - } - ] } } module aks 'core/aks/aks.bicep' = { - name: 'aks' - params:{ + name: 'aks-deployment' + params: { clusterName: '${abbrs.containerServiceManagedClusters}${resourceBaseNameFinal}' location: location - graphragVMSize: 'standard_d8s_v5' // 8 vcpu, 32 GB memory - graphragIndexingVMSize: 'standard_e8s_v5' // 8 vcpus, 64 GB memory - clusterAdmins: ['${deployerPrincipalId}'] - sshRSAPublicKey: aksSshRsaPublicKey + graphragVMSize: 'standard_d8s_v5' // 8 vcpu, 32 GB memory + graphragIndexingVMSize: 'standard_e8s_v5' // 8 vcpus, 64 GB memory + clusterAdmins: !empty(deployerPrincipalId) ? ['${deployerPrincipalId}'] : null logAnalyticsWorkspaceId: log.outputs.id - subnetId: vnet.properties.subnets[1].id // aks subnet + subnetId: vnet.outputs.aksSubnetId privateDnsZoneName: privateDnsZone.outputs.name - ingressRoleAssignments: [ - { - principalType: 'ServicePrincipal' - roleDefinitionId: roles.privateDnsZoneContributor - } - ] - systemRoleAssignments: [ - { - principalType: 'ServicePrincipal' - roleDefinitionId: roles.networkContributor - } - ] } } module cosmosdb 'core/cosmosdb/cosmosdb.bicep' = { - name: 'cosmosdb' + name: 'cosmosdb-deployment' params: { cosmosDbName: !empty(cosmosDbName) ? cosmosDbName : '${abbrs.documentDBDatabaseAccounts}${resourceBaseNameFinal}' location: location publicNetworkAccess: enablePrivateEndpoints ? 'Disabled' : 'Enabled' - principalId: workloadIdentity.outputs.principalId } } module aiSearch 'core/ai-search/ai-search.bicep' = { - name: 'aisearch' + name: 'aisearch-deployment' params: { name: !empty(aiSearchName) ? aiSearchName : '${abbrs.searchSearchServices}${resourceBaseNameFinal}' location: location publicNetworkAccess: enablePrivateEndpoints ? 
'disabled' : 'enabled' - roleAssignments: [ - { - principalId: workloadIdentity.outputs.principalId - principalType: 'ServicePrincipal' - roleDefinitionId: roles.aiSearchContributor - } - { - principalId: workloadIdentity.outputs.principalId - principalType: 'ServicePrincipal' - roleDefinitionId: roles.aiSearchIndexDataContributor - } - { - principalId: workloadIdentity.outputs.principalId - principalType: 'ServicePrincipal' - roleDefinitionId: roles.aiSearchIndexDataReader - } - ] } } module storage 'core/storage/storage.bicep' = { - name: 'storage' + name: 'storage-deployment' params: { - name: !empty(storageAccountName) ? storageAccountName : '${abbrs.storageStorageAccounts}${replace(resourceBaseNameFinal, '-', '')}' + name: !empty(storageAccountName) + ? storageAccountName + : '${abbrs.storageStorageAccounts}${replace(resourceBaseNameFinal, '-', '')}' location: location publicNetworkAccess: enablePrivateEndpoints ? 'Disabled' : 'Enabled' tags: tags - roleAssignments: [ - { - principalId: workloadIdentity.outputs.principalId - principalType: 'ServicePrincipal' - roleDefinitionId: roles.storageBlobDataContributor - } - ] deleteRetentionPolicy: { enabled: true days: 5 @@ -269,35 +205,44 @@ module storage 'core/storage/storage.bicep' = { } } +module appInsights 'core/monitor/app-insights.bicep' = { + name: 'app-insights-deployment' + params: { + appInsightsName: '${abbrs.insightsComponents}${resourceBaseNameFinal}' + location: location + appInsightsPublicNetworkAccessForIngestion: enablePrivateEndpoints ? 'Disabled' : 'Enabled' + logAnalyticsWorkspaceId: log.outputs.id + } +} + module apim 'core/apim/apim.bicep' = { - name: 'apim' + name: 'apim-deployment' params: { apiManagementName: !empty(apimName) ? apimName : '${abbrs.apiManagementService}${resourceBaseNameFinal}' restoreAPIM: restoreAPIM - appInsightsName: '${abbrs.insightsComponents}${resourceBaseNameFinal}' - appInsightsPublicNetworkAccessForIngestion: enablePrivateEndpoints ? 
'Disabled' : 'Enabled' + appInsightsId: appInsights.outputs.id + appInsightsInstrumentationKey: appInsights.outputs.instrumentationKey publicIpName: '${abbrs.networkPublicIPAddresses}${resourceBaseNameFinal}' location: location sku: apimTier skuCount: 1 // TODO expose in param for premium sku availabilityZones: [] // TODO expose in param for premium sku - publisherEmail: publisherEmail - publisherName: publisherName - logAnalyticsWorkspaceId: log.outputs.id - subnetId: vnet.properties.subnets[0].id // apim subnet + publisherEmail: apiPublisherEmail + publisherName: apiPublisherName + subnetId: vnet.outputs.apimSubnetId } } module graphragApi 'core/apim/apim.graphrag-documentation.bicep' = { - name: 'graphrag-api' + name: 'graphrag-api-deployment' params: { apimname: apim.outputs.name - backendUrl: graphRagUrl + backendUrl: appUrl } } module workloadIdentity 'core/identity/identity.bicep' = { - name: 'workload-identity' + name: 'workload-identity-deployment' params: { name: workloadIdentityName location: location @@ -312,78 +257,78 @@ module workloadIdentity 'core/identity/identity.bicep' = { } module privateDnsZone 'core/vnet/private-dns-zone.bicep' = { - name: 'private-dns-zone' + name: 'private-dns-zone-deployment' params: { name: dnsDomain vnetNames: [ - vnet.name + vnet.outputs.vnetName // name ] } } module privatelinkPrivateDns 'core/vnet/privatelink-private-dns-zones.bicep' = if (enablePrivateEndpoints) { - name: 'privatelink-private-dns-zones' + name: 'privatelink-private-dns-zones-deployment' params: { linkedVnetIds: [ - vnet.id + vnet.outputs.vnetId // id ] } } module azureMonitorPrivateLinkScope 'core/monitor/private-link-scope.bicep' = if (enablePrivateEndpoints) { - name: 'azure-monitor-privatelink-scope' + name: 'azure-monitor-privatelink-scope-deployment' params: { privateLinkScopeName: 'pls-${resourceBaseNameFinal}' privateLinkScopedResources: [ log.outputs.id - apim.outputs.appInsightsId + appInsights.outputs.id ] } } module cosmosDbPrivateEndpoint 'core/vnet/private-endpoint.bicep' = if (enablePrivateEndpoints) { - name: 'cosmosDb-private-endpoint' + name: 'cosmosDb-private-endpoint-deployment' params: { privateEndpointName: '${abbrs.privateEndpoint}cosmos-${cosmosdb.outputs.name}' location: location privateLinkServiceId: cosmosdb.outputs.id - subnetId: vnet.properties.subnets[1].id // aks subnet + subnetId: vnet.outputs.aksSubnetId groupId: 'Sql' privateDnsZoneConfigs: enablePrivateEndpoints ? privatelinkPrivateDns.outputs.cosmosDbPrivateDnsZoneConfigs : [] } } module blobStoragePrivateEndpoint 'core/vnet/private-endpoint.bicep' = if (enablePrivateEndpoints) { - name: 'blob-storage-private-endpoint' + name: 'blob-storage-private-endpoint-deployment' params: { privateEndpointName: '${abbrs.privateEndpoint}blob-${storage.outputs.name}' location: location privateLinkServiceId: storage.outputs.id - subnetId: vnet.properties.subnets[1].id // aks subnet + subnetId: vnet.outputs.aksSubnetId groupId: 'blob' privateDnsZoneConfigs: enablePrivateEndpoints ? 
privatelinkPrivateDns.outputs.blobStoragePrivateDnsZoneConfigs : [] } } module aiSearchPrivateEndpoint 'core/vnet/private-endpoint.bicep' = if (enablePrivateEndpoints) { - name: 'ai-search-private-endpoint' + name: 'ai-search-private-endpoint-deployment' params: { privateEndpointName: '${abbrs.privateEndpoint}search-${aiSearch.outputs.name}' location: location privateLinkServiceId: aiSearch.outputs.id - subnetId: vnet.properties.subnets[1].id // aks subnet + subnetId: vnet.outputs.aksSubnetId groupId: 'searchService' privateDnsZoneConfigs: enablePrivateEndpoints ? privatelinkPrivateDns.outputs.aiSearchPrivateDnsZoneConfigs : [] } } module privateLinkScopePrivateEndpoint 'core/vnet/private-endpoint.bicep' = if (enablePrivateEndpoints) { - name: 'privatelink-scope-private-endpoint' + name: 'privatelink-scope-private-endpoint-deployment' params: { privateEndpointName: '${abbrs.privateEndpoint}pls-${resourceBaseNameFinal}' location: location privateLinkServiceId: enablePrivateEndpoints ? azureMonitorPrivateLinkScope.outputs.id : '' - subnetId: vnet.properties.subnets[1].id // aks subnet + subnetId: vnet.outputs.aksSubnetId groupId: 'azuremonitor' privateDnsZoneConfigs: enablePrivateEndpoints ? privatelinkPrivateDns.outputs.azureMonitorPrivateDnsZoneConfigs : [] } @@ -403,16 +348,15 @@ output azure_storage_account_blob_url string = storage.outputs.primaryEndpoints. output azure_cosmosdb_endpoint string = cosmosdb.outputs.endpoint output azure_cosmosdb_name string = cosmosdb.outputs.name output azure_cosmosdb_id string = cosmosdb.outputs.id -output azure_app_insights_connection_string string = apim.outputs.appInsightsConnectionString +output azure_app_insights_connection_string string = appInsights.outputs.connectionString output azure_apim_name string = apim.outputs.name output azure_apim_gateway_url string = apim.outputs.apimGatewayUrl output azure_dns_zone_name string = privateDnsZone.outputs.name -output azure_graphrag_hostname string = graphRagHostname -output azure_graphrag_url string = graphRagUrl +output azure_app_hostname string = appHostname +output azure_app_url string = appUrl output azure_workload_identity_client_id string = workloadIdentity.outputs.clientId output azure_workload_identity_principal_id string = workloadIdentity.outputs.principalId output azure_workload_identity_name string = workloadIdentity.outputs.name -output azure_private_dns_zones array = enablePrivateEndpoints ? union( - privatelinkPrivateDns.outputs.privateDnsZones, - [privateDnsZone.outputs.name] -) : [] +output azure_private_dns_zones array = enablePrivateEndpoints + ? 
union(privatelinkPrivateDns.outputs.privateDnsZones, [privateDnsZone.outputs.name]) + : [] diff --git a/notebooks/2-Advanced_Getting_Started.ipynb b/notebooks/2-Advanced_Getting_Started.ipynb index 2b76000c..044d3701 100644 --- a/notebooks/2-Advanced_Getting_Started.ipynb +++ b/notebooks/2-Advanced_Getting_Started.ipynb @@ -61,18 +61,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "4", "metadata": {}, "outputs": [], "source": [ "import getpass\n", "import json\n", - "import os\n", "import sys\n", "import time\n", "from pathlib import Path\n", - "from zipfile import ZipFile\n", "\n", "import magic\n", "import pandas as pd\n", @@ -102,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "7", "metadata": { "tags": [] @@ -134,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "9", "metadata": { "tags": [] @@ -161,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "10", "metadata": {}, "outputs": [], @@ -183,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "12", "metadata": { "tags": [] @@ -192,7 +190,7 @@ "source": [ "def upload_files(\n", " file_directory: str,\n", - " storage_name: str,\n", + " container_name: str,\n", " batch_size: int = 100,\n", " overwrite: bool = True,\n", " max_retries: int = 5,\n", @@ -202,7 +200,7 @@ "\n", " Args:\n", " file_directory - a local directory of .txt files to upload. All files must be in utf-8 encoding.\n", - " storage_name - a unique name for the Azure storage container.\n", + " container_name - a unique name for the Azure storage container.\n", " batch_size - the number of files to upload in a single batch.\n", " overwrite - whether or not to overwrite files if they already exist in the storage container.\n", " max_retries - the maximum number of times to retry uploading a batch of files if the API is busy.\n", @@ -213,13 +211,13 @@ " url = endpoint + \"/data\"\n", "\n", " def upload_batch(\n", - " files: list, storage_name: str, overwrite: bool, max_retries: int\n", + " files: list, container_name: str, overwrite: bool, max_retries: int\n", " ) -> requests.Response:\n", " for _ in range(max_retries):\n", " response = requests.post(\n", " url=url,\n", " files=files,\n", - " params={\"storage_name\": storage_name, \"overwrite\": overwrite},\n", + " params={\"container_name\": container_name, \"overwrite\": overwrite},\n", " headers=headers,\n", " )\n", " # API may be busy, retry\n", @@ -248,25 +246,25 @@ " )\n", " # upload batch of files\n", " if len(batch_files) == batch_size:\n", - " response = upload_batch(batch_files, storage_name, overwrite, max_retries)\n", + " response = upload_batch(batch_files, container_name, overwrite, max_retries)\n", " # if response is not ok, return early\n", " if not response.ok:\n", " return response\n", " batch_files.clear()\n", " # upload remaining files\n", " if len(batch_files) > 0:\n", - " response = upload_batch(batch_files, storage_name, overwrite, max_retries)\n", + " response = upload_batch(batch_files, container_name, overwrite, max_retries)\n", " return response\n", "\n", "\n", - "def delete_files(storage_name: str) -> requests.Response:\n", - " \"\"\"Delete a blob storage container.\"\"\"\n", - " url = endpoint + f\"/data/{storage_name}\"\n", + "def delete_files(container_name: str) -> requests.Response:\n", + " \"\"\"Delete an azure storage container that holds raw data.\"\"\"\n", + " url = 
endpoint + f\"/data/{container_name}\"\n", " return requests.delete(url=url, headers=headers)\n", "\n", "\n", "def list_files() -> requests.Response:\n", - " \"\"\"List all data storage containers.\"\"\"\n", + " \"\"\"Get a list of all azure storage containers that hold raw data.\"\"\"\n", " url = endpoint + \"/data\"\n", " return requests.get(url=url, headers=headers)\n", "\n", @@ -274,41 +272,37 @@ "def build_index(\n", " storage_name: str,\n", " index_name: str,\n", - " entity_extraction_prompt_filepath: str = None,\n", - " community_prompt_filepath: str = None,\n", - " summarize_description_prompt_filepath: str = None,\n", + " entity_extraction_prompt: str = None,\n", + " entity_summarization_prompt: str = None,\n", + " community_summarization_prompt: str = None,\n", ") -> requests.Response:\n", - " \"\"\"Create a search index.\n", - " This function kicks off a job that builds a knowledge graph (KG) index from files located in a blob storage container.\n", + " \"\"\"Build a graphrag index.\n", + " This function submits a job that builds a graphrag index (i.e. a knowledge graph) from data files located in a blob storage container.\n", " \"\"\"\n", " url = endpoint + \"/index\"\n", - " prompt_files = dict()\n", - " if entity_extraction_prompt_filepath:\n", - " prompt_files[\"entity_extraction_prompt\"] = open(\n", - " entity_extraction_prompt_filepath, \"r\"\n", - " )\n", - " if community_prompt_filepath:\n", - " prompt_files[\"community_report_prompt\"] = open(community_prompt_filepath, \"r\")\n", - " if summarize_description_prompt_filepath:\n", - " prompt_files[\"summarize_descriptions_prompt\"] = open(\n", - " summarize_description_prompt_filepath, \"r\"\n", - " )\n", + " prompts = dict()\n", + " if entity_extraction_prompt:\n", + " prompts[\"entity_extraction_prompt\"] = entity_extraction_prompt\n", + " if entity_summarization_prompt:\n", + " prompts[\"summarize_descriptions_prompt\"] = entity_summarization_prompt\n", + " if community_summarization_prompt:\n", + " prompts[\"community_report_prompt\"] = community_summarization_prompt\n", " return requests.post(\n", " url,\n", - " files=prompt_files if len(prompt_files) > 0 else None,\n", - " params={\"index_name\": index_name, \"storage_name\": storage_name},\n", + " files=prompts if len(prompts) > 0 else None,\n", + " params={\"index_container_name\": index_name, \"storage_container_name\": storage_name},\n", " headers=headers,\n", " )\n", "\n", "\n", - "def delete_index(index_name: str) -> requests.Response:\n", - " \"\"\"Delete a search index.\"\"\"\n", - " url = endpoint + f\"/index/{index_name}\"\n", + "def delete_index(container_name: str) -> requests.Response:\n", + " \"\"\"Delete an azure storage container that holds a search index.\"\"\"\n", + " url = endpoint + f\"/index/{container_name}\"\n", " return requests.delete(url, headers=headers)\n", "\n", "\n", "def list_indexes() -> list:\n", - " \"\"\"List all search indexes.\"\"\"\n", + " \"\"\"Get a list of all azure storage containers that hold search indexes.\"\"\"\n", " url = endpoint + \"/index\"\n", " response = requests.get(url, headers=headers)\n", " try:\n", @@ -319,8 +313,9 @@ " return response\n", "\n", "\n", - "def index_status(index_name: str) -> requests.Response:\n", - " url = endpoint + f\"/index/status/{index_name}\"\n", + "def index_status(container_name: str) -> requests.Response:\n", + " \"\"\"Get the status of a specific index.\"\"\"\n", + " url = endpoint + f\"/index/status/{container_name}\"\n", " return requests.get(url, headers=headers)\n", "\n", "\n", 
@@ -341,6 +336,7 @@ "def global_search_streaming(\n", " index_name: str | list[str], query: str, community_level: int\n", ") -> requests.Response:\n", + " raise NotImplementedError(\"this functionality has been temporarily removed\")\n", " \"\"\"Run a global query across one or more indexes and stream back the response\"\"\"\n", " url = endpoint + \"/query/streaming/global\"\n", " # optional parameter: community level to query the graph at (default for global query = 1)\n", @@ -385,6 +381,7 @@ "def local_search_streaming(\n", " index_name: str | list[str], query: str, community_level: int\n", ") -> requests.Response:\n", + " raise NotImplementedError(\"this functionality has been temporarily removed\")\n", " \"\"\"Run a global query across one or more indexes and stream back the response\"\"\"\n", " url = endpoint + \"/query/streaming/local\"\n", " # optional parameter: community level to query the graph at (default for local query = 2)\n", @@ -475,15 +472,11 @@ " return response\n", "\n", "\n", - "def generate_prompts(storage_name: str, zip_file_name: str, limit: int = 1) -> None:\n", + "def generate_prompts(container_name: str, limit: int = 1) -> None:\n", " \"\"\"Generate graphrag prompts using data provided in a specific storage container.\"\"\"\n", " url = endpoint + \"/index/config/prompts\"\n", - " params = {\"storage_name\": storage_name, \"limit\": limit}\n", - " with requests.get(url, params=params, headers=headers, stream=True) as r:\n", - " r.raise_for_status()\n", - " with open(zip_file_name, \"wb\") as f:\n", - " for chunk in r.iter_content():\n", - " f.write(chunk)" + " params = {\"container_name\": container_name, \"limit\": limit}\n", + " return requests.get(url, params=params, headers=headers)" ] }, { @@ -507,7 +500,7 @@ "source": [ "response = upload_files(\n", " file_directory=file_directory,\n", - " storage_name=storage_name,\n", + " container_name=storage_name,\n", " batch_size=100,\n", " overwrite=True,\n", ")\n", @@ -550,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "18", "metadata": {}, "outputs": [], @@ -573,14 +566,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "20", "metadata": {}, "outputs": [], "source": [ - "generate_prompts(storage_name=storage_name, limit=1, zip_file_name=\"prompts.zip\")\n", - "with ZipFile(\"prompts.zip\", \"r\") as zip_ref:\n", - " zip_ref.extractall()" + "auto_template_response = generate_prompts(container_name=storage_name, limit=1)\n", + "if auto_template_response.ok:\n", + " prompts = auto_template_response.json()\n", + "else:\n", + " print(auto_template_response.text)" ] }, { @@ -618,30 +613,20 @@ }, "outputs": [], "source": [ - "# check if prompt files exist\n", - "entity_extraction_prompt_filepath = \"prompts/entity_extraction.txt\"\n", - "community_prompt_filepath = \"prompts/community_report.txt\"\n", - "summarize_description_prompt_filepath = \"prompts/summarize_descriptions.txt\"\n", - "entity_prompt = (\n", - " entity_extraction_prompt_filepath\n", - " if os.path.isfile(entity_extraction_prompt_filepath)\n", - " else None\n", - ")\n", - "community_prompt = (\n", - " community_prompt_filepath if os.path.isfile(community_prompt_filepath) else None\n", - ")\n", - "summarize_prompt = (\n", - " summarize_description_prompt_filepath\n", - " if os.path.isfile(summarize_description_prompt_filepath)\n", - " else None\n", - ")\n", + "# check if custom prompts were generated\n", + "if \"auto_template_response\" in locals() and 
auto_template_response.ok:\n", + " entity_extraction_prompt = prompts[\"entity_extraction_prompt\"]\n", + " community_summarization_prompt = prompts[\"community_summarization_prompt\"]\n", + " summarize_description_prompt = prompts[\"entity_summarization_prompt\"]\n", + "else:\n", + " entity_extraction_prompt = community_summarization_prompt = summarize_description_prompt = None\n", "\n", "response = build_index(\n", " storage_name=storage_name,\n", " index_name=index_name,\n", - " entity_extraction_prompt_filepath=entity_prompt,\n", - " community_prompt_filepath=community_prompt,\n", - " summarize_description_prompt_filepath=summarize_prompt,\n", + " entity_extraction_prompt=entity_extraction_prompt,\n", + " community_summarization_prompt=community_summarization_prompt,\n", + " entity_summarization_prompt=summarize_description_prompt,\n", ")\n", "if response.ok:\n", " pprint(response.json())\n", @@ -717,7 +702,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "31", "metadata": {}, "outputs": [], @@ -757,7 +742,6 @@ }, "outputs": [], "source": [ - "%%time\n", "# pass in a single index name as a string or to query across multiple indexes, set index_name=[myindex1, myindex2]\n", "global_response = global_search(\n", " index_name=index_name,\n", @@ -769,28 +753,6 @@ "global_response_data" ] }, - { - "cell_type": "markdown", - "id": "35", - "metadata": {}, - "source": [ - "An API endpoint has been designed to support streaming back the graphrag response while executing a global query (useful in applications like a chatbot)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36", - "metadata": {}, - "outputs": [], - "source": [ - "global_search_streaming(\n", - " index_name=index_name,\n", - " query=\"Summarize the main topics found in this data\",\n", - " community_level=1,\n", - ")" - ] - }, { "cell_type": "markdown", "id": "37", @@ -810,7 +772,6 @@ }, "outputs": [], "source": [ - "%%time\n", "# pass in a single index name as a string or to query across multiple indexes, set index_name=[myindex1, myindex2]\n", "local_response = local_search(\n", " index_name=index_name,\n", @@ -822,28 +783,6 @@ "local_response_data" ] }, - { - "cell_type": "markdown", - "id": "39", - "metadata": {}, - "source": [ - "An API endpoint has been designed to support streaming back the graphrag response while executing a local query (useful in applications like a chatbot)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "40", - "metadata": {}, - "outputs": [], - "source": [ - "local_search_streaming(\n", - " index_name=index_name,\n", - " query=\"Who are the primary actors in these communities?\",\n", - " community_level=2,\n", - ")" - ] - }, { "cell_type": "markdown", "id": "41", @@ -1005,7 +944,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "54", "metadata": {}, "outputs": [],