improve error logging and mark a working version of global search
jgbradley1 committed Jan 30, 2025
1 parent c430cf2 commit 8c56f7f
Showing 11 changed files with 305 additions and 194 deletions.
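
The pattern this commit standardizes is visible in every hunk below: each `except Exception:` handler now binds the exception, loads the pipeline logger, and records the message together with the exception object and a formatted traceback before raising an `HTTPException`. A minimal sketch of that shape; the endpoint is hypothetical, and the logger's keyword interface is inferred from the call sites in this diff rather than from the logger's source:

```python
# Minimal sketch of the error-handling shape this commit applies everywhere.
# "example_endpoint" is hypothetical; the logger keywords (message, cause,
# stack, details) are inferred from the call sites in this diff.
import traceback

from fastapi import HTTPException

from graphrag_app.logger.load_logger import load_pipeline_logger


async def example_endpoint(container_name: str):
    try:
        ...  # endpoint work goes here
    except Exception as e:
        logger = load_pipeline_logger()
        logger.error(
            message=f"Error processing '{container_name}'.",
            cause=e,  # the caught exception object itself
            stack=traceback.format_exc(),  # full formatted traceback
            details={"container": container_name},
        )
        # clients see the human-readable name, never the sanitized one
        raise HTTPException(
            status_code=500,
            detail=f"Error processing '{container_name}'.",
        )
```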
4 changes: 2 additions & 2 deletions backend/README.md
@@ -1,5 +1,5 @@
# Web App
This application is a FastAPI app that implements a REST API wrapper around the graphrag library.
This directory contains the source code for a FastAPI application that implements a REST API wrapper around the graphrag library. The app is packaged as a Python package for a cleaner install/deployment experience.

## Package Layout
The code has the following structure:
@@ -11,7 +11,7 @@ backend
│   ├── api # endpoint definitions
│   ├── logger # custom loggers designed for graphrag use
│   ├── main.py # initializes the FastAPI application
│   ├── typing
│   ├── typing # data validation models
│   └── utils # utility/helper functions
├── manifests # k8s manifest files
├── poetry.lock
41 changes: 26 additions & 15 deletions backend/graphrag_app/api/data.py
@@ -3,6 +3,7 @@

import asyncio
import re
import traceback
from math import ceil
from typing import List

@@ -22,7 +23,6 @@
from graphrag_app.utils.common import (
delete_cosmos_container_item_if_exist,
delete_storage_container_if_exist,
desanitize_name,
get_blob_container_client,
get_cosmos_container_store_client,
sanitize_name,
@@ -50,9 +50,13 @@ async def get_all_data_containers():
for item in container_store_client.read_all_items():
if item["type"] == "data":
items.append(item["human_readable_name"])
except Exception:
except Exception as e:
reporter = load_pipeline_logger()
reporter.error("Error getting list of blob containers.")
reporter.error(
message="Error getting list of blob containers.",
cause=e,
stack=traceback.format_exc(),
)
raise HTTPException(
status_code=500, detail="Error getting list of blob containers."
)
@@ -112,6 +116,7 @@ def __exit__(self, *args):
)
async def upload_files(
files: List[UploadFile],
container_name: str,
sanitized_container_name: str = Depends(sanitize_name),
overwrite: bool = True,
):
@@ -129,7 +134,6 @@ async def upload_files(
Raises:
HTTPException: If the container name is invalid or if any error occurs during the upload process.
"""
original_container_name = desanitize_name(sanitized_container_name)
try:
# clean files - remove illegal XML characters
files = [UploadFile(Cleaner(f.file), filename=f.filename) for f in files]
@@ -152,16 +156,21 @@
cosmos_container_store_client = get_cosmos_container_store_client()
cosmos_container_store_client.upsert_item({
"id": sanitized_container_name,
"human_readable_name": original_container_name,
"human_readable_name": container_name,
"type": "data",
})
return BaseResponse(status="File upload successful.")
except Exception:
except Exception as e:
logger = load_pipeline_logger()
logger.error("Error uploading files.", details={"files": files})
logger.error(
message="Error uploading files.",
cause=e,
stack=traceback.format_exc(),
details={"files": [f.filename for f in files]},
)
raise HTTPException(
status_code=500,
detail=f"Error uploading files to container '{original_container_name}'.",
detail=f"Error uploading files to container '{container_name}'.",
)


@@ -171,25 +180,27 @@ async def upload_files(
response_model=BaseResponse,
responses={200: {"model": BaseResponse}},
)
async def delete_files(sanitized_container_name: str = Depends(sanitize_name)):
async def delete_files(
container_name: str, sanitized_container_name: str = Depends(sanitize_name)
):
"""
Delete a specified data storage container.
"""
# sanitized_container_name = sanitize_name(container_name)
original_container_name = desanitize_name(sanitized_container_name)
try:
delete_storage_container_if_exist(sanitized_container_name)
delete_cosmos_container_item_if_exist(
"container-store", sanitized_container_name
)
except Exception:
except Exception as e:
logger = load_pipeline_logger()
logger.error(
f"Error deleting container {original_container_name}.",
details={"Container": original_container_name},
message=f"Error deleting container {container_name}.",
cause=e,
stack=traceback.format_exc(),
details={"Container": container_name},
)
raise HTTPException(
status_code=500,
detail=f"Error deleting container '{original_container_name}'.",
detail=f"Error deleting container '{container_name}'.",
)
return BaseResponse(status="Success")
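
With `desanitize_name` removed, these endpoints now take the human-readable `container_name` directly and derive the storage-safe form through a FastAPI dependency, so error messages no longer need a reverse lookup. A hedged sketch of that wiring; the real `sanitize_name` lives in `graphrag_app.utils.common` and is not shown in this diff, so the hashing below is purely illustrative:

```python
import hashlib

from fastapi import Depends, FastAPI

app = FastAPI()


def sanitize_name(container_name: str) -> str:
    # Illustrative stand-in: map a human-readable container name to a
    # storage-safe identifier. The actual implementation may differ.
    return hashlib.sha256(container_name.encode()).hexdigest()


@app.delete("/data/{container_name}")
async def delete_files(
    container_name: str,
    sanitized_container_name: str = Depends(sanitize_name),
):
    # FastAPI resolves both values from the same request parameter:
    # container_name arrives verbatim, sanitized_container_name via the
    # dependency, so the handler never calls desanitize_name().
    ...
```

Carrying the original name through the request keeps user-facing error strings readable without a reverse mapping, which is why the `desanitize_name` imports disappear throughout this commit.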
18 changes: 12 additions & 6 deletions backend/graphrag_app/api/graph.py
@@ -1,6 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import traceback

from fastapi import (
APIRouter,
Depends,
@@ -11,7 +13,6 @@
from graphrag_app.logger.load_logger import load_pipeline_logger
from graphrag_app.utils.azure_clients import AzureClientManager
from graphrag_app.utils.common import (
desanitize_name,
sanitize_name,
validate_index_file_exist,
)
@@ -27,7 +28,9 @@
summary="Retrieve a GraphML file of the knowledge graph",
response_description="GraphML file successfully downloaded",
)
async def get_graphml_file(sanitized_container_name: str = Depends(sanitize_name)):
async def get_graphml_file(
container_name, sanitized_container_name: str = Depends(sanitize_name)
):
# validate graphml file existence
azure_client_manager = AzureClientManager()
graphml_filename = "graph.graphml"
@@ -43,11 +46,14 @@ async def get_graphml_file(sanitized_container_name: str = Depends(sanitize_name
media_type="application/octet-stream",
headers={"Content-Disposition": f"attachment; filename={graphml_filename}"},
)
except Exception:
except Exception as e:
logger = load_pipeline_logger()
original_container_name = desanitize_name(sanitized_container_name)
logger.error("Could not fetch graphml file")
logger.error(
message="Could not fetch graphml file",
cause=e,
stack=traceback.format_exc(),
)
raise HTTPException(
status_code=500,
detail=f"Could not fetch graphml file for '{original_container_name}'.",
detail=f"Could not fetch graphml file for '{container_name}'.",
)
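
For completeness, a client-side sketch of downloading the GraphML file. The route path below is a placeholder, since the decorator's path is not visible in this hunk, and `httpx` is just one convenient client:

```python
# Hypothetical client call; "/graph/graphml/mycontainer" is a placeholder
# path, not the route defined in this repository.
import httpx

with httpx.stream("GET", "http://localhost:8000/graph/graphml/mycontainer") as r:
    r.raise_for_status()
    # the endpoint sets Content-Disposition: attachment; filename=graph.graphml
    with open("graph.graphml", "wb") as f:
        for chunk in r.iter_bytes():
            f.write(chunk)
```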
38 changes: 24 additions & 14 deletions backend/graphrag_app/api/index.py
@@ -2,6 +2,7 @@
# Licensed under the MIT License.

import os
import traceback
from time import time

from azure.identity import DefaultAzureCredential
@@ -30,7 +31,6 @@
from graphrag_app.utils.common import (
delete_cosmos_container_item_if_exist,
delete_storage_container_if_exist,
desanitize_name,
get_cosmos_container_store_client,
sanitize_name,
)
@@ -152,9 +152,13 @@ async def get_all_index_names(
for item in container_store_client.read_all_items():
if item["type"] == "index":
items.append(item["human_readable_name"])
except Exception:
except Exception as e:
logger = load_pipeline_logger()
logger.error("Error fetching index list")
logger.error(
message="Error fetching index list",
cause=e,
stack=traceback.format_exc(),
)
return IndexNameList(index_name=items)


@@ -184,9 +188,11 @@ def _delete_k8s_job(job_name: str, namespace: str) -> None:
try:
batch_v1 = kubernetes_client.BatchV1Api()
batch_v1.delete_namespaced_job(name=job_name, namespace=namespace)
except Exception:
except Exception as e:
logger.error(
message=f"Error deleting k8s job {job_name}.",
cause=e,
stack=traceback.format_exc(),
details={"container": job_name},
)
pass
@@ -195,9 +201,11 @@ def _delete_k8s_job(job_name: str, namespace: str) -> None:
job_pod = _get_pod_name(job_name, os.environ["AKS_NAMESPACE"])
if job_pod:
core_v1.delete_namespaced_pod(job_pod, namespace=namespace)
except Exception:
except Exception as e:
logger.error(
message=f"Error deleting k8s pod for job {job_name}.",
cause=e,
stack=traceback.format_exc(),
details={"container": job_name},
)
pass
@@ -210,6 +218,7 @@ def _delete_k8s_job(job_name: str, namespace: str) -> None:
responses={200: {"model": BaseResponse}},
)
async def delete_index(
container_name: str,
sanitized_container_name: str = Depends(sanitize_name),
):
"""
@@ -236,16 +245,16 @@ async def delete_index(
if ai_search_index_name in index_client.list_index_names():
index_client.delete_index(ai_search_index_name)

except Exception:
except Exception as e:
logger = load_pipeline_logger()
original_container_name = desanitize_name(sanitized_container_name)
logger.error(
message=f"Error encountered while deleting all data for {original_container_name}.",
stack=None,
details={"container": original_container_name},
message=f"Error encountered while deleting all data for {container_name}.",
cause=e,
stack=traceback.format_exc(),
details={"container": container_name},
)
raise HTTPException(
status_code=500, detail=f"Error deleting '{original_container_name}'."
status_code=500, detail=f"Error deleting '{container_name}'."
)

return BaseResponse(status="Success")
@@ -256,7 +265,9 @@ async def delete_index(
summary="Track the status of an indexing job",
response_model=IndexStatusResponse,
)
async def get_index_status(sanitized_container_name: str = Depends(sanitize_name)):
async def get_index_status(
container_name: str, sanitized_container_name: str = Depends(sanitize_name)
):
pipelinejob = PipelineJob()
if pipelinejob.item_exist(sanitized_container_name):
pipeline_job = pipelinejob.load_item(sanitized_container_name)
@@ -269,7 +280,6 @@ async def get_index_status(sanitized_container_name: str = Depends(sanitize_name
progress=pipeline_job.progress,
)
else:
original_container_name = desanitize_name(sanitized_container_name)
raise HTTPException(
status_code=404, detail=f"'{original_container_name}' does not exist."
status_code=404, detail=f"'{container_name}' does not exist."
)
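
Taken together, the call sites in this commit imply a single logger interface. A `typing.Protocol` capturing that inferred shape, useful as documentation only; the real implementation lives under `backend/graphrag_app/logger` and may differ:

```python
from typing import Any, Optional, Protocol


class PipelineLogger(Protocol):
    """Interface implied by the logger.error(...) call sites in this commit."""

    def error(
        self,
        message: str,
        cause: Optional[BaseException] = None,
        stack: Optional[str] = None,
        details: Optional[dict[str, Any]] = None,
    ) -> None: ...
```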
10 changes: 5 additions & 5 deletions backend/graphrag_app/api/prompt_tuning.py
@@ -15,7 +15,7 @@

from graphrag_app.logger.load_logger import load_pipeline_logger
from graphrag_app.utils.azure_clients import AzureClientManager
from graphrag_app.utils.common import desanitize_name, sanitize_name
from graphrag_app.utils.common import sanitize_name

prompt_tuning_route = APIRouter(prefix="/index/config", tags=["Prompt Tuning"])

@@ -26,6 +26,7 @@
description="Generating custom prompts from user-provided data may take several minutes to run based on the amount of data used.",
)
async def generate_prompts(
container_name: str,
limit: int = 5,
sanitized_container_name: str = Depends(sanitize_name),
):
@@ -36,11 +37,10 @@ async def generate_prompts(
# check for storage container existence
azure_client_manager = AzureClientManager()
blob_service_client = azure_client_manager.get_blob_service_client()
original_container_name = desanitize_name(sanitized_container_name)
if not blob_service_client.get_container_client(sanitized_container_name).exists():
raise HTTPException(
status_code=500,
detail=f"Storage container '{original_container_name}' does not exist.",
detail=f"Storage container '{container_name}' does not exist.",
)

# load pipeline configuration file (settings.yaml) for input data and other settings
@@ -61,7 +61,7 @@
except Exception as e:
logger = load_pipeline_logger()
error_details = {
"storage_name": original_container_name,
"storage_name": container_name,
}
logger.error(
message="Auto-prompt generation failed.",
@@ -71,7 +71,7 @@
)
raise HTTPException(
status_code=500,
detail=f"Error generating prompts for data in '{original_container_name}'. Please try a lower limit.",
detail=f"Error generating prompts for data in '{container_name}'. Please try a lower limit.",
)

prompt_content = {
