Merge pull request #1 from habibasseiss/dev

update ai services & readme
habibasseiss · Feb 16, 2025 · 6b51f5a · 6b51f5a
2 parents 863a467 + 0e22bf0
commit 6b51f5a
Show file tree

Hide file tree

Showing 10 changed files with 1,276 additions and 360 deletions.
diff --git a/.env-example b/.env-example
@@ -0,0 +1,17 @@
+DATABASE_URL=
+SECRET_KEY=
+
+AWS_ACCESS_KEY_ID=
+AWS_ENDPOINT_URL_S3=
+AWS_REGION=
+AWS_SECRET_ACCESS_KEY=
+BUCKET_NAME=
+
+CORS_ORIGINS=
+FORWARDED_ALLOW_IPS=
+
+GOOGLE_API_KEY=
+TOGETHER_API_KEY=
+HYPERBOLIC_API_KEY=
+
+AUTH_TOKEN=
diff --git a/README.md b/README.md
@@ -1,3 +1,123 @@
+# VisuLinx API
+
+A robust FastAPI-based backend service for managing organizations, projects, and file processing with secure authentication.
+
+## 🚀 Features
+
+- **User Management**: Secure user authentication and authorization
+- **Organization Management**: Create and manage organizations with multiple users
+- **Project Management**: Organize work into projects within organizations
+- **File Processing**: Handle file uploads with S3 integration
+- **Preferences System**: Flexible system-wide preferences management
+- **RESTful API**: Modern, fast, and well-documented API endpoints
+- **CORS Support**: Configured for cross-origin resource sharing
+
+## 🏗 Project Structure
+
+```
+app/
+├── api.py              # FastAPI application configuration
+├── database.py         # Database connection and settings
+├── models.py           # SQLAlchemy ORM models
+├── routers/           # API route handlers
+├── schemas.py         # Pydantic models for request/response
+├── security.py        # Authentication and authorization
+├── services/         # Business logic services
+└── settings.py        # Application settings
+```
+
+## 🔧 Technical Stack
+
+- **Framework**: FastAPI
+- **Database**: SQLAlchemy ORM
+- **Authentication**: JWT-based authentication
+- **Storage**: AWS S3 integration
+- **Data Validation**: Pydantic
+- **API Documentation**: Automatic OpenAPI/Swagger docs
+
+## 💻 Key Components
+
+### Models
+- **User**: Manages user accounts and authentication
+- **Organization**: Handles multi-user organizations
+- **Project**: Organizes work within organizations
+- **File**: Manages file uploads and processing
+- **Preference**: Stores system-wide preferences
+
+### API Endpoints
+- `/users`: User management endpoints
+- `/auth`: Authentication endpoints
+- `/organizations`: Organization management
+- `/projects`: Project-related operations
+- `/preferences`: System preferences management
+
+### 🤖 AI Services
+
+The project integrates with multiple AI providers for advanced image analysis and object detection:
+
+#### Supported AI Providers
+- **Together AI**: Uses the Qwen2-VL-72B-Instruct model for high-quality visual analysis
+- **Hyperbolic AI**: Serves the same Qwen2-VL-72B-Instruct model through a different API endpoint
+- **Google Gemini**: Utilizes Gemini 1.5 Pro for advanced image processing
+
+#### Features
+- **Object Detection**: Extract bounding boxes for objects in images
+- **Image Processing**: Automatic image resizing and compression for optimal AI processing
+- **Multi-Provider Support**: Fallback options across different AI services
+- **Standardized Interface**: Consistent API across all providers through the `AiService` abstract base class
+
+#### Configuration
+The AI services require the following environment variables:
+```
+GOOGLE_API_KEY=     # For Gemini AI
+TOGETHER_API_KEY=   # For Together AI
+HYPERBOLIC_API_KEY= # For Hyperbolic AI
+```
+
+## 🔒 Security Features
+
+- Secure password hashing
+- JWT token-based authentication
+- Role-based access control
+- CORS middleware configuration
+
+## 🚀 Getting Started
+
+### Prerequisites
+
+- Python >= 3.13
+- uv (Python package and project manager)
+
+### Installation
+
+1. Clone the repository
+2. Install dependencies using uv:
+   ```bash
+   uv sync
+   ```
+3. Configure environment variables (see `.env-example` for the required keys)
+4. Run the application:
+   ```bash
+   uv run fastapi dev
+   ```
+
+## 📝 API Documentation
+
+Once the server is running, access the API documentation at:
+- Swagger UI: `http://localhost:8000/docs`
+
+## 🤝 Contributing
+
+1. Fork the repository
+2. Create a feature branch
+3. Commit your changes
+4. Push to the branch
+5. Create a Pull Request
+
+## 📄 License
+
+MIT License
+
 ## Autogenerate migration
 
 ```sh
@@ -8,4 +128,3 @@ uv run alembic revision --autogenerate -m "add ... table"
 
 ```sh
 python -c "import secrets; print(secrets.token_urlsafe(32))"
-```
diff --git a/app/routers/projects.py b/app/routers/projects.py
@@ -1,4 +1,5 @@
 import asyncio
+import json
 import logging
 from datetime import datetime
 from http import HTTPStatus
@@ -14,26 +15,28 @@
     Response,
     UploadFile,
 )
-from sqlalchemy import select
+from sqlalchemy import func, select
 from sqlalchemy.orm import Session
 
 from app.database import get_session
-from app.models import File, Organization, Project, User
-from app.schemas import FileSchema, ProjectList, ProjectPublic, ProjectSchema
+from app.models import File, Organization, Preference, Project, User
+from app.schemas import (
+    FileSchema,
+    ProjectList,
+    ProjectPublic,
+    ProjectPublicList,
+    ProjectSchema,
+)
 from app.security import get_current_user
-from app.services.document_service import process
+from app.services import ai_service, document_service
 from app.services.upload_service import (
     delete_file_from_s3,
     get_download_url,
     upload_file_to_s3,
 )
-from app.settings import Settings
 
 logger = logging.getLogger(__name__)
 
-settings = Settings.model_validate({})
-
-
 router = APIRouter(
     prefix='/organizations/{organization_id}/projects', tags=['projects']
 )
@@ -97,7 +100,25 @@ def list_organization_projects(
             Project.deleted_at.is_(None),
         )
     ).all()
-    return {'projects': projects}
+
+    project_list = []
+    for project in projects:
+        file_count = session.scalar(
+            select(func.count()).where(File.project_id == project.id)
+        )
+        if project.organization_id:
+            project_list.append(
+                ProjectPublicList(
+                    id=project.id,
+                    name=project.name,
+                    description=project.description,
+                    organization_id=project.organization_id,
+                    created_at=project.created_at,
+                    file_count=file_count or 0,
+                )
+            )
+
+    return {'projects': project_list}
 
 
 @router.post('/', response_model=ProjectPublic, status_code=HTTPStatus.CREATED)
@@ -236,7 +257,7 @@ async def upload(  # noqa: PLR0913, PLR0917
     for result, download_url in zip(results, download_urls):
         if result and result.id and result.mime_type == 'application/pdf':
             background_tasks.add_task(
-                process,
+                document_service.extract_text,
                 download_url,
                 result.id,
                 session,
@@ -341,3 +362,85 @@ async def download_file(
     download_url = await get_download_url(file.path, file.original_filename)
 
     return {'download_url': download_url}
+
+
+@router.get(
+    '/{project_id}/files/{file_id}/extract_bounding_boxes',
+    response_model=ai_service.DetectedObjectListSchema,
+)
+async def extract_bounding_boxes(
+    organization_id: UUID,
+    project_id: UUID,
+    file_id: UUID,
+    user: CurrentUser,
+    session: DbSession,
+):
+    """Run AI object detection on a project image and store the result.
+
+    Loads the `system_prompt` and `assistant_prompt` preferences, passes the
+    image's download URL and the contents of the project's processed PDF
+    files to `GeminiAiService.extract_bounding_boxes`, saves the
+    JSON-encoded result in the image file's `contents`, stamps
+    `processed_at`, and returns the result.
+
+    Raises:
+        HTTPException: 404 if the file is not found in the project; 400 if
+            the file is not an image or the project has no processed PDF
+            documents; 500 if a required prompt preference is missing.
+    """
+    # The returned project is discarded; presumably get_project raises when
+    # the user cannot access the project -- confirm against its definition.
+    _ = get_project(session, user, organization_id, project_id)
+
+    prompt_keys = ('system_prompt', 'assistant_prompt')
+    preferences = {
+        pref.key: pref.value
+        for pref in session.scalars(
+            select(Preference).where(Preference.key.in_(prompt_keys))
+        ).all()
+    }
+
+    image_file = session.scalar(
+        select(File).where(
+            File.id == file_id,
+            File.project_id == project_id,
+        )
+    )
+    if not image_file:
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_FOUND,
+            detail='File not found.',
+        )
+
+    if not image_file.mime_type.startswith('image'):
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail='File is not an image.',
+        )
+
+    download_url = await get_download_url(
+        image_file.path, image_file.original_filename
+    )
+
+    documents = session.scalars(
+        select(File).where(
+            File.project_id == project_id,
+            File.mime_type == 'application/pdf',
+            File.processed_at.isnot(None),
+        )
+    ).all()
+
+    if not documents:
+        raise HTTPException(
+            status_code=HTTPStatus.BAD_REQUEST,
+            detail='No PDF documents found in project.',
+        )
+
+    # Only documents that actually have stored contents are sent to the AI.
+    document_contents = {
+        document.original_filename: document.contents
+        for document in documents
+        if document.contents is not None
+    }
+
+    # Checked after the request-level validations so their 404/400 responses
+    # keep precedence; a missing preference used to surface as a bare
+    # KeyError from the lookups below.
+    missing = [key for key in prompt_keys if key not in preferences]
+    if missing:
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            detail='Missing preferences: ' + ', '.join(missing) + '.',
+        )
+
+    ai = ai_service.GeminiAiService()
+
+    # extract_bounding_boxes is a synchronous call; run it in a worker
+    # thread so it does not block the event loop while it waits.
+    bounding_boxes = await asyncio.to_thread(
+        ai.extract_bounding_boxes,
+        image_url=download_url,
+        document_contents=document_contents,
+        system_prompt=preferences['system_prompt'],
+        assistant_prompt=preferences['assistant_prompt'],
+    )
+
+    image_file.contents = json.dumps(bounding_boxes)
+    image_file.processed_at = datetime.now()
+    session.commit()
+    session.refresh(image_file)
+
+    return bounding_boxes
diff --git a/app/schemas.py b/app/schemas.py
@@ -24,6 +24,7 @@ class FilePublic(BaseModel):
     size: int
     mime_type: str
     original_filename: str
+    contents: str | None
     created_at: datetime
     updated_at: datetime
     processed_at: datetime | None
@@ -49,8 +50,18 @@ class ProjectPublic(BaseModel):
     model_config = ConfigDict(from_attributes=True)
 
 
+class ProjectPublicList(BaseModel):
+    """Project entry used in list responses; adds a `file_count` field."""
+
+    # Allow building the schema from attribute-bearing objects (ORM rows).
+    model_config = ConfigDict(from_attributes=True)
+
+    id: UUID
+    name: str
+    description: str
+    organization_id: UUID
+    created_at: datetime
+    file_count: int
+
+
 class ProjectList(BaseModel):
-    projects: list[ProjectPublic]
+    projects: list[ProjectPublicList]
 
 
 class OrganizationBasic(BaseModel):