Skip to content

Commit

Permalink
Solution to #258 [CLIP][Server/Engine] Send images to engine / accept…
Browse files Browse the repository at this point in the history
… PIL images (#353)

* ADD: Send images to engine / accept PIL images

* deduce import

* deduce import

* annotions modify

* lint /transformer/vision/utils.py

* change list/List to Sequence for mixed input of str and Image objs

* lint for sync_engine.py

* add: unit test of PIL Image input of clip_embed models;delete: multithreading

* fix: local img not found

* modify:avoid import error while TYPE_CHECKING and define type hint for PIL.Image.Image

* modify:avoid import error while TYPE_CHECKING and define type hint for PIL.Image.Image

* modify:avoid import error while TYPE_CHECKING and define type hint for PIL.Image.Image

* modify:avoid import error in TYPE_CHECKING phase for no PIL.Image installed users, and define type hint for PIL.Image.Image

* modify:avoid import error in TYPE_CHECKING phase for no PIL.Image installed users, and define type hint for PIL.Image.Image

* modify:avoid import error in TYPE_CHECKING phase for no PIL.Image installed users, and define type hint for PIL.Image.Image

* fix:tying error of ImageClassType;use existing img instead;Sequence to List

* fix:tying error of ImageClassType;use existing img instead;Sequence to List

* add: requests dependency for img fetching in test

* add: requests dependency for img fetching in test& poetry lock file

* add: requests dependency(include types-requests) for img fetching in test& poetry lock file

* add: add back run_tests_with_hook.sh

---------

Co-authored-by: wgao <[email protected]>
Co-authored-by: 王高 <[email protected]>
  • Loading branch information
3 people authored Sep 18, 2024
1 parent 65afe2b commit 585aca1
Show file tree
Hide file tree
Showing 9 changed files with 1,803 additions and 1,612 deletions.
Empty file modified libs/client_infinity/run_tests_with_hook.sh
100755 → 100644
Empty file.
11 changes: 6 additions & 5 deletions libs/infinity_emb/infinity_emb/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# Copyright (c) 2023-now michaelfeilfeil

from asyncio import Semaphore
from typing import Iterable, Iterator, Optional, Set, Union
from typing import Iterable, Iterator, Optional, Set, Union, List

from infinity_emb.args import EngineArgs
from infinity_emb.primitives import ImageClassType

# prometheus
from infinity_emb.inference import (
Expand Down Expand Up @@ -202,12 +203,12 @@ async def classify(
return scores, usage

async def image_embed(
self, *, images: list[str]
self, *, images: List[Union[str, "ImageClassType"]]
) -> tuple[list[EmbeddingReturnType], int]:
"""embed multiple images
Kwargs:
images (list[str]): list of image urls, to be embedded
images (list[Union[str, ImageClassType]]): list of image urls or ImageClassType objects, to be embedded
Raises:
ValueError: raised if engine is not started yet
Expand Down Expand Up @@ -339,13 +340,13 @@ async def classify(
return await self[model].classify(sentences=sentences, raw_scores=raw_scores)

async def image_embed(
self, *, model: str, images: list[str]
self, *, model: str, images: List[Union[str, "ImageClassType"]]
) -> tuple[list[EmbeddingReturnType], int]:
"""embed multiple images
Kwargs:
model (str): model name to be used
images (list[str]): list of image urls, to be embedded
images (list[Union[str, ImageClassType]]): list of image urls or ImageClassType objects, to be embedded
Raises:
ValueError: raised if engine is not started yet
Expand Down
7 changes: 4 additions & 3 deletions libs/infinity_emb/infinity_emb/inference/batch_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import time
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
from typing import Any, Sequence, Set
from typing import Any, Sequence, Set, Union, List

import numpy as np

Expand All @@ -30,6 +30,7 @@
PrioritizedQueueItem,
ReRankSingle,
get_inner_item,
ImageClassType,
)
from infinity_emb.transformer.abstract import BaseTransformer
from infinity_emb.transformer.utils import get_lengths_with_tokenize
Expand Down Expand Up @@ -210,12 +211,12 @@ async def classify(
async def image_embed(
self,
*,
images: list[str],
images: List[Union[str, "ImageClassType"]],
) -> tuple[list[EmbeddingReturnType], int]:
"""Schedule a images and sentences to be embedded. Awaits until embedded.
Args:
images (list[str]): list of pre-signed urls
images (list[Union[str, ImageClassType]]): list of pre-signed urls or ImageClassType objects
Raises:
ModelNotDeployedError: If loaded model does not expose `embed`
Expand Down
11 changes: 10 additions & 1 deletion libs/infinity_emb/infinity_emb/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,22 @@
TypedDict,
TypeVar,
Union,
Any,
)

import numpy as np
import numpy.typing as npt


EmptyImageClassType: Any = Any
if TYPE_CHECKING:
from PIL.Image import Image as ImageClass
try:
from PIL.Image import Image as ImageClass

EmptyImageClassType = ImageClass
except ImportError:
pass
ImageClassType = EmptyImageClassType

# if python>=3.10 use kw_only

Expand Down
13 changes: 11 additions & 2 deletions libs/infinity_emb/infinity_emb/sync_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@
import weakref
from concurrent.futures import Future
from functools import partial
from typing import TYPE_CHECKING, Awaitable, Callable, Iterator, TypeVar
from typing import (
TYPE_CHECKING,
Awaitable,
Callable,
Iterator,
TypeVar,
Union,
List,
)

from infinity_emb.engine import AsyncEmbeddingEngine, AsyncEngineArray, EngineArgs
from infinity_emb.log_handler import logger
from infinity_emb.primitives import ImageClassType

if TYPE_CHECKING:
from infinity_emb import AsyncEmbeddingEngine
Expand Down Expand Up @@ -184,7 +193,7 @@ def classify(self, *, model: str, sentences: list[str], raw_scores: bool = False
)

@add_start_docstrings(AsyncEngineArray.image_embed.__doc__)
def image_embed(self, *, model: str, images: list[str]):
def image_embed(self, *, model: str, images: List[Union[str, ImageClassType]]):
"""sync interface of AsyncEngineArray"""
return self.async_run(
self.async_engine_array.image_embed, model=model, images=images
Expand Down
54 changes: 44 additions & 10 deletions libs/infinity_emb/infinity_emb/transformer/vision/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,59 @@
# Copyright (c) 2023-now michaelfeil

from infinity_emb._optional_imports import CHECK_PIL, CHECK_REQUESTS
from infinity_emb.primitives import ImageCorruption, ImageSingle
from infinity_emb.primitives import ImageCorruption, ImageSingle, ImageClassType
from typing import Union, List

if CHECK_PIL.is_available:
from PIL import Image # type: ignore

if CHECK_REQUESTS.is_available:
import requests # type: ignore


def resolve_images(image_urls: list[str]) -> list[ImageSingle]:
"""Resolve images from URLs."""
# TODO: improve parallel requests, safety, error handling
CHECK_REQUESTS.mark_required()
CHECK_PIL.mark_required()
def resolve_from_img_obj(img_obj: "ImageClassType") -> ImageSingle:
"""Resolve an image from a ImageClassType Object."""
return ImageSingle(image=img_obj)


def resolve_from_img_url(img_url: str) -> ImageSingle:
"""Resolve an image from an URL."""
try:
downloaded = [requests.get(url, stream=True).raw for url in image_urls]
downloaded_img = requests.get(img_url, stream=True).raw
except Exception as e:
raise ImageCorruption(f"Error downloading images: {e}")
raise ImageCorruption(f"error downloading image from url: {e}")

try:
return [ImageSingle(image=Image.open(content)) for content in downloaded]
return ImageSingle(image=Image.open(downloaded_img))
except Exception as e:
raise ImageCorruption(f"Error opening images: {e}")
raise ImageCorruption(f"error opening image from url: {e}")


def resolve_image(img: Union[str, "ImageClassType"]) -> ImageSingle:
"""Resolve a single image."""
if isinstance(img, Image.Image):
return resolve_from_img_obj(img)
elif isinstance(img, str):
return resolve_from_img_url(img)
else:
raise ValueError(
f"Invalid image type: {img} is neither str nor ImageClassType object"
)


def resolve_images(images: List[Union[str, "ImageClassType"]]) -> List[ImageSingle]:
"""Resolve images from URLs or ImageClassType Objects using multithreading."""
# TODO: improve parallel requests, safety, error handling
CHECK_REQUESTS.mark_required()
CHECK_PIL.mark_required()

resolved_imgs = []
for img in images:
try:
resolved_imgs.append(resolve_image(img))
except Exception as e:
raise ImageCorruption(
f"Failed to resolve image: {img}.\nError msg: {str(e)}"
)

return resolved_imgs
Loading

0 comments on commit 585aca1

Please sign in to comment.