From e5c149810ee56e6e30fe7da313bab326c53b4014 Mon Sep 17 00:00:00 2001
From: wirthual
Date: Tue, 24 Sep 2024 06:37:45 +0200
Subject: [PATCH 01/14] update readme

---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 53b1be47..e07f80e5 100644
--- a/README.md
+++ b/README.md
@@ -213,9 +213,8 @@ sentences = ["This is awesome.", "I am bored."]
 url = "https://bigsoundbank.com/UPLOAD/wav/2380.wav"
 raw_bytes = requests.get(url, stream=True).content
 
-data, samplerate = sf.read(io.BytesIO(raw_bytes))
-audios = [data]
+audios = [raw_bytes]
 
 engine_args = EngineArgs(
     model_name_or_path = "laion/clap-htsat-unfused",
     dtype="float32",
@@ -233,6 +232,8 @@ async def embed(engine: AsyncEmbeddingEngine):
 asyncio.run(embed(array["laion/clap-htsat-unfused"]))
 ```
 
+*Note: The sampling rate of the audio data needs to match the sampling rate expected by the model.*
+
 Example models:
 - [Clap Models from LAION](https://huggingface.co/collections/laion/clap-contrastive-language-audio-pretraining-65415c0b18373b607262a490)

From 727da50d0d643948d84a727909bec8166f286afc Mon Sep 17 00:00:00 2001
From: wirthual
Date: Tue, 24 Sep 2024 06:43:53 +0200
Subject: [PATCH 02/14] extract audio related code into audio utils

---
 .../infinity_emb/inference/batch_handler.py   |  3 +-
 .../infinity_emb/transformer/audio/utils.py   | 66 +++++++++++++++++++
 .../infinity_emb/transformer/vision/utils.py  | 58 +---------------
 3 files changed, 69 insertions(+), 58 deletions(-)
 create mode 100644 libs/infinity_emb/infinity_emb/transformer/audio/utils.py

diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py
index 031bb19c..f69ba0b4 100644
--- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py
+++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py
@@ -33,8 +33,9 @@
     get_inner_item,
 )
 from infinity_emb.transformer.abstract import BaseTransformer
+from infinity_emb.transformer.audio.utils import resolve_audios
 from infinity_emb.transformer.utils import get_lengths_with_tokenize
-from infinity_emb.transformer.vision.utils import resolve_audios, resolve_images
+from infinity_emb.transformer.vision.utils import resolve_images
 
 
 class ShutdownReadOnly:

diff --git a/libs/infinity_emb/infinity_emb/transformer/audio/utils.py b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py
new file mode 100644
index 00000000..999fea45
--- /dev/null
+++ b/libs/infinity_emb/infinity_emb/transformer/audio/utils.py
@@ -0,0 +1,66 @@
+import asyncio
+import io
+from typing import Union
+
+from infinity_emb._optional_imports import CHECK_AIOHTTP, CHECK_SOUNDFILE
+from infinity_emb.primitives import (
+    AudioCorruption,
+    AudioSingle,
+)
+
+if CHECK_AIOHTTP.is_available:
+    import aiohttp
+
+if CHECK_SOUNDFILE.is_available:
+    import soundfile as sf  # type: ignore
+
+
+async def resolve_audio(
+    audio: Union[str, bytes],
+    allowed_sampling_rate: int,
+    session: "aiohttp.ClientSession",
+) -> AudioSingle:
+    if isinstance(audio, bytes):
+        try:
+            audio_bytes = io.BytesIO(audio)
+        except Exception as e:
+            raise AudioCorruption(f"Error opening audio: {e}")
+    else:
+        try:
+            downloaded = await (await session.get(audio)).read()
+            # downloaded = requests.get(audio, stream=True).content
+            audio_bytes = io.BytesIO(downloaded)
+        except Exception as e:
+            raise AudioCorruption(f"Error downloading audio.\nError msg: {str(e)}")
+
+    try:
+        data, rate = sf.read(audio_bytes)
+        if rate != allowed_sampling_rate:
+            raise AudioCorruption(
+                f"Audio sample rate is not {allowed_sampling_rate}Hz, it is {rate}Hz."
+            )
+        return AudioSingle(audio=data, sampling_rate=rate)
+    except Exception as e:
+        raise AudioCorruption(f"Error opening audio: {e}.\nError msg: {str(e)}")
+
+
+async def resolve_audios(
+    audio_urls: list[Union[str, bytes]], allowed_sampling_rate: int
+) -> list[AudioSingle]:
+    """Resolve audios from URLs."""
+    CHECK_AIOHTTP.mark_required()
+    CHECK_SOUNDFILE.mark_required()
+
+    resolved_audios: list[AudioSingle] = []
+    async with aiohttp.ClientSession(trust_env=True) as session:
+        try:
+            resolved_audios = await asyncio.gather(
+                *[
+                    resolve_audio(audio, allowed_sampling_rate, session)
+                    for audio in audio_urls
+                ]
+            )
+        except Exception as e:
+            raise AudioCorruption(f"Failed to resolve audio: {e}")
+
+    return resolved_audios

diff --git a/libs/infinity_emb/infinity_emb/transformer/vision/utils.py b/libs/infinity_emb/infinity_emb/transformer/vision/utils.py
index 08ff04ae..39cbfecb 100644
--- a/libs/infinity_emb/infinity_emb/transformer/vision/utils.py
+++ b/libs/infinity_emb/infinity_emb/transformer/vision/utils.py
@@ -5,10 +5,8 @@
 import io
 from typing import List, Union
 
-from infinity_emb._optional_imports import CHECK_AIOHTTP, CHECK_PIL, CHECK_SOUNDFILE
+from infinity_emb._optional_imports import CHECK_AIOHTTP, CHECK_PIL
 from infinity_emb.primitives import (
-    AudioCorruption,
-    AudioSingle,
     ImageClassType,
     ImageCorruption,
     ImageSingle,
@@ -20,9 +18,6 @@
 if CHECK_PIL.is_available:
     from PIL import Image  # type: ignore
 
-if CHECK_SOUNDFILE.is_available:
-    import soundfile as sf  # type: ignore
-
 
 def resolve_from_img_obj(img_obj: "ImageClassType") -> ImageSingle:
     """Resolve an image from a ImageClassType Object."""
@@ -90,54 +85,3 @@ async def resolve_images(
     )
 
     return resolved_imgs
-
-
-async def resolve_audio(
-    audio: Union[str, bytes],
-    allowed_sampling_rate: int,
-    session: "aiohttp.ClientSession",
-) -> AudioSingle:
-    if isinstance(audio, bytes):
-        try:
-            audio_bytes = io.BytesIO(audio)
-        except Exception as e:
-            raise AudioCorruption(f"Error opening audio: {e}")
-    else:
-        try:
-            downloaded = await (await session.get(audio)).read()
-            # downloaded = requests.get(audio, stream=True).content
-            audio_bytes = io.BytesIO(downloaded)
-        except Exception as e:
-            raise AudioCorruption(f"Error downloading audio.\nError msg: {str(e)}")
-
-    try:
-        data, rate = sf.read(audio_bytes)
-        if rate != allowed_sampling_rate:
-            raise AudioCorruption(
-                f"Audio sample rate is not {allowed_sampling_rate}Hz, it is {rate}Hz."
- ) - return AudioSingle(audio=data, sampling_rate=rate) - except Exception as e: - raise AudioCorruption(f"Error opening audio: {e}.\nError msg: {str(e)}") - - -async def resolve_audios( - audio_urls: list[Union[str, bytes]], allowed_sampling_rate: int -) -> list[AudioSingle]: - """Resolve audios from URLs.""" - CHECK_AIOHTTP.mark_required() - CHECK_SOUNDFILE.mark_required() - - resolved_audios: list[AudioSingle] = [] - async with aiohttp.ClientSession(trust_env=True) as session: - try: - resolved_audios = await asyncio.gather( - *[ - resolve_audio(audio, allowed_sampling_rate, session) - for audio in audio_urls - ] - ) - except Exception as e: - raise AudioCorruption(f"Failed to resolve audio: {e}") - - return resolved_audios From eb62d3e369b21f47421710a52c3f2145be41192d Mon Sep 17 00:00:00 2001 From: wirthual Date: Fri, 27 Sep 2024 04:41:06 +0200 Subject: [PATCH 03/14] add test cases for audio and vision --- docs/docs/cli_v2.md | 165 +++++++++++++----- libs/infinity_emb/tests/conftest.py | 4 +- .../tests/end_to_end/test_torch_audio.py | 94 ++++++++++ .../tests/end_to_end/test_torch_vision.py | 94 ++++++++++ 4 files changed, 314 insertions(+), 43 deletions(-) create mode 100644 libs/infinity_emb/tests/end_to_end/test_torch_audio.py create mode 100644 libs/infinity_emb/tests/end_to_end/test_torch_vision.py diff --git a/docs/docs/cli_v2.md b/docs/docs/cli_v2.md index 33fd3d3a..a02a2c5b 100644 --- a/docs/docs/cli_v2.md +++ b/docs/docs/cli_v2.md @@ -5,47 +5,128 @@ Note: The section below is auto-generated by the makefile. ```bash infinity_emb v2 --help - - Usage: infinity_emb v2 [OPTIONS] - - Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil - Multiple Model CLI Playbook: - - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4` - - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && INFINITY_BATCH_SIZE="8;4;" - - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size 8` both models have batch-size 8. - -╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --model-id TEXT Huggingface model repo id. Subset of possible models: https://huggingface.co/models?other=text-embeddings-inference& [env var: `INFINITY_MODEL_ID`] │ -│ [default: michaelfeil/bge-small-en-v1.5] │ -│ --served-model-name TEXT the nickname for the API, under which the model_id can be selected [env var: `INFINITY_SERVED_MODEL_NAME`] │ -│ --batch-size INTEGER maximum batch size for inference [env var: `INFINITY_BATCH_SIZE`] [default: 32] │ -│ --revision TEXT huggingface model repo revision. [env var: `INFINITY_REVISION`] │ -│ --trust-remote-code --no-trust-remote-code if potential remote modeling code from huggingface repo is trusted. [env var: `INFINITY_TRUST_REMOTE_CODE`] [default: trust-remote-code] │ -│ --engine [torch|ctranslate2|optimum|debugengine] Which backend to use. `torch` uses Pytorch GPU/CPU, optimum uses ONNX on GPU/CPU/NVIDIA-TensorRT, `CTranslate2` uses torch+ctranslate2 on CPU/GPU. │ -│ [env var: `INFINITY_ENGINE`] │ -│ [default: torch] │ -│ --model-warmup --no-model-warmup if model should be warmed up after startup, and before ready. 
[env var: `INFINITY_MODEL_WARMUP`] [default: model-warmup] │ -│ --vector-disk-cache --no-vector-disk-cache If hash(request)/results should be cached to SQLite for latency improvement. [env var: `INFINITY_VECTOR_DISK_CACHE`] [default: vector-disk-cache] │ -│ --device [cpu|cuda|mps|tensorrt|auto] device to use for computing the model forward pass. [env var: `INFINITY_DEVICE`] [default: auto] │ -│ --lengths-via-tokenize --no-lengths-via-tokenize if True, returned tokens is based on actual tokenizer count. If false, uses len(input) as proxy. [env var: `INFINITY_LENGTHS_VIA_TOKENIZE`] │ -│ [default: lengths-via-tokenize] │ -│ --dtype [float32|float16|int8|fp8|auto] dtype for the model weights. [env var: `INFINITY_DTYPE`] [default: auto] │ -│ --embedding-dtype [float32|int8|uint8|binary|ubinary] dtype post-forward pass. If != `float32`, using Post-Forward Static quantization. [env var: `INFINITY_EMBEDDING_DTYPE`] [default: float32] │ -│ --pooling-method [mean|cls|auto] overwrite the pooling method if inferred incorrectly. [env var: `INFINITY_POOLING_METHOD`] [default: auto] │ -│ --compile --no-compile Enable usage of `torch.compile(dynamic=True)` if engine relies on it. [env var: `INFINITY_COMPILE`] [default: compile] │ -│ --bettertransformer --no-bettertransformer Enables varlen flash-attention-2 via the `BetterTransformer` implementation. If available for this model. [env var: `INFINITY_BETTERTRANSFORMER`] │ -│ [default: bettertransformer] │ -│ --preload-only --no-preload-only If true, only downloads models and verifies setup, then exit. Recommended for pre-caching the download in a Dockerfile. [env var: `INFINITY_PRELOAD_ONLY`] │ -│ [default: no-preload-only] │ -│ --host TEXT host for the FastAPI uvicorn server [env var: `INFINITY_HOST`] [default: 0.0.0.0] │ -│ --port INTEGER port for the FastAPI uvicorn server [env var: `INFINITY_PORT`] [default: 7997] │ -│ --url-prefix TEXT prefix for all routes of the FastAPI uvicorn server. Useful if you run behind a proxy / cascaded API. [env var: `INFINITY_URL_PREFIX`] │ -│ --redirect-slash TEXT where to redirect `/` requests to. [env var: `INFINITY_REDIRECT_SLASH`] [default: /docs] │ -│ --log-level [critical|error|warning|info|debug|trace] console log level. [env var: `INFINITY_LOG_LEVEL`] [default: info] │ -│ --permissive-cors --no-permissive-cors whether to allow permissive cors. [env var: `INFINITY_PERMISSIVE_CORS`] [default: no-permissive-cors] │ -│ --api-key TEXT api_key used for authentication headers. [env var: `INFINITY_API_KEY`] │ -│ --proxy-root-path TEXT Proxy prefix for the application. See: https://fastapi.tiangolo.com/advanced/behind-a-proxy/ [env var: `INFINITY_PROXY_ROOT_PATH`] │ -│ --help Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + + Usage: infinity_emb v2 [OPTIONS] + + Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil + Multiple Model CLI Playbook: + - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4` + - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && + INFINITY_BATCH_SIZE="8;4;" + - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size 8` both + models have batch-size 8. 
+ +╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --model-id TEXT Huggingface model repo id. Subset │ +│ of possible models: │ +│ https://huggingface.co/models?othe… │ +│ [env var: `INFINITY_MODEL_ID`] │ +│ [default: │ +│ michaelfeil/bge-small-en-v1.5] │ +│ --served-model-name TEXT the nickname for the API, under │ +│ which the model_id can be selected │ +│ [env var: │ +│ `INFINITY_SERVED_MODEL_NAME`] │ +│ --batch-size INTEGER maximum batch size for inference │ +│ [env var: `INFINITY_BATCH_SIZE`] │ +│ [default: 32] │ +│ --revision TEXT huggingface model repo revision. │ +│ [env var: `INFINITY_REVISION`] │ +│ --trust-remote-code --no-trust-remote-code if potential remote modeling code │ +│ from huggingface repo is trusted. │ +│ [env var: │ +│ `INFINITY_TRUST_REMOTE_CODE`] │ +│ [default: trust-remote-code] │ +│ --engine [torch|ctranslate2|optimum|debuge Which backend to use. `torch` uses │ +│ ngine] Pytorch GPU/CPU, optimum uses ONNX │ +│ on GPU/CPU/NVIDIA-TensorRT, │ +│ `CTranslate2` uses │ +│ torch+ctranslate2 on CPU/GPU. │ +│ [env var: `INFINITY_ENGINE`] │ +│ [default: torch] │ +│ --model-warmup --no-model-warmup if model should be warmed up after │ +│ startup, and before ready. │ +│ [env var: `INFINITY_MODEL_WARMUP`] │ +│ [default: model-warmup] │ +│ --vector-disk-cache --no-vector-disk-cache If hash(request)/results should be │ +│ cached to SQLite for latency │ +│ improvement. │ +│ [env var: │ +│ `INFINITY_VECTOR_DISK_CACHE`] │ +│ [default: vector-disk-cache] │ +│ --device [cpu|cuda|mps|tensorrt|auto] device to use for computing the │ +│ model forward pass. │ +│ [env var: `INFINITY_DEVICE`] │ +│ [default: auto] │ +│ --lengths-via-tokenize --no-lengths-via-tokenize if True, returned tokens is based │ +│ on actual tokenizer count. If │ +│ false, uses len(input) as proxy. │ +│ [env var: │ +│ `INFINITY_LENGTHS_VIA_TOKENIZE`] │ +│ [default: lengths-via-tokenize] │ +│ --dtype [float32|float16|int8|fp8|auto] dtype for the model weights. │ +│ [env var: `INFINITY_DTYPE`] │ +│ [default: auto] │ +│ --embedding-dtype [float32|int8|uint8|binary|ubinar dtype post-forward pass. If != │ +│ y] `float32`, using Post-Forward │ +│ Static quantization. │ +│ [env var: │ +│ `INFINITY_EMBEDDING_DTYPE`] │ +│ [default: float32] │ +│ --pooling-method [mean|cls|auto] overwrite the pooling method if │ +│ inferred incorrectly. │ +│ [env var: │ +│ `INFINITY_POOLING_METHOD`] │ +│ [default: auto] │ +│ --compile --no-compile Enable usage of │ +│ `torch.compile(dynamic=True)` if │ +│ engine relies on it. │ +│ [env var: `INFINITY_COMPILE`] │ +│ [default: compile] │ +│ --bettertransformer --no-bettertransformer Enables varlen flash-attention-2 │ +│ via the `BetterTransformer` │ +│ implementation. If available for │ +│ this model. │ +│ [env var: │ +│ `INFINITY_BETTERTRANSFORMER`] │ +│ [default: bettertransformer] │ +│ --preload-only --no-preload-only If true, only downloads models and │ +│ verifies setup, then exit. │ +│ Recommended for pre-caching the │ +│ download in a Dockerfile. │ +│ [env var: `INFINITY_PRELOAD_ONLY`] │ +│ [default: no-preload-only] │ +│ --host TEXT host for the FastAPI uvicorn server │ +│ [env var: `INFINITY_HOST`] │ +│ [default: 0.0.0.0] │ +│ --port INTEGER port for the FastAPI uvicorn server │ +│ [env var: `INFINITY_PORT`] │ +│ [default: 7997] │ +│ --url-prefix TEXT prefix for all routes of the │ +│ FastAPI uvicorn server. Useful if │ +│ you run behind a proxy / cascaded │ +│ API. 
│ +│ [env var: `INFINITY_URL_PREFIX`] │ +│ --redirect-slash TEXT where to redirect `/` requests to. │ +│ [env var: │ +│ `INFINITY_REDIRECT_SLASH`] │ +│ [default: /docs] │ +│ --log-level [critical|error|warning|info|debu console log level. │ +│ g|trace] [env var: `INFINITY_LOG_LEVEL`] │ +│ [default: info] │ +│ --permissive-cors --no-permissive-cors whether to allow permissive cors. │ +│ [env var: │ +│ `INFINITY_PERMISSIVE_CORS`] │ +│ [default: no-permissive-cors] │ +│ --api-key TEXT api_key used for authentication │ +│ headers. │ +│ [env var: `INFINITY_API_KEY`] │ +│ --proxy-root-path TEXT Proxy prefix for the application. │ +│ See: │ +│ https://fastapi.tiangolo.com/advan… │ +│ [env var: │ +│ `INFINITY_PROXY_ROOT_PATH`] │ +│ --help Show this message and exit. │ +╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` diff --git a/libs/infinity_emb/tests/conftest.py b/libs/infinity_emb/tests/conftest.py index 4fec5bd5..df557695 100644 --- a/libs/infinity_emb/tests/conftest.py +++ b/libs/infinity_emb/tests/conftest.py @@ -8,8 +8,10 @@ pytest.DEFAULT_BERT_MODEL = "michaelfeil/bge-small-en-v1.5" pytest.DEFAULT_RERANKER_MODEL = "mixedbread-ai/mxbai-rerank-xsmall-v1" pytest.DEFAULT_CLASSIFIER_MODEL = "SamLowe/roberta-base-go_emotions" +pytest.DEFAULT_AUDIO_MODEL = "laion/clap-htsat-unfused" +pytest.DEFAULT_VISION_MODEL = "wkcn/TinyCLIP-ViT-8M-16-Text-3M-YFCC15M" -pytest.ENGINE_METHODS = ["embed", "image_embed", "classify", "rerank"] +pytest.ENGINE_METHODS = ["embed", "image_embed", "classify", "rerank", "audio_embed"] @pytest.fixture diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py new file mode 100644 index 00000000..99672a5b --- /dev/null +++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py @@ -0,0 +1,94 @@ +import pytest +import torch +from asgi_lifespan import LifespanManager +from fastapi import status +from httpx import AsyncClient + +from infinity_emb import create_server +from infinity_emb.args import EngineArgs +from infinity_emb.primitives import Device, InferenceEngine + +PREFIX = "/v1_ct2" +MODEL: str = pytest.DEFAULT_AUDIO_MODEL # type: ignore[assignment] +batch_size = 32 if torch.cuda.is_available() else 8 + +app = create_server( + url_prefix=PREFIX, + engine_args_list=[ + EngineArgs( + model_name_or_path=MODEL, + batch_size=batch_size, + engine=InferenceEngine.torch, + device=Device.auto if not torch.backends.mps.is_available() else Device.cpu, + ) + ], +) + + +@pytest.fixture() +async def client(): + async with AsyncClient( + app=app, base_url="http://test", timeout=20 + ) as client, LifespanManager(app): + yield client + + +@pytest.mark.anyio +async def test_model_route(client): + response = await client.get(f"{PREFIX}/models") + assert response.status_code == 200 + rdata = response.json() + assert "data" in rdata + assert rdata["data"][0].get("id", "") == MODEL + assert isinstance(rdata["data"][0].get("stats"), dict) + assert "audio_embed" in rdata["data"][0]["capabilities"] + + +@pytest.mark.anyio +async def test_audio_single(client): + audio_url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + + response = await client.post( + f"{PREFIX}/embeddings_audio", + json={"model": MODEL, "input": audio_url}, + ) + assert response.status_code == 200 + rdata = response.json() + assert "model" in rdata + assert "usage" in rdata + rdata_results 
= rdata["data"] + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 + + +@pytest.mark.anyio +@pytest.mark.parametrize("no_of_audios", [0, 1, 5, 10]) +async def test_audio_multiple(client, no_of_audios): + audio_url = [ + "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + ] * no_of_audios + + response = await client.post( + f"{PREFIX}/embeddings_image", + json={"model": MODEL, "input": audio_url}, + ) + assert response.status_code == 200 + rdata = response.json() + rdata_results = rdata["data"] + assert len(rdata_results) == no_of_audios + if no_of_audios: + assert "model" in rdata + assert "usage" in rdata + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 + + +@pytest.mark.anyio +async def test_audio_fail(client): + audio_url = "https://www.google.com/404" + + response = await client.post( + f"{PREFIX}/embeddings_image", + json={"model": MODEL, "input": audio_url}, + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py new file mode 100644 index 00000000..2d41f10e --- /dev/null +++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py @@ -0,0 +1,94 @@ +import pytest +import torch +from asgi_lifespan import LifespanManager +from fastapi import status +from httpx import AsyncClient + +from infinity_emb import create_server +from infinity_emb.args import EngineArgs +from infinity_emb.primitives import Device, InferenceEngine + +PREFIX = "/v1_ct2" +MODEL: str = pytest.DEFAULT_VISION_MODEL # type: ignore[assignment] +batch_size = 32 if torch.cuda.is_available() else 8 + +app = create_server( + url_prefix=PREFIX, + engine_args_list=[ + EngineArgs( + model_name_or_path=MODEL, + batch_size=batch_size, + engine=InferenceEngine.torch, + device=Device.auto if not torch.backends.mps.is_available() else Device.cpu, + ) + ], +) + + +@pytest.fixture() +async def client(): + async with AsyncClient( + app=app, base_url="http://test", timeout=20 + ) as client, LifespanManager(app): + yield client + + +@pytest.mark.anyio +async def test_model_route(client): + response = await client.get(f"{PREFIX}/models") + assert response.status_code == 200 + rdata = response.json() + assert "data" in rdata + assert rdata["data"][0].get("id", "") == MODEL + assert isinstance(rdata["data"][0].get("stats"), dict) + assert "image_embed" in rdata["data"][0]["capabilities"] + + +@pytest.mark.anyio +async def test_vision_single(client): + image_url = "http://images.cocodataset.org/val2017/000000039769.jpg" + + response = await client.post( + f"{PREFIX}/embeddings_image", + json={"model": MODEL, "input": image_url}, + ) + assert response.status_code == 200 + rdata = response.json() + assert "model" in rdata + assert "usage" in rdata + rdata_results = rdata["data"] + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 + + +@pytest.mark.anyio +@pytest.mark.parametrize("no_of_images", [0, 1, 5, 10]) +async def test_vision_multiple(client, no_of_images): + image_url = [ + "http://images.cocodataset.org/val2017/000000039769.jpg" + ] * no_of_images + + response = await client.post( + f"{PREFIX}/embeddings_image", + json={"model": MODEL, "input": image_url}, + ) + assert response.status_code == 200 + rdata = response.json() + rdata_results = rdata["data"] + assert 
len(rdata_results) == no_of_images + if no_of_images: + assert "model" in rdata + assert "usage" in rdata + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 + + +@pytest.mark.anyio +async def test_vision_fail(client): + image_url = "https://www.google.com/404" + + response = await client.post( + f"{PREFIX}/embeddings_image", + json={"model": MODEL, "input": image_url}, + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST From 923b85e05939e6f360e169097e46bb17bff46c42 Mon Sep 17 00:00:00 2001 From: wirthual Date: Fri, 27 Sep 2024 04:46:12 +0200 Subject: [PATCH 04/14] revert docs v2 --- docs/docs/cli_v2.md | 170 +++++++++++--------------------------------- 1 file changed, 41 insertions(+), 129 deletions(-) diff --git a/docs/docs/cli_v2.md b/docs/docs/cli_v2.md index a02a2c5b..cc3b3317 100644 --- a/docs/docs/cli_v2.md +++ b/docs/docs/cli_v2.md @@ -1,132 +1,44 @@ -# CLI v2 Documentation - -The current version of Infinity uses the following arguments in its CLI: -Note: The section below is auto-generated by the makefile. - ```bash infinity_emb v2 --help - - Usage: infinity_emb v2 [OPTIONS] - - Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil - Multiple Model CLI Playbook: - - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4` - - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && - INFINITY_BATCH_SIZE="8;4;" - - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size 8` both - models have batch-size 8. - -╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --model-id TEXT Huggingface model repo id. Subset │ -│ of possible models: │ -│ https://huggingface.co/models?othe… │ -│ [env var: `INFINITY_MODEL_ID`] │ -│ [default: │ -│ michaelfeil/bge-small-en-v1.5] │ -│ --served-model-name TEXT the nickname for the API, under │ -│ which the model_id can be selected │ -│ [env var: │ -│ `INFINITY_SERVED_MODEL_NAME`] │ -│ --batch-size INTEGER maximum batch size for inference │ -│ [env var: `INFINITY_BATCH_SIZE`] │ -│ [default: 32] │ -│ --revision TEXT huggingface model repo revision. │ -│ [env var: `INFINITY_REVISION`] │ -│ --trust-remote-code --no-trust-remote-code if potential remote modeling code │ -│ from huggingface repo is trusted. │ -│ [env var: │ -│ `INFINITY_TRUST_REMOTE_CODE`] │ -│ [default: trust-remote-code] │ -│ --engine [torch|ctranslate2|optimum|debuge Which backend to use. `torch` uses │ -│ ngine] Pytorch GPU/CPU, optimum uses ONNX │ -│ on GPU/CPU/NVIDIA-TensorRT, │ -│ `CTranslate2` uses │ -│ torch+ctranslate2 on CPU/GPU. │ -│ [env var: `INFINITY_ENGINE`] │ -│ [default: torch] │ -│ --model-warmup --no-model-warmup if model should be warmed up after │ -│ startup, and before ready. │ -│ [env var: `INFINITY_MODEL_WARMUP`] │ -│ [default: model-warmup] │ -│ --vector-disk-cache --no-vector-disk-cache If hash(request)/results should be │ -│ cached to SQLite for latency │ -│ improvement. │ -│ [env var: │ -│ `INFINITY_VECTOR_DISK_CACHE`] │ -│ [default: vector-disk-cache] │ -│ --device [cpu|cuda|mps|tensorrt|auto] device to use for computing the │ -│ model forward pass. │ -│ [env var: `INFINITY_DEVICE`] │ -│ [default: auto] │ -│ --lengths-via-tokenize --no-lengths-via-tokenize if True, returned tokens is based │ -│ on actual tokenizer count. 
If │ -│ false, uses len(input) as proxy. │ -│ [env var: │ -│ `INFINITY_LENGTHS_VIA_TOKENIZE`] │ -│ [default: lengths-via-tokenize] │ -│ --dtype [float32|float16|int8|fp8|auto] dtype for the model weights. │ -│ [env var: `INFINITY_DTYPE`] │ -│ [default: auto] │ -│ --embedding-dtype [float32|int8|uint8|binary|ubinar dtype post-forward pass. If != │ -│ y] `float32`, using Post-Forward │ -│ Static quantization. │ -│ [env var: │ -│ `INFINITY_EMBEDDING_DTYPE`] │ -│ [default: float32] │ -│ --pooling-method [mean|cls|auto] overwrite the pooling method if │ -│ inferred incorrectly. │ -│ [env var: │ -│ `INFINITY_POOLING_METHOD`] │ -│ [default: auto] │ -│ --compile --no-compile Enable usage of │ -│ `torch.compile(dynamic=True)` if │ -│ engine relies on it. │ -│ [env var: `INFINITY_COMPILE`] │ -│ [default: compile] │ -│ --bettertransformer --no-bettertransformer Enables varlen flash-attention-2 │ -│ via the `BetterTransformer` │ -│ implementation. If available for │ -│ this model. │ -│ [env var: │ -│ `INFINITY_BETTERTRANSFORMER`] │ -│ [default: bettertransformer] │ -│ --preload-only --no-preload-only If true, only downloads models and │ -│ verifies setup, then exit. │ -│ Recommended for pre-caching the │ -│ download in a Dockerfile. │ -│ [env var: `INFINITY_PRELOAD_ONLY`] │ -│ [default: no-preload-only] │ -│ --host TEXT host for the FastAPI uvicorn server │ -│ [env var: `INFINITY_HOST`] │ -│ [default: 0.0.0.0] │ -│ --port INTEGER port for the FastAPI uvicorn server │ -│ [env var: `INFINITY_PORT`] │ -│ [default: 7997] │ -│ --url-prefix TEXT prefix for all routes of the │ -│ FastAPI uvicorn server. Useful if │ -│ you run behind a proxy / cascaded │ -│ API. │ -│ [env var: `INFINITY_URL_PREFIX`] │ -│ --redirect-slash TEXT where to redirect `/` requests to. │ -│ [env var: │ -│ `INFINITY_REDIRECT_SLASH`] │ -│ [default: /docs] │ -│ --log-level [critical|error|warning|info|debu console log level. │ -│ g|trace] [env var: `INFINITY_LOG_LEVEL`] │ -│ [default: info] │ -│ --permissive-cors --no-permissive-cors whether to allow permissive cors. │ -│ [env var: │ -│ `INFINITY_PERMISSIVE_CORS`] │ -│ [default: no-permissive-cors] │ -│ --api-key TEXT api_key used for authentication │ -│ headers. │ -│ [env var: `INFINITY_API_KEY`] │ -│ --proxy-root-path TEXT Proxy prefix for the application. │ -│ See: │ -│ https://fastapi.tiangolo.com/advan… │ -│ [env var: │ -│ `INFINITY_PROXY_ROOT_PATH`] │ -│ --help Show this message and exit. │ -╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -``` + Usage: infinity_emb v2 [OPTIONS] + + Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil + Multiple Model CLI Playbook: + - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4` + - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && INFINITY_BATCH_SIZE="8;4;" + - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size 8` both models have batch-size 8. + +╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --model-id TEXT Huggingface model repo id. 
Subset of possible models: https://huggingface.co/models?other=text-embeddings-inference& [env var: `INFINITY_MODEL_ID`] │ +│ [default: michaelfeil/bge-small-en-v1.5] │ +│ --served-model-name TEXT the nickname for the API, under which the model_id can be selected [env var: `INFINITY_SERVED_MODEL_NAME`] │ +│ --batch-size INTEGER maximum batch size for inference [env var: `INFINITY_BATCH_SIZE`] [default: 32] │ +│ --revision TEXT huggingface model repo revision. [env var: `INFINITY_REVISION`] │ +│ --trust-remote-code --no-trust-remote-code if potential remote modeling code from huggingface repo is trusted. [env var: `INFINITY_TRUST_REMOTE_CODE`] [default: trust-remote-code] │ +│ --engine [torch|ctranslate2|optimum|debugengine] Which backend to use. `torch` uses Pytorch GPU/CPU, optimum uses ONNX on GPU/CPU/NVIDIA-TensorRT, `CTranslate2` uses torch+ctranslate2 on CPU/GPU. │ +│ [env var: `INFINITY_ENGINE`] │ +│ [default: torch] │ +│ --model-warmup --no-model-warmup if model should be warmed up after startup, and before ready. [env var: `INFINITY_MODEL_WARMUP`] [default: model-warmup] │ +│ --vector-disk-cache --no-vector-disk-cache If hash(request)/results should be cached to SQLite for latency improvement. [env var: `INFINITY_VECTOR_DISK_CACHE`] [default: vector-disk-cache] │ +│ --device [cpu|cuda|mps|tensorrt|auto] device to use for computing the model forward pass. [env var: `INFINITY_DEVICE`] [default: auto] │ +│ --lengths-via-tokenize --no-lengths-via-tokenize if True, returned tokens is based on actual tokenizer count. If false, uses len(input) as proxy. [env var: `INFINITY_LENGTHS_VIA_TOKENIZE`] │ +│ [default: lengths-via-tokenize] │ +│ --dtype [float32|float16|int8|fp8|auto] dtype for the model weights. [env var: `INFINITY_DTYPE`] [default: auto] │ +│ --embedding-dtype [float32|int8|uint8|binary|ubinary] dtype post-forward pass. If != `float32`, using Post-Forward Static quantization. [env var: `INFINITY_EMBEDDING_DTYPE`] [default: float32] │ +│ --pooling-method [mean|cls|auto] overwrite the pooling method if inferred incorrectly. [env var: `INFINITY_POOLING_METHOD`] [default: auto] │ +│ --compile --no-compile Enable usage of `torch.compile(dynamic=True)` if engine relies on it. [env var: `INFINITY_COMPILE`] [default: compile] │ +│ --bettertransformer --no-bettertransformer Enables varlen flash-attention-2 via the `BetterTransformer` implementation. If available for this model. [env var: `INFINITY_BETTERTRANSFORMER`] │ +│ [default: bettertransformer] │ +│ --preload-only --no-preload-only If true, only downloads models and verifies setup, then exit. Recommended for pre-caching the download in a Dockerfile. [env var: `INFINITY_PRELOAD_ONLY`] │ +│ [default: no-preload-only] │ +│ --host TEXT host for the FastAPI uvicorn server [env var: `INFINITY_HOST`] [default: 0.0.0.0] │ +│ --port INTEGER port for the FastAPI uvicorn server [env var: `INFINITY_PORT`] [default: 7997] │ +│ --url-prefix TEXT prefix for all routes of the FastAPI uvicorn server. Useful if you run behind a proxy / cascaded API. [env var: `INFINITY_URL_PREFIX`] │ +│ --redirect-slash TEXT where to redirect `/` requests to. [env var: `INFINITY_REDIRECT_SLASH`] [default: /docs] │ +│ --log-level [critical|error|warning|info|debug|trace] console log level. [env var: `INFINITY_LOG_LEVEL`] [default: info] │ +│ --permissive-cors --no-permissive-cors whether to allow permissive cors. [env var: `INFINITY_PERMISSIVE_CORS`] [default: no-permissive-cors] │ +│ --api-key TEXT api_key used for authentication headers. 
[env var: `INFINITY_API_KEY`] │ +│ --proxy-root-path TEXT Proxy prefix for the application. See: https://fastapi.tiangolo.com/advanced/behind-a-proxy/ [env var: `INFINITY_PROXY_ROOT_PATH`] │ +│ --help Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ \ No newline at end of file From b48e8089b1d5c79b4e63596a7885566c60e85860 Mon Sep 17 00:00:00 2001 From: wirthual Date: Fri, 27 Sep 2024 04:47:12 +0200 Subject: [PATCH 05/14] revert docs v2 --- docs/docs/cli_v2.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/docs/cli_v2.md b/docs/docs/cli_v2.md index cc3b3317..31e0c743 100644 --- a/docs/docs/cli_v2.md +++ b/docs/docs/cli_v2.md @@ -41,4 +41,6 @@ infinity_emb v2 --help │ --api-key TEXT api_key used for authentication headers. [env var: `INFINITY_API_KEY`] │ │ --proxy-root-path TEXT Proxy prefix for the application. See: https://fastapi.tiangolo.com/advanced/behind-a-proxy/ [env var: `INFINITY_PROXY_ROOT_PATH`] │ │ --help Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ \ No newline at end of file +╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + +``` \ No newline at end of file From ef245358610b544e4509c3d5d2f92f88065d00e5 Mon Sep 17 00:00:00 2001 From: wirthual Date: Fri, 27 Sep 2024 17:23:44 +0200 Subject: [PATCH 06/14] fix test cases --- .../tests/end_to_end/test_torch_audio.py | 21 ++++++++++++++----- .../tests/end_to_end/test_torch_vision.py | 15 ++++++++++--- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py index 99672a5b..ed264d2d 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py @@ -62,15 +62,15 @@ async def test_audio_single(client): @pytest.mark.anyio -@pytest.mark.parametrize("no_of_audios", [0, 1, 5, 10]) +@pytest.mark.parametrize("no_of_audios", [1, 5, 10]) async def test_audio_multiple(client, no_of_audios): - audio_url = [ + audio_urls = [ "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" ] * no_of_audios response = await client.post( - f"{PREFIX}/embeddings_image", - json={"model": MODEL, "input": audio_url}, + f"{PREFIX}/embeddings_audio", + json={"model": MODEL, "input": audio_urls}, ) assert response.status_code == 200 rdata = response.json() @@ -88,7 +88,18 @@ async def test_audio_fail(client): audio_url = "https://www.google.com/404" response = await client.post( - f"{PREFIX}/embeddings_image", + f"{PREFIX}/embeddings_audio", json={"model": MODEL, "input": audio_url}, ) assert response.status_code == status.HTTP_400_BAD_REQUEST + + +@pytest.mark.anyio +async def test_audio_empty(client): + audio_url_empty = [] + + response_empty = await client.post( + 
f"{PREFIX}/embeddings_audio", + json={"model": MODEL, "input": audio_url_empty}, + ) + assert response_empty.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY \ No newline at end of file diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py index 2d41f10e..3db5c26d 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py @@ -62,15 +62,15 @@ async def test_vision_single(client): @pytest.mark.anyio -@pytest.mark.parametrize("no_of_images", [0, 1, 5, 10]) +@pytest.mark.parametrize("no_of_images", [1, 5, 10]) async def test_vision_multiple(client, no_of_images): - image_url = [ + image_urls = [ "http://images.cocodataset.org/val2017/000000039769.jpg" ] * no_of_images response = await client.post( f"{PREFIX}/embeddings_image", - json={"model": MODEL, "input": image_url}, + json={"model": MODEL, "input": image_urls}, ) assert response.status_code == 200 rdata = response.json() @@ -92,3 +92,12 @@ async def test_vision_fail(client): json={"model": MODEL, "input": image_url}, ) assert response.status_code == status.HTTP_400_BAD_REQUEST + +@pytest.mark.anyio +async def test_vision_empty(client): + image_url_empty = [] + response = await client.post( + f"{PREFIX}/embeddings_image", + json={"model": MODEL, "input": image_url_empty}, + ) + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY \ No newline at end of file From a894760ffd901c6781f87d0d66e58fcd618911e3 Mon Sep 17 00:00:00 2001 From: wirthual Date: Sat, 28 Sep 2024 16:38:24 +0200 Subject: [PATCH 07/14] add test for text only vision case --- .../tests/end_to_end/test_torch_audio.py | 11 ++++---- .../tests/end_to_end/test_torch_vision.py | 28 +++++++++++++++---- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py index ed264d2d..81db3a18 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py @@ -76,11 +76,10 @@ async def test_audio_multiple(client, no_of_audios): rdata = response.json() rdata_results = rdata["data"] assert len(rdata_results) == no_of_audios - if no_of_audios: - assert "model" in rdata - assert "usage" in rdata - assert rdata_results[0]["object"] == "embedding" - assert len(rdata_results[0]["embedding"]) > 0 + assert "model" in rdata + assert "usage" in rdata + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 @pytest.mark.anyio @@ -102,4 +101,4 @@ async def test_audio_empty(client): f"{PREFIX}/embeddings_audio", json={"model": MODEL, "input": audio_url_empty}, ) - assert response_empty.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY \ No newline at end of file + assert response_empty.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py index 3db5c26d..9b0fb8d7 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py @@ -61,6 +61,22 @@ async def test_vision_single(client): assert len(rdata_results[0]["embedding"]) > 0 +@pytest.mark.anyio +async def test_vision_single_text_only(client): + text = "a image of a cat" + + response = await client.post( + f"{PREFIX}/embeddings_image", + json={"model": MODEL, "input": text}, + ) + 
assert response.status_code == 200 + rdata = response.json() + assert "model" in rdata + assert "usage" in rdata + rdata_results = rdata["data"] + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 + @pytest.mark.anyio @pytest.mark.parametrize("no_of_images", [1, 5, 10]) async def test_vision_multiple(client, no_of_images): @@ -76,11 +92,10 @@ async def test_vision_multiple(client, no_of_images): rdata = response.json() rdata_results = rdata["data"] assert len(rdata_results) == no_of_images - if no_of_images: - assert "model" in rdata - assert "usage" in rdata - assert rdata_results[0]["object"] == "embedding" - assert len(rdata_results[0]["embedding"]) > 0 + assert "model" in rdata + assert "usage" in rdata + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 @pytest.mark.anyio @@ -93,6 +108,7 @@ async def test_vision_fail(client): ) assert response.status_code == status.HTTP_400_BAD_REQUEST + @pytest.mark.anyio async def test_vision_empty(client): image_url_empty = [] @@ -100,4 +116,4 @@ async def test_vision_empty(client): f"{PREFIX}/embeddings_image", json={"model": MODEL, "input": image_url_empty}, ) - assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY \ No newline at end of file + assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY From a592419f81e79f3ae0ee2871f2d38fa843d6a753 Mon Sep 17 00:00:00 2001 From: wirthual Date: Sat, 28 Sep 2024 16:40:51 +0200 Subject: [PATCH 08/14] add text only case for audio --- .../tests/end_to_end/test_torch_audio.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py index 81db3a18..4795b74e 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py @@ -61,6 +61,22 @@ async def test_audio_single(client): assert len(rdata_results[0]["embedding"]) > 0 +@pytest.mark.anyio +async def test_audio_single_text_only(client): + text = "a sound of a at" + + response = await client.post( + f"{PREFIX}/embeddings_audio", + json={"model": MODEL, "input": text}, + ) + assert response.status_code == 200 + rdata = response.json() + assert "model" in rdata + assert "usage" in rdata + rdata_results = rdata["data"] + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 + @pytest.mark.anyio @pytest.mark.parametrize("no_of_audios", [1, 5, 10]) async def test_audio_multiple(client, no_of_audios): From ad2d262b99369f0289e7137840ffa2bddfda3436 Mon Sep 17 00:00:00 2001 From: wirthual Date: Sat, 28 Sep 2024 17:29:19 +0200 Subject: [PATCH 09/14] format code --- libs/infinity_emb/tests/end_to_end/test_torch_audio.py | 1 + libs/infinity_emb/tests/end_to_end/test_torch_vision.py | 1 + 2 files changed, 2 insertions(+) diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py index 4795b74e..d58880c9 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py @@ -77,6 +77,7 @@ async def test_audio_single_text_only(client): assert rdata_results[0]["object"] == "embedding" assert len(rdata_results[0]["embedding"]) > 0 + @pytest.mark.anyio @pytest.mark.parametrize("no_of_audios", [1, 5, 10]) async def test_audio_multiple(client, no_of_audios): diff --git 
a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
index 9b0fb8d7..be40044f 100644
--- a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
+++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
@@ -77,6 +77,7 @@ async def test_vision_single_text_only(client):
     assert rdata_results[0]["object"] == "embedding"
     assert len(rdata_results[0]["embedding"]) > 0
 
+
 @pytest.mark.anyio
 @pytest.mark.parametrize("no_of_images", [1, 5, 10])
 async def test_vision_multiple(client, no_of_images):

From 074105db340f05ded0509bdc0cabfeb431f56587 Mon Sep 17 00:00:00 2001
From: wirthual
Date: Sat, 28 Sep 2024 18:03:48 +0200
Subject: [PATCH 10/14] skip text test for now to see updated coverage

---
 libs/infinity_emb/tests/end_to_end/test_torch_audio.py  | 1 +
 libs/infinity_emb/tests/end_to_end/test_torch_vision.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py
index d58880c9..80625b43 100644
--- a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py
+++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py
@@ -62,6 +62,7 @@ async def test_audio_single(client):
 
 
 @pytest.mark.anyio
+@pytest.mark.skip("text only")
 async def test_audio_single_text_only(client):
     text = "a sound of a at"
 
diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
index be40044f..f6e5b8a1 100644
--- a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
+++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
@@ -62,6 +62,7 @@ async def test_vision_single(client):
 
 
 @pytest.mark.anyio
+@pytest.mark.skip("text only")
 async def test_vision_single_text_only(client):
     text = "a image of a cat"
 

From f2925c87f15227843aa8f05d5435afc75d2babac Mon Sep 17 00:00:00 2001
From: wirthual
Date: Sun, 29 Sep 2024 04:22:09 +0200
Subject: [PATCH 11/14] revert cli doc from main branch

---
 docs/docs/cli_v2.md | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/docs/docs/cli_v2.md b/docs/docs/cli_v2.md
index 31e0c743..33fd3d3a 100644
--- a/docs/docs/cli_v2.md
+++ b/docs/docs/cli_v2.md
@@ -1,14 +1,19 @@
+# CLI v2 Documentation
+
+The current version of Infinity uses the following arguments in its CLI:
+Note: The section below is auto-generated by the makefile.
+
 ```bash
 infinity_emb v2 --help
-
+
  Usage: infinity_emb v2 [OPTIONS]
-
+
  Infinity API ♾️ cli v2. MIT License. Copyright (c) 2023-now Michael Feil
  Multiple Model CLI Playbook:
  - 1. cli options can be overloaded i.e. `v2 --model-id model/id1 --model-id/id2 --batch-size 8 --batch-size 4`
  - 2. or adapt the defaults by setting ENV Variables separated by `;`: INFINITY_MODEL_ID="model/id1;model/id2;" && INFINITY_BATCH_SIZE="8;4;"
  - 3. single items are broadcasted to `--model-id` length, making `v2 --model-id model/id1 --model-id/id2 --batch-size 8` both models have batch-size 8.
-
+
 ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │ --model-id                                            TEXT                                      Huggingface model repo id. 
Subset of possible models: https://huggingface.co/models?other=text-embeddings-inference& [env var: `INFINITY_MODEL_ID`] │ │ [default: michaelfeil/bge-small-en-v1.5] │ @@ -43,4 +48,4 @@ infinity_emb v2 --help │ --help Show this message and exit. │ ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -``` \ No newline at end of file +``` From 7558e7a525519ad9b02eb847c5d59b577b17fe31 Mon Sep 17 00:00:00 2001 From: wirthual Date: Mon, 30 Sep 2024 02:49:11 +0200 Subject: [PATCH 12/14] add changes to support text and urls --- .../infinity_emb/fastapi_schemas/pymodels.py | 13 ++++- .../infinity_emb/infinity_server.py | 44 ++++++++++++--- .../tests/end_to_end/test_torch_audio.py | 56 ++++++++++++++++++- 3 files changed, 101 insertions(+), 12 deletions(-) diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py index 3852b497..7bb93f41 100644 --- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py +++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py @@ -93,8 +93,17 @@ class ImageEmbeddingInput(BaseModel): user: Optional[str] = None -class AudioEmbeddingInput(ImageEmbeddingInput): - pass +class AudioEmbeddingInput(BaseModel): + input: Union[ # type: ignore + conlist( # type: ignore + Union[Annotated[AnyUrl, HttpUrl], str], + **ITEMS_LIMIT_SMALL, + ), + Union[Annotated[AnyUrl, HttpUrl], str], + ] + model: str = "default/not-specified" + encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float + user: Optional[str] = None class _EmbeddingObject(BaseModel): diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 73bb5996..efcd60a5 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -8,6 +8,7 @@ import time from contextlib import asynccontextmanager from typing import Any, Optional +from urllib.parse import urlparse import infinity_emb from infinity_emb._optional_imports import CHECK_TYPER, CHECK_UVICORN @@ -411,29 +412,54 @@ async def _embeddings_audio(data: AudioEmbeddingInput): json={"model":"laion/larger_clap_general","input":["https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"]}) """ engine = _resolve_engine(data.model) - if hasattr(data.input, "host"): - # if it is a single url - audio_inputs = [str(data.input)] + input_list: list[str] = [] + if isinstance(data.input, str): + input_list.append(data.input) else: - audio_inputs = [str(d) for d in data.input] # type: ignore + input_list = data.input # type: ignore + audio_urls = [] + texts = [] + is_audios = [] + for input in input_list: + parsed_url = urlparse(input) + # Todo: Improve url check + if parsed_url.netloc and parsed_url.scheme: + # if it is a single url + audio_urls.append(str(input)) + is_audios.append(True) + else: + texts.append(input) # type: ignore + is_audios.append(False) try: - logger.debug("[📝] Received request with %s Urls ", len(audio_inputs)) + logger.debug( + f"[📝] Received request with {len(audio_urls)} Urls and {len(texts)} sentences" + ) start = time.perf_counter() - embedding, usage = await engine.audio_embed(audios=audio_inputs) # type: ignore + if audio_urls: + audio_embeddings, 
usage = await engine.audio_embed(audios=audio_urls) # type: ignore + if texts: + text_embeddings, usage = await engine.embed(sentences=texts) + + embeddings_with_restored_order = [] + for is_audio in is_audios: + if is_audio: + embeddings_with_restored_order.append(audio_embeddings.pop(0)) + else: + embeddings_with_restored_order.append(text_embeddings.pop(0)) duration = (time.perf_counter() - start) * 1000 - logger.debug("[✅] Done in %s ms", duration) + logger.debug(f"[✅] Done in {duration} ms") return OpenAIEmbeddingResult.to_embeddings_response( - embeddings=embedding, + embeddings=embeddings_with_restored_order, engine_args=engine.engine_args, encoding_format=data.encoding_format, usage=usage, ) except AudioCorruption as ex: raise errors.OpenAIException( - f"AudioCorruption, could not open {audio_inputs} -> {ex}", + f"AudioCorruption, could not open {audio_urls} -> {ex}", code=status.HTTP_400_BAD_REQUEST, ) except ModelNotDeployedError as ex: diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py index 80625b43..415a1287 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_audio.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_audio.py @@ -25,6 +25,13 @@ ) +def cosine_similarity(a, b): + from numpy import dot + from numpy.linalg import norm + + return dot(a, b) / (norm(a) * norm(b)) + + @pytest.fixture() async def client(): async with AsyncClient( @@ -62,7 +69,6 @@ async def test_audio_single(client): @pytest.mark.anyio -@pytest.mark.skip("text only") async def test_audio_single_text_only(client): text = "a sound of a at" @@ -79,6 +85,54 @@ async def test_audio_single_text_only(client): assert len(rdata_results[0]["embedding"]) > 0 +@pytest.mark.anyio +@pytest.mark.parametrize("no_of_input_pairs", [1, 5]) +async def test_audio_text_url_mixed(client, no_of_input_pairs): + text = "a sound of a at" + audio_url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + + input = [text, audio_url] * no_of_input_pairs + + response = await client.post( + f"{PREFIX}/embeddings_audio", + json={"model": MODEL, "input": input}, + ) + assert response.status_code == 200 + rdata = response.json() + assert "model" in rdata + assert "usage" in rdata + rdata_results = rdata["data"] + assert rdata_results[0]["object"] == "embedding" + assert len(rdata_results[0]["embedding"]) > 0 + assert len(rdata_results) == len(input) + + +@pytest.mark.anyio +async def test_meta(client): + audio_url = "https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + + input = [audio_url, "a beep", "a horse", "a fish"] + + response = await client.post( + f"{PREFIX}/embeddings_audio", + json={"model": MODEL, "input": input}, + ) + assert response.status_code == 200 + rdata = response.json() + rdata_results = rdata["data"] + + embeddings_audio_beep = rdata_results[0]["embedding"] + embeddings_text_beep = rdata_results[1]["embedding"] + embeddings_text_horse = rdata_results[2]["embedding"] + embeddings_text_fish = rdata_results[3]["embedding"] + assert cosine_similarity( + embeddings_audio_beep, embeddings_text_beep + ) > cosine_similarity(embeddings_audio_beep, embeddings_text_fish) + assert cosine_similarity( + embeddings_audio_beep, embeddings_text_beep + ) > cosine_similarity(embeddings_audio_beep, embeddings_text_horse) + + @pytest.mark.anyio @pytest.mark.parametrize("no_of_audios", [1, 5, 10]) 
async def test_audio_multiple(client, no_of_audios): From 93ecc5dd8cec36db4e26e75a34ef412224d78c1b Mon Sep 17 00:00:00 2001 From: wirthual Date: Mon, 30 Sep 2024 16:55:52 +0200 Subject: [PATCH 13/14] address comments. Report correct usage --- .../infinity_emb/infinity_server.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index efcd60a5..da98d720 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -33,6 +33,7 @@ Device, Dtype, EmbeddingDtype, + EmbeddingReturnType, ImageCorruption, InferenceEngine, ModelNotDeployedError, @@ -412,15 +413,15 @@ async def _embeddings_audio(data: AudioEmbeddingInput): json={"model":"laion/larger_clap_general","input":["https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"]}) """ engine = _resolve_engine(data.model) - input_list: list[str] = [] + sentences_and_urls: list[str] = [] if isinstance(data.input, str): - input_list.append(data.input) + sentences_and_urls.append(data.input) else: - input_list = data.input # type: ignore + sentences_and_urls = data.input # type: ignore audio_urls = [] - texts = [] + sentences = [] is_audios = [] - for input in input_list: + for input in sentences_and_urls: parsed_url = urlparse(input) # Todo: Improve url check if parsed_url.netloc and parsed_url.scheme: @@ -428,18 +429,23 @@ async def _embeddings_audio(data: AudioEmbeddingInput): audio_urls.append(str(input)) is_audios.append(True) else: - texts.append(input) # type: ignore + sentences.append(input) # type: ignore is_audios.append(False) try: logger.debug( - f"[📝] Received request with {len(audio_urls)} Urls and {len(texts)} sentences" + f"[📝] Received request with {len(audio_urls)} Urls and {len(sentences)} sentences" ) start = time.perf_counter() + total_usage = 0 + audio_embeddings: list[EmbeddingReturnType] = [] + text_embeddings: list[EmbeddingReturnType] = [] if audio_urls: - audio_embeddings, usage = await engine.audio_embed(audios=audio_urls) # type: ignore - if texts: - text_embeddings, usage = await engine.embed(sentences=texts) + audio_embeddings, usage_audio = await engine.audio_embed(audios=audio_urls) # type: ignore + total_usage += usage_audio + if sentences: + text_embeddings, usage_text = await engine.embed(sentences=sentences) + total_usage += usage_text embeddings_with_restored_order = [] for is_audio in is_audios: @@ -455,7 +461,7 @@ async def _embeddings_audio(data: AudioEmbeddingInput): embeddings=embeddings_with_restored_order, engine_args=engine.engine_args, encoding_format=data.encoding_format, - usage=usage, + usage=total_usage, ) except AudioCorruption as ex: raise errors.OpenAIException( From a74a5dfe40997bb7f152cdb9c5c21b07bb8bdb81 Mon Sep 17 00:00:00 2001 From: wirthual Date: Tue, 1 Oct 2024 03:24:41 +0200 Subject: [PATCH 14/14] update endpoint usage for mixed case. 
From a74a5dfe40997bb7f152cdb9c5c21b07bb8bdb81 Mon Sep 17 00:00:00 2001
From: wirthual
Date: Tue, 1 Oct 2024 03:24:41 +0200
Subject: [PATCH 14/14] update endpoint usage for mixed case. Extend vision
 cases

---
 .../infinity_emb/fastapi_schemas/pymodels.py  | 13 +---
 .../infinity_emb/infinity_server.py           | 52 +++-----------
 .../infinity_emb/tests/end_to_end/conftest.py |  6 ++
 .../tests/end_to_end/test_torch_audio.py      | 75 ++++++++-----------
 .../tests/end_to_end/test_torch_vision.py     | 49 +++++++++++-
 5 files changed, 98 insertions(+), 97 deletions(-)

diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
index 7bb93f41..3852b497 100644
--- a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
+++ b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
@@ -93,17 +93,8 @@ class ImageEmbeddingInput(BaseModel):
     user: Optional[str] = None


-class AudioEmbeddingInput(BaseModel):
-    input: Union[  # type: ignore
-        conlist(  # type: ignore
-            Union[Annotated[AnyUrl, HttpUrl], str],
-            **ITEMS_LIMIT_SMALL,
-        ),
-        Union[Annotated[AnyUrl, HttpUrl], str],
-    ]
-    model: str = "default/not-specified"
-    encoding_format: EmbeddingEncodingFormat = EmbeddingEncodingFormat.float
-    user: Optional[str] = None
+class AudioEmbeddingInput(ImageEmbeddingInput):
+    pass


 class _EmbeddingObject(BaseModel):
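Note on the pymodels.py hunk above: AudioEmbeddingInput previously duplicated every field of ImageEmbeddingInput, so the two request schemas could drift apart; subclassing keeps them identical by construction. A minimal sketch of the pattern (fields abbreviated here; the real models additionally constrain list length with conlist and validate URL types):

    from typing import Optional, Union

    from pydantic import BaseModel

    class ImageEmbeddingInput(BaseModel):
        input: Union[list[str], str]
        model: str = "default/not-specified"
        user: Optional[str] = None

    class AudioEmbeddingInput(ImageEmbeddingInput):
        pass  # inherits all fields; add audio-specific ones here only if ever needed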
diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py
index da98d720..73bb5996 100644
--- a/libs/infinity_emb/infinity_emb/infinity_server.py
+++ b/libs/infinity_emb/infinity_emb/infinity_server.py
@@ -8,7 +8,6 @@
 import time
 from contextlib import asynccontextmanager
 from typing import Any, Optional
-from urllib.parse import urlparse

 import infinity_emb
 from infinity_emb._optional_imports import CHECK_TYPER, CHECK_UVICORN
@@ -33,7 +32,6 @@
     Device,
     Dtype,
     EmbeddingDtype,
-    EmbeddingReturnType,
     ImageCorruption,
     InferenceEngine,
     ModelNotDeployedError,
@@ -413,59 +411,29 @@ async def _embeddings_audio(data: AudioEmbeddingInput):
             json={"model":"laion/larger_clap_general","input":["https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav"]})
         """
         engine = _resolve_engine(data.model)
-        sentences_and_urls: list[str] = []
-        if isinstance(data.input, str):
-            sentences_and_urls.append(data.input)
+        if hasattr(data.input, "host"):
+            # if it is a single url
+            audio_inputs = [str(data.input)]
         else:
-            sentences_and_urls = data.input  # type: ignore
-        audio_urls = []
-        sentences = []
-        is_audios = []
-        for input in sentences_and_urls:
-            parsed_url = urlparse(input)
-            # Todo: Improve url check
-            if parsed_url.netloc and parsed_url.scheme:
-                # if it is a single url
-                audio_urls.append(str(input))
-                is_audios.append(True)
-            else:
-                sentences.append(input)  # type: ignore
-                is_audios.append(False)
+            audio_inputs = [str(d) for d in data.input]  # type: ignore
         try:
-            logger.debug(
-                f"[📝] Received request with {len(audio_urls)} Urls and {len(sentences)} sentences"
-            )
+            logger.debug("[📝] Received request with %s Urls ", len(audio_inputs))
             start = time.perf_counter()
-            total_usage = 0
-            audio_embeddings: list[EmbeddingReturnType] = []
-            text_embeddings: list[EmbeddingReturnType] = []
-            if audio_urls:
-                audio_embeddings, usage_audio = await engine.audio_embed(audios=audio_urls)  # type: ignore
-                total_usage += usage_audio
-            if sentences:
-                text_embeddings, usage_text = await engine.embed(sentences=sentences)
-                total_usage += usage_text
-
-            embeddings_with_restored_order = []
-            for is_audio in is_audios:
-                if is_audio:
-                    embeddings_with_restored_order.append(audio_embeddings.pop(0))
-                else:
-                    embeddings_with_restored_order.append(text_embeddings.pop(0))
+            embedding, usage = await engine.audio_embed(audios=audio_inputs)  # type: ignore

             duration = (time.perf_counter() - start) * 1000
-            logger.debug(f"[✅] Done in {duration} ms")
+            logger.debug("[✅] Done in %s ms", duration)

             return OpenAIEmbeddingResult.to_embeddings_response(
-                embeddings=embeddings_with_restored_order,
+                embeddings=embedding,
                 engine_args=engine.engine_args,
                 encoding_format=data.encoding_format,
-                usage=total_usage,
+                usage=usage,
             )
         except AudioCorruption as ex:
             raise errors.OpenAIException(
-                f"AudioCorruption, could not open {audio_urls} -> {ex}",
+                f"AudioCorruption, could not open {audio_inputs} -> {ex}",
                 code=status.HTTP_400_BAD_REQUEST,
             )
         except ModelNotDeployedError as ex:
diff --git a/libs/infinity_emb/tests/end_to_end/conftest.py b/libs/infinity_emb/tests/end_to_end/conftest.py
index e443ee04..3ce61fc8 100644
--- a/libs/infinity_emb/tests/end_to_end/conftest.py
+++ b/libs/infinity_emb/tests/end_to_end/conftest.py
@@ -4,6 +4,8 @@

 import numpy as np
 import pytest
+from numpy import dot
+from numpy.linalg import norm


 class Helpers:
@@ -98,6 +100,10 @@ async def embedding_verify(client, model_base, prefix, model_name, decimal=3):
                 embedding["embedding"], st_embedding, decimal=decimal
             )

+    @staticmethod
+    def cosine_similarity(a, b):
+        return dot(a, b) / (norm(a) * norm(b))
+

 @pytest.fixture
 def helpers():
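Note on the conftest.py hunk above: the cosine-similarity helper moves onto the shared Helpers class so both the audio and the vision test modules can reach it through the `helpers` fixture instead of keeping private copies. As a quick sanity reference, the metric behaves like this (same numpy formula as the fixture):

    import numpy as np

    def cosine_similarity(a, b):
        # dot(a, b) / (|a| * |b|): 1.0 for parallel vectors, 0.0 for orthogonal ones
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    assert abs(cosine_similarity([1.0, 0.0], [1.0, 0.0]) - 1.0) < 1e-9
    assert abs(cosine_similarity([1.0, 0.0], [0.0, 1.0])) < 1e-9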
"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav" + rdata_text = response_text.json() + rdata_results_text = rdata_text["data"] - input = [audio_url, "a beep", "a horse", "a fish"] + rdata_audio = response_audio.json() + rdata_results_audio = rdata_audio["data"] - response = await client.post( - f"{PREFIX}/embeddings_audio", - json={"model": MODEL, "input": input}, - ) - assert response.status_code == 200 - rdata = response.json() - rdata_results = rdata["data"] + embeddings_audio_beep = rdata_results_audio[0]["embedding"] + embeddings_text_beep = rdata_results_text[0]["embedding"] + embeddings_text_horse = rdata_results_text[1]["embedding"] + embeddings_text_fish = rdata_results_text[2]["embedding"] - embeddings_audio_beep = rdata_results[0]["embedding"] - embeddings_text_beep = rdata_results[1]["embedding"] - embeddings_text_horse = rdata_results[2]["embedding"] - embeddings_text_fish = rdata_results[3]["embedding"] - assert cosine_similarity( + assert helpers.cosine_similarity( embeddings_audio_beep, embeddings_text_beep - ) > cosine_similarity(embeddings_audio_beep, embeddings_text_fish) - assert cosine_similarity( + ) > helpers.cosine_similarity(embeddings_audio_beep, embeddings_text_fish) + assert helpers.cosine_similarity( embeddings_audio_beep, embeddings_text_beep - ) > cosine_similarity(embeddings_audio_beep, embeddings_text_horse) + ) > helpers.cosine_similarity(embeddings_audio_beep, embeddings_text_horse) @pytest.mark.anyio @@ -174,3 +156,12 @@ async def test_audio_empty(client): json={"model": MODEL, "input": audio_url_empty}, ) assert response_empty.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + + +@pytest.mark.anyio +async def test_unsupported_endpoints(client): + response_unsupported = await client.post( + f"{PREFIX}/classify", + json={"model": MODEL, "input": ["test"]}, + ) + assert response_unsupported.status_code == status.HTTP_400_BAD_REQUEST diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py index f6e5b8a1..e9a7d68a 100644 --- a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py +++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py @@ -62,12 +62,11 @@ async def test_vision_single(client): @pytest.mark.anyio -@pytest.mark.skip("text only") async def test_vision_single_text_only(client): text = "a image of a cat" response = await client.post( - f"{PREFIX}/embeddings_image", + f"{PREFIX}/embeddings", json={"model": MODEL, "input": text}, ) assert response.status_code == 200 @@ -79,6 +78,43 @@ async def test_vision_single_text_only(client): assert len(rdata_results[0]["embedding"]) > 0 +@pytest.mark.anyio +async def test_meta(client, helpers): + image_url = "http://images.cocodataset.org/val2017/000000039769.jpg" + + text_input = ["a cat", "a car", "a fridge"] + image_input = [image_url] + response_text = await client.post( + f"{PREFIX}/embeddings", + json={"model": MODEL, "input": text_input}, + ) + response_image = await client.post( + f"{PREFIX}/embeddings_image", + json={"model": MODEL, "input": image_input}, + ) + + assert response_text.status_code == 200 + assert response_image.status_code == 200 + + rdata_text = response_text.json() + rdata_results_text = rdata_text["data"] + + rdata_image = response_image.json() + rdata_results_image = rdata_image["data"] + + embeddings_image_cat = rdata_results_image[0]["embedding"] + embeddings_text_cat = rdata_results_text[0]["embedding"] + 
diff --git a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
index f6e5b8a1..e9a7d68a 100644
--- a/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
+++ b/libs/infinity_emb/tests/end_to_end/test_torch_vision.py
@@ -62,12 +62,11 @@ async def test_vision_single(client):


 @pytest.mark.anyio
-@pytest.mark.skip("text only")
 async def test_vision_single_text_only(client):
     text = "a image of a cat"

     response = await client.post(
-        f"{PREFIX}/embeddings_image",
+        f"{PREFIX}/embeddings",
         json={"model": MODEL, "input": text},
     )
     assert response.status_code == 200
@@ -79,6 +78,43 @@ async def test_vision_single_text_only(client):
     assert len(rdata_results[0]["embedding"]) > 0


+@pytest.mark.anyio
+async def test_meta(client, helpers):
+    image_url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+
+    text_input = ["a cat", "a car", "a fridge"]
+    image_input = [image_url]
+    response_text = await client.post(
+        f"{PREFIX}/embeddings",
+        json={"model": MODEL, "input": text_input},
+    )
+    response_image = await client.post(
+        f"{PREFIX}/embeddings_image",
+        json={"model": MODEL, "input": image_input},
+    )
+
+    assert response_text.status_code == 200
+    assert response_image.status_code == 200
+
+    rdata_text = response_text.json()
+    rdata_results_text = rdata_text["data"]
+
+    rdata_image = response_image.json()
+    rdata_results_image = rdata_image["data"]
+
+    embeddings_image_cat = rdata_results_image[0]["embedding"]
+    embeddings_text_cat = rdata_results_text[0]["embedding"]
+    embeddings_text_car = rdata_results_text[1]["embedding"]
+    embeddings_text_fridge = rdata_results_text[2]["embedding"]
+
+    assert helpers.cosine_similarity(
+        embeddings_image_cat, embeddings_text_cat
+    ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_car)
+    assert helpers.cosine_similarity(
+        embeddings_image_cat, embeddings_text_cat
+    ) > helpers.cosine_similarity(embeddings_image_cat, embeddings_text_fridge)
+
+
 @pytest.mark.anyio
 @pytest.mark.parametrize("no_of_images", [1, 5, 10])
 async def test_vision_multiple(client, no_of_images):
@@ -119,3 +155,12 @@ async def test_vision_empty(client):
         json={"model": MODEL, "input": image_url_empty},
     )
     assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
+
+
+@pytest.mark.anyio
+async def test_unsupported_endpoints(client):
+    response_unsupported = await client.post(
+        f"{PREFIX}/classify",
+        json={"model": MODEL, "input": ["test"]},
+    )
+    assert response_unsupported.status_code == status.HTTP_400_BAD_REQUEST
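Closing note on the series: both test modules now end with a byte-identical test_unsupported_endpoints, asserting that /classify returns HTTP 400 for an embedding-only model. If the duplication grows, a shared helper on the Helpers class would be a natural follow-up (a sketch only, not part of this series):

    # hypothetical addition to conftest.py's Helpers
    @staticmethod
    async def assert_route_unsupported(client, prefix: str, model: str, route: str = "classify"):
        response = await client.post(
            f"{prefix}/{route}",
            json={"model": model, "input": ["test"]},
        )
        assert response.status_code == 400  # status.HTTP_400_BAD_REQUEST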