Commit

update pytest - change default model
michaelfeil committed Nov 1, 2023
1 parent 4480e53 commit 4a31667
Showing 12 changed files with 180 additions and 232 deletions.
README.md: 4 changes (2 additions, 2 deletions)

@@ -68,7 +68,7 @@ infinity_emb --help
### or launch the CLI using a pre-built docker container

```bash
-model=sentence-transformers/all-MiniLM-L6-v2
+model=BAAI/bge-small-en-v1.5
port=8080
docker run -it --gpus all -p $port:$port michaelf34/infinity:latest --model-name-or-path $model --port $port
```
@@ -109,7 +109,7 @@ The download path at runtime, can be controlled via the environment variable `SE
# Dockerfile for multiple models via multiple ports
FROM michaelf34/infinity:latest
ENTRYPOINT ["/bin/sh", "-c", \
"(/opt/poetry/bin/poetry run infinity_emb --port 8080 --model-name-or-path sentence-transformers/all-MiniLM-L6-v2 &);\
"(/opt/poetry/bin/poetry run infinity_emb --port 8080 --model-name-or-path BAAI/bge-small-en-v1.5 &);\
(/opt/poetry/bin/poetry run infinity_emb --port 8081 --model-name-or-path intfloat/e5-large-v2 )"]
```
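
Once a container is running with the new default, the endpoint can be exercised much like the docstring example further down in this diff. A minimal sketch (the host, port, and printed dimensionality are assumptions for illustration, not part of this commit):

```python
import requests

# Assumes the container started above is listening on localhost:8080.
resp = requests.post(
    "http://localhost:8080/v1/embeddings",
    json={"model": "bge-small-en-v1.5", "input": ["A sentence to encode."]},
)
resp.raise_for_status()
# OpenAI-style schema: one entry per input under "data".
embedding = resp.json()["data"][0]["embedding"]
print(len(embedding))  # BAAI/bge-small-en-v1.5 produces 384-dimensional vectors
```
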
libs/infinity_emb/README.md: 4 changes (2 additions, 2 deletions)

@@ -68,7 +68,7 @@ infinity_emb --help
### or launch the CLI using a pre-built docker container

```bash
-model=sentence-transformers/all-MiniLM-L6-v2
+model=BAAI/bge-small-en-v1.5
port=8080
docker run -it --gpus all -p $port:$port michaelf34/infinity:latest --model-name-or-path $model --port $port
```
@@ -109,7 +109,7 @@ The download path at runtime, can be controlled via the environment variable `SE
# Dockerfile for multiple models via multiple ports
FROM michaelf34/infinity:latest
ENTRYPOINT ["/bin/sh", "-c", \
"(/opt/poetry/bin/poetry run infinity_emb --port 8080 --model-name-or-path sentence-transformers/all-MiniLM-L6-v2 &);\
"(/opt/poetry/bin/poetry run infinity_emb --port 8080 --model-name-or-path BAAI/bge-small-en-v1.5 &);\
(/opt/poetry/bin/poetry run infinity_emb --port 8081 --model-name-or-path intfloat/e5-large-v2 )"]
```
libs/infinity_emb/infinity_emb/infinity_server.py: 8 changes (4 additions, 4 deletions)

@@ -20,7 +20,7 @@


def create_server(
-    model_name_or_path: str = "sentence-transformers/all-MiniLM-L6-v2",
+    model_name_or_path: str = "BAAI/bge-small-en-v1.5",
    url_prefix: str = "/v1",
    batch_size: int = 64,
    engine: InferenceEngine = InferenceEngine.torch,
@@ -114,7 +114,7 @@ async def _embeddings(data: OpenAIEmbeddingInput):
        ```python
        import requests
        requests.post("https://..:8000/v1/embeddings",
-            json={"model":"all-MiniLM-L6-v2","input":["A sentence to encode."]})
+            json={"model":"bge-small-en-v1.5","input":["A sentence to encode."]})
        """
        bh: BatchHandler = app.batch_handler
        if bh.is_overloaded():
@@ -146,7 +146,7 @@ async def _embeddings(data: OpenAIEmbeddingInput):


def start_uvicorn(
-    model_name_or_path: str = "sentence-transformers/all-MiniLM-L6-v2",
+    model_name_or_path: str = "BAAI/bge-small-en-v1.5",
    batch_size: int = 64,
    url_prefix: str = "/v1",
    host: str = "0.0.0.0",
@@ -160,7 +160,7 @@ def start_uvicorn(
    Args:
        model_name_or_path: str: Huggingface model, e.g.
-            "sentence-transformers/all-MiniLM-L6-v2".
+            "BAAI/bge-small-en-v1.5".
        batch_size: int: batch size for forward pass.
        url_prefix str: prefix for api. typically "/v1".
        host str: host-url, typically either "0.0.0.0" or "127.0.0.1".
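
For reference, a minimal sketch of driving the updated entry point programmatically, using only the parameters visible in this hunk (the import path is an assumption based on the file location):

```python
from infinity_emb.infinity_server import start_uvicorn

# Passes explicitly what is now the default model; after this commit,
# calling start_uvicorn() with no arguments resolves to the same model.
start_uvicorn(
    model_name_or_path="BAAI/bge-small-en-v1.5",
    batch_size=64,
    url_prefix="/v1",
    host="0.0.0.0",
)
```
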
libs/infinity_emb/tests/conftest.py: 2 changes (1 addition, 1 deletion)

@@ -6,7 +6,7 @@
import pytest
from sentence_transformers import InputExample, util # type: ignore

-pytest.DEFAULT_BERT_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+pytest.DEFAULT_BERT_MODEL = "BAAI/bge-small-en-v1.5"


@pytest.fixture
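
Test modules pick this shared default up from the `pytest` namespace, as the `# pytest.DEFAULT_BERT_MODEL` comments in the test files below indicate. A sketch:

```python
import pytest

# Resolves to "BAAI/bge-small-en-v1.5" after this commit.
MODEL: str = pytest.DEFAULT_BERT_MODEL  # type: ignore
```
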
libs/infinity_emb/tests/end_to_end/conftest.py: 101 changes (101 additions, 0 deletions)

@@ -0,0 +1,101 @@
import asyncio
import random
import time

import numpy as np
import pytest


class Helpers:
@staticmethod
async def util_batch_embedding(
client,
sts_bechmark_dataset,
model_base,
prefix: str,
model_name: str,
batch_size: int,
downsample: int = 2,
):
sentences = []
for d in sts_bechmark_dataset:
for item in d:
sentences.append(item.texts[0])
random.shuffle(sentences)
sentences = sentences[::downsample]
# sentences = sentences[:batch_size*2]
dummy_sentences = ["test" * 512] * batch_size

async def _post_batch(inputs):
return await client.post(
f"{prefix}/embeddings", json=dict(input=inputs, model=model_name)
)

        # warm-up request so model loading is not included in the timed run
        response = await _post_batch(inputs=dummy_sentences)

_request_size = int(batch_size * 1.5)
tasks = [
_post_batch(inputs=sentences[sl : sl + _request_size])
for sl in range(0, len(sentences), _request_size)
]
start = time.perf_counter()
_responses = await asyncio.gather(*tasks)
end = time.perf_counter()
time_api = end - start

responses = []
for response in _responses:
responses.extend(response.json()["data"])
for i in range(len(responses)):
responses[i] = responses[i]["embedding"]

        # warm-up encode before timing the sentence-transformers baseline
        model_base.encode(
            dummy_sentences,
            batch_size=batch_size,
            normalize_embeddings=True,
            convert_to_numpy=True,
        )
start = time.perf_counter()
encodings = model_base.encode(sentences, batch_size=batch_size).tolist()
end = time.perf_counter()
time_st = end - start

responses = np.array(responses)
encodings = np.array(encodings)

for r, e in zip(responses, encodings):
cosine_sim = np.dot(r, e) / (np.linalg.norm(e) * np.linalg.norm(r))
assert cosine_sim > 0.95
np.testing.assert_almost_equal(
np.array(responses), np.array(encodings), decimal=0
)
assert time_api / time_st < 2.5

@staticmethod
async def embedding_verify(client, model_base, prefix, model_name):
possible_inputs = [
["This is a test sentence."],
["This is a test sentence.", "This is another test sentence."],
]

for inp in possible_inputs:
response = await client.post(
f"{prefix}/embeddings", json=dict(input=inp, model=model_name)
)
assert (
response.status_code == 200
), f"{response.status_code}, {response.text}"
rdata = response.json()
assert "data" in rdata and isinstance(rdata["data"], list)
assert all("embedding" in d for d in rdata["data"])
assert len(rdata["data"]) == len(inp)

want_embeddings = model_base.encode(inp)

for embedding, st_embedding in zip(rdata["data"], want_embeddings):
np.testing.assert_almost_equal(embedding["embedding"], st_embedding)


@pytest.fixture
def helpers():
return Helpers
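
Note that both helpers are coroutines: a caller that omits `await` gets back an unawaited coroutine object and the test passes without asserting anything, so the refactored tests below must await the helper calls. Returning the class itself from the `helpers` fixture is a common pytest pattern for sharing static utilities across test modules without importing from `conftest.py`.
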
libs/infinity_emb/tests/end_to_end/test_ct2_sentence.py: 77 changes (10 additions, 67 deletions)

@@ -1,7 +1,3 @@
-import asyncio
-import random
-import time
-
import numpy as np
import pytest
import torch
@@ -59,72 +55,19 @@ async def test_model_route(client):


@pytest.mark.anyio
-async def test_embedding(client, model_base):
-    possible_inputs = [
-        ["This is a test sentence."],
-        ["This is a test sentence.", "This is another test sentence."],
-    ]
-
-    for inp in possible_inputs:
-        response = await client.post(
-            f"{PREFIX}/embeddings", json=dict(input=inp, model=MODEL)
-        )
-        assert response.status_code == 200, f"{response.status_code}, {response.text}"
-        rdata = response.json()
-        assert "data" in rdata and isinstance(rdata["data"], list)
-        assert all("embedding" in d for d in rdata["data"])
-        assert len(rdata["data"]) == len(inp)
-
-        want_embeddings = model_base.encode(inp)
-
-        for embedding, st_embedding in zip(rdata["data"], want_embeddings):
-            np.testing.assert_almost_equal(embedding["embedding"], st_embedding)
+async def test_embedding(client, model_base, helpers):
+    await helpers.embedding_verify(client, model_base, prefix=PREFIX, model_name=MODEL)


@pytest.mark.performance
@pytest.mark.anyio
-async def test_batch_embedding(client, get_sts_bechmark_dataset, model_base):
-    sentences = []
-    for d in get_sts_bechmark_dataset:
-        for item in d:
-            sentences.append(item.texts[0])
-    random.shuffle(sentences)
-    sentences = sentences[::2] if torch.cuda.is_available() else sentences[::16]
-    # sentences = sentences[:batch_size*2]
-    dummy_sentences = ["test" * 512] * batch_size
-
-    async def _post_batch(inputs):
-        return await client.post(
-            f"{PREFIX}/embeddings", json=dict(input=inputs, model=MODEL)
-        )
-
-    response = await _post_batch(inputs=dummy_sentences)
-
-    _request_size = int(batch_size * 1.5)
-    tasks = [
-        _post_batch(inputs=sentences[sl : sl + _request_size])
-        for sl in range(0, len(sentences), _request_size)
-    ]
-    start = time.perf_counter()
-    _responses = await asyncio.gather(*tasks)
-    end = time.perf_counter()
-    time_api = end - start
-
-    responses = []
-    for response in _responses:
-        responses.extend(response.json()["data"])
-    for i in range(len(responses)):
-        responses[i] = responses[i]["embedding"]
-
-    model_base.encode(
-        dummy_sentences,
-        batch_size=batch_size,
-        normalize_embeddings=True,
-        convert_to_numpy=True,
-    )
-    start = time.perf_counter()
-    encodings = model_base.encode(sentences, batch_size=batch_size).tolist()
-    end = time.perf_counter()
-    time_st = end - start
-    np.testing.assert_almost_equal(np.array(responses), np.array(encodings), decimal=6)
-    assert time_api / time_st < 2.5
+async def test_batch_embedding(client, get_sts_bechmark_dataset, model_base, helpers):
+    await helpers.util_batch_embedding(
+        client=client,
+        sts_bechmark_dataset=get_sts_bechmark_dataset,
+        model_base=model_base,
+        prefix=PREFIX,
+        model_name=MODEL,
+        batch_size=batch_size,
+        downsample=2 if torch.cuda.is_available() else 16,
+    )
libs/infinity_emb/tests/end_to_end/test_fastembed.py: 94 changes (11 additions, 83 deletions)

@@ -1,8 +1,3 @@
-import asyncio
-import random
-import time
-
-import numpy as np
import pytest
from asgi_lifespan import LifespanManager
from httpx import AsyncClient
@@ -12,7 +7,7 @@
from infinity_emb.transformer.utils import InferenceEngine

PREFIX = "/v1_fastembed"
MODEL: str = "BAAI/bge-base-en" # pytest.DEFAULT_BERT_MODEL # type: ignore
MODEL: str = "BAAI/bge-small-en-v1.5" # pytest.DEFAULT_BERT_MODEL # type: ignore

batch_size = 8

@@ -48,86 +43,19 @@ async def test_model_route(client):


@pytest.mark.anyio
-async def test_embedding(client, model_base):
-    possible_inputs = [
-        ["This is a test sentence."],
-        ["This is a test sentence.", "This is another test sentence."],
-    ]
-
-    for inp in possible_inputs:
-        response = await client.post(
-            f"{PREFIX}/embeddings", json=dict(input=inp, model=MODEL)
-        )
-        assert response.status_code == 200, f"{response.status_code}, {response.text}"
-        rdata = response.json()
-        assert "data" in rdata and isinstance(rdata["data"], list)
-        assert all("embedding" in d for d in rdata["data"])
-        assert len(rdata["data"]) == len(inp)
-
-        want_embeddings = model_base.encode(inp)
-
-        for embedding, st_embedding in zip(rdata["data"], want_embeddings):
-            cosine_sim = np.dot(embedding["embedding"], st_embedding) / (
-                np.linalg.norm(embedding["embedding"]) * np.linalg.norm(st_embedding)
-            )
-            # TODO: fastembed is not producing the correct results.
-            assert cosine_sim > 0.95
-            np.testing.assert_almost_equal(
-                embedding["embedding"], st_embedding, decimal=0
-            )
+async def test_embedding(client, model_base, helpers):
+    await helpers.embedding_verify(client, model_base, prefix=PREFIX, model_name=MODEL)


@pytest.mark.performance
@pytest.mark.anyio
-async def test_batch_embedding(client, get_sts_bechmark_dataset, model_base):
-    sentences = []
-    for d in get_sts_bechmark_dataset:
-        for item in d:
-            sentences.append(item.texts[0])
-    random.shuffle(sentences)
-    sentences = sentences[::16]
-    # sentences = sentences[:batch_size*2]
-    dummy_sentences = ["test" * 512] * batch_size
-
-    async def _post_batch(inputs):
-        return await client.post(
-            f"{PREFIX}/embeddings", json=dict(input=inputs, model=MODEL)
-        )
-
-    response = await _post_batch(inputs=dummy_sentences)
-
-    _request_size = int(batch_size * 1.5)
-    tasks = [
-        _post_batch(inputs=sentences[sl : sl + _request_size])
-        for sl in range(0, len(sentences), _request_size)
-    ]
-    start = time.perf_counter()
-    _responses = await asyncio.gather(*tasks)
-    end = time.perf_counter()
-    time_api = end - start
-
-    responses = []
-    for response in _responses:
-        responses.extend(response.json()["data"])
-    for i in range(len(responses)):
-        responses[i] = responses[i]["embedding"]
-
-    model_base.encode(
-        dummy_sentences,
-        batch_size=batch_size,
-        normalize_embeddings=True,
-        convert_to_numpy=True,
-    )
-    start = time.perf_counter()
-    encodings = model_base.encode(sentences, batch_size=batch_size).tolist()
-    end = time.perf_counter()
-    time_st = end - start
-
-    responses = np.array(responses)
-    encodings = np.array(encodings)
-
-    for r, e in zip(responses, encodings):
-        cosine_sim = np.dot(r, e) / (np.linalg.norm(e) * np.linalg.norm(r))
-        assert cosine_sim > 0.95
-    np.testing.assert_almost_equal(np.array(responses), np.array(encodings), decimal=0)
-    assert time_api / time_st < 2.5
+async def test_batch_embedding(client, get_sts_bechmark_dataset, model_base, helpers):
+    await helpers.util_batch_embedding(
+        client=client,
+        sts_bechmark_dataset=get_sts_bechmark_dataset,
+        model_base=model_base,
+        prefix=PREFIX,
+        model_name=MODEL,
+        batch_size=batch_size,
+        downsample=16,
+    )
(The diffs for the remaining 5 of the 12 changed files were not loaded in this view.)
