Commit

Merge pull request #10 from michaelfeil/add-fastembed
Refactor model dir
michaelfeil authored Oct 16, 2023
2 parents 6856568 + 06cb67d commit 10cd8e3
Showing 18 changed files with 344 additions and 200 deletions.
13 changes: 11 additions & 2 deletions libs/infinity_emb/infinity_emb/__init__.py
@@ -1,7 +1,16 @@
__all__ = ["logger", "create_server", "inference", "fastapi_schemas", "__version__"]
__all__ = [
"transformer",
"inference",
"fastapi_schemas",
"logger",
"create_server",
"__version__",
]
import importlib.metadata

from infinity_emb import fastapi_schemas, inference
from infinity_emb import fastapi_schemas, inference, transformer

# reexports
from infinity_emb.infinity_server import create_server
from infinity_emb.log_handler import logger

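With the new transformer subpackage re-exported at the top level, callers can reach the refactored modules directly. A minimal sketch of the import surface after this change, using only names visible in this diff:

    from infinity_emb import create_server, fastapi_schemas, inference, logger, transformer
    from infinity_emb.transformer.utils import InferenceEngine  # moved out of inference.models

    print(InferenceEngine.torch)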
3 changes: 2 additions & 1 deletion libs/infinity_emb/infinity_emb/inference/batch_handler.py
@@ -7,7 +7,6 @@
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Union

from infinity_emb.inference.models import BaseTransformer, get_lengths_with_tokenize
from infinity_emb.inference.primitives import (
EmbeddingResult,
NpEmbeddingType,
@@ -16,6 +15,8 @@
)
from infinity_emb.inference.threading_asyncio import EventTS
from infinity_emb.log_handler import logger
from infinity_emb.transformer.abstract import BaseTransformer
from infinity_emb.transformer.utils import get_lengths_with_tokenize


class CustomPrioQueue:
3 changes: 2 additions & 1 deletion libs/infinity_emb/infinity_emb/inference/select_model.py
@@ -1,8 +1,9 @@
from time import perf_counter

from infinity_emb.inference.models import BaseTransformer, InferenceEngine
from infinity_emb.inference.primitives import EmbeddingResult, NpEmbeddingType
from infinity_emb.log_handler import logger
from infinity_emb.transformer.abstract import BaseTransformer
from infinity_emb.transformer.utils import InferenceEngine


def select_model_to_functional(
9 changes: 5 additions & 4 deletions libs/infinity_emb/infinity_emb/infinity_server.py
@@ -15,15 +15,16 @@
OpenAIEmbeddingResult,
OpenAIModelInfo,
)
from infinity_emb.inference import BatchHandler, models, select_model_to_functional
from infinity_emb.inference import BatchHandler, select_model_to_functional
from infinity_emb.log_handler import UVICORN_LOG_LEVELS, logger
from infinity_emb.transformer.utils import InferenceEngine, InferenceEngineTypeHint


def create_server(
model_name_or_path: str = "sentence-transformers/all-MiniLM-L6-v2",
url_prefix: str = "/v1",
batch_size: int = 64,
engine: models.InferenceEngine = models.InferenceEngine.torch,
engine: InferenceEngine = InferenceEngine.torch,
verbose: bool = False,
model_warmup=True,
doc_extra: dict = {},
@@ -152,7 +153,7 @@ def start_uvicorn(
host: str = "0.0.0.0",
port: int = 8001,
log_level: UVICORN_LOG_LEVELS = UVICORN_LOG_LEVELS.info.name, # type: ignore
engine: models.InferenceEngineTypeHint = models.InferenceEngineTypeHint.torch.name, # type: ignore # noqa
engine: InferenceEngineTypeHint = InferenceEngineTypeHint.torch.name, # type: ignore # noqa
model_warmup: bool = True,
):
"""Infinity Embedding API ♾️ cli to start a uvicorn-server instance;
@@ -170,7 +171,7 @@
engine: framework that should perform inference.
model_warmup: perform model warmup before starting the server. Defaults to True.
"""
engine_load: models.InferenceEngine = models.InferenceEngine[engine.name]
engine_load: InferenceEngine = InferenceEngine[engine.name]
logger.setLevel(log_level.to_int())

app = create_server(
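After this refactor, the engine argument of both entry points takes the enum from transformer.utils instead of inference.models. A hedged launch sketch built only from the signatures shown above (host and port mirror the CLI defaults):

    import uvicorn

    from infinity_emb import create_server
    from infinity_emb.transformer.utils import InferenceEngine

    app = create_server(
        model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",
        batch_size=64,
        engine=InferenceEngine.torch,
        model_warmup=False,  # skip warmup for a quick local smoke test
    )
    uvicorn.run(app, host="0.0.0.0", port=8001)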
25 changes: 25 additions & 0 deletions libs/infinity_emb/infinity_emb/transformer/abstract.py
@@ -0,0 +1,25 @@
from abc import ABC, abstractmethod
from typing import Any, List

from infinity_emb.inference.primitives import NpEmbeddingType

INPUT_FEATURE = Any
OUT_FEATURES = Any


class BaseTransformer(ABC): # Inherit from ABC(Abstract base class)
@abstractmethod # Decorator to define an abstract method
def encode_pre(self, sentences: List[str]) -> INPUT_FEATURE:
pass

@abstractmethod
def encode_core(self, features: INPUT_FEATURE) -> OUT_FEATURES:
pass

@abstractmethod
def encode_post(self, embedding: OUT_FEATURES) -> NpEmbeddingType:
pass

@abstractmethod
def tokenize_lengths(self, sentences: List[str]) -> List[int]:
pass
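The ABC splits encoding into three phases: pre-processing, the model forward pass, and post-processing. Judging by the ThreadPoolExecutor import in batch_handler.py, the split lets the batch handler schedule these phases separately. A hypothetical driver showing the intended call order (the helper name embed_batch is not part of the diff):

    from typing import List

    from infinity_emb.inference.primitives import NpEmbeddingType
    from infinity_emb.transformer.abstract import BaseTransformer

    def embed_batch(model: BaseTransformer, sentences: List[str]) -> NpEmbeddingType:
        features = model.encode_pre(sentences)   # e.g. tokenization
        out = model.encode_core(features)        # model forward pass
        return model.encode_post(out)            # e.g. pooling / normalization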
27 changes: 27 additions & 0 deletions libs/infinity_emb/infinity_emb/transformer/dummytransformer.py
@@ -0,0 +1,27 @@
from typing import List

import numpy as np

from infinity_emb.inference.primitives import NpEmbeddingType
from infinity_emb.transformer.abstract import BaseTransformer


class DummyTransformer(BaseTransformer):
"""fix-13 dimension embedding, filled with length of sentence"""

def __init__(self, *args, **kwargs) -> None:
pass

def encode_pre(self, sentences: List[str]) -> np.ndarray:
return np.asarray(sentences)

def encode_core(self, features: np.ndarray) -> NpEmbeddingType:
lengths = np.array([[len(s) for s in features]])
# embedding of size 13
return np.ones([len(features), 13]) * lengths.T

def encode_post(self, embedding: NpEmbeddingType):
return embedding

def tokenize_lengths(self, sentences: List[str]) -> List[int]:
return [len(s) for s in sentences]
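DummyTransformer backs the debugengine: every sentence maps to a 13-dimensional vector filled with its character count. A quick check of that behavior:

    import numpy as np

    from infinity_emb.transformer.dummytransformer import DummyTransformer

    model = DummyTransformer()
    emb = model.encode_post(model.encode_core(model.encode_pre(["hi", "hello"])))
    assert emb.shape == (2, 13)
    assert np.all(emb[0] == 2.0) and np.all(emb[1] == 5.0)
    assert model.tokenize_lengths(["hi", "hello"]) == [2, 5]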
52 changes: 52 additions & 0 deletions libs/infinity_emb/infinity_emb/transformer/fastembed.py
@@ -0,0 +1,52 @@
# from typing import List, Dict
# from infinity_emb.inference.primitives import NpEmbeddingType
# from infinity_emb.transformer.abstract import BaseTransformer
# import numpy as np
# import copy

# class FlagEmbeddingFake:
# def __init__(self, *args, **kwargs) -> None:
# pass

# try:
# from fastembed.embedding import FlagEmbedding, normalize
# except ImportError:
# FlagEmbedding = FlagEmbeddingFake

# class FastEmbed(FlagEmbedding, BaseTransformer):
# def __init__(self, *args, **kwargs):
# FlagEmbedding.__init__(self, *args, **kwargs)
# if FlagEmbedding == FlagEmbeddingFake:
# raise ImportError("fastembed is not installed.")
# self._infinity_tokenizer = copy.deepcopy(self.tokenizer)

# def encode_pre(self, sentences: List[str]) -> Dict[str, np.ndarray[int]]:
# encoded = self.tokenizer.encode_batch(sentences)
# input_ids = np.array([e.ids for e in encoded])
# attention_mask = np.array([e.attention_mask for e in encoded])

# onnx_input = {
# "input_ids": np.array(input_ids, dtype=np.int64),
# "attention_mask": np.array(attention_mask, dtype=np.int64),
# }

# if not self.exclude_token_type_ids:
# onnx_input["token_type_ids"] = np.array(
# [np.zeros(len(e), dtype=np.int64) for e in input_ids], dtype=np.int64
# )
# return onnx_input

# def encode_core(self, features: Dict[str, np.ndarray[int]]) -> np.ndarray:
# model_output = self.model.run(None, features)
# last_hidden_state = model_output[0][:, 0]
# return last_hidden_state

# def encode_post(self, embedding: np.ndarray) -> NpEmbeddingType:
# return normalize(embedding).astype(np.float32)

# def tokenize_lengths(self, sentences: List[str]) -> List[int]:
# # tks = self._infinity_tokenizer.encode_batch(
# # sentences,
# # )
# # return [len(t.tokens) for t in tks]
# return [len(s) for s in sentences]
libs/infinity_emb/infinity_emb/inference/models.py → libs/infinity_emb/infinity_emb/transformer/sentence_transformer.py
@@ -1,8 +1,6 @@
import copy
import os
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Callable, Dict, List, Tuple, Union
from typing import Dict, List, Union

import numpy as np
import torch
@@ -11,57 +9,13 @@

from infinity_emb.inference.primitives import NpEmbeddingType
from infinity_emb.log_handler import logger
from infinity_emb.transformer.abstract import BaseTransformer

__all__ = [
"InferenceEngine",
"InferenceEngineTypeHint",
"DummyTransformer",
"SentenceTransformerPatched",
"CT2SentenceTransformer",
"BaseTransformer",
]

INPUT_FEATURE = Any
OUT_FEATURES = Any


class BaseTransformer(ABC): # Inherit from ABC(Abstract base class)
@abstractmethod # Decorator to define an abstract method
def encode_pre(self, sentences: List[str]) -> INPUT_FEATURE:
pass

@abstractmethod
def encode_core(self, features: INPUT_FEATURE) -> OUT_FEATURES:
pass

@abstractmethod
def encode_post(self, embedding: OUT_FEATURES) -> NpEmbeddingType:
pass

@abstractmethod
def tokenize_lengths(self, sentences: List[str]) -> List[int]:
pass


class DummyTransformer(BaseTransformer):
"""fix-13 dimension embedding, filled with length of sentence"""

def __init__(self, *args, **kwargs) -> None:
pass

def encode_pre(self, sentences: List[str]) -> np.ndarray:
return np.asarray(sentences)

def encode_core(self, features: np.ndarray) -> NpEmbeddingType:
lengths = np.array([[len(s) for s in features]])
# embedding of size 13
return np.ones([len(features), 13]) * lengths.T

def encode_post(self, embedding: NpEmbeddingType):
return embedding

def tokenize_lengths(self, sentences: List[str]) -> List[int]:
return [len(s) for s in sentences]


class SentenceTransformerPatched(SentenceTransformer, BaseTransformer):
"""SentenceTransformer with .encode_core() and no microbatching"""
@@ -312,26 +266,3 @@ def forward(self, features):

def tokenize(self, *args, **kwargs):
return self._tokenize(*args, **kwargs)


def length_tokenizer(
_sentences: List[str],
) -> List[int]:
return [len(i) for i in _sentences]


def get_lengths_with_tokenize(
_sentences: List[str], tokenize: Callable = length_tokenizer
) -> Tuple[List[int], int]:
_lengths = tokenize(_sentences)
return _lengths, sum(_lengths)


class InferenceEngine(Enum):
torch = SentenceTransformerPatched
debugengine = DummyTransformer
ctranslate2 = CT2SentenceTransformer


types: Dict[str, str] = {e.name: e.name for e in InferenceEngine}
InferenceEngineTypeHint = Enum("InferenceEngineTypeHint", types) # type: ignore
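SentenceTransformerPatched keeps the same three-phase interface on top of sentence-transformers. A hedged sketch, assuming sentence-transformers and torch are installed and that encode_pre tokenizes as in the base class:

    from infinity_emb.transformer.sentence_transformer import SentenceTransformerPatched

    model = SentenceTransformerPatched("sentence-transformers/all-MiniLM-L6-v2")
    features = model.encode_pre(["an example query"])
    embeddings = model.encode_post(model.encode_core(features))
    print(embeddings.shape)  # expected: (1, 384) for MiniLM-L6-v2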
39 changes: 39 additions & 0 deletions libs/infinity_emb/infinity_emb/transformer/utils.py
@@ -0,0 +1,39 @@
from enum import Enum
from typing import Callable, Dict, List, Tuple

from infinity_emb.transformer.dummytransformer import DummyTransformer
from infinity_emb.transformer.sentence_transformer import (
CT2SentenceTransformer,
SentenceTransformerPatched,
)

# from infinity_emb.transformer.fastembed import FastEmbed
__all__ = [
"InferenceEngine",
"InferenceEngineTypeHint",
"length_tokenizer",
"get_lengths_with_tokenize",
]


class InferenceEngine(Enum):
torch = SentenceTransformerPatched
ctranslate2 = CT2SentenceTransformer
debugengine = DummyTransformer


types: Dict[str, str] = {e.name: e.name for e in InferenceEngine}
InferenceEngineTypeHint = Enum("InferenceEngineTypeHint", types) # type: ignore


def length_tokenizer(
_sentences: List[str],
) -> List[int]:
return [len(i) for i in _sentences]


def get_lengths_with_tokenize(
_sentences: List[str], tokenize: Callable = length_tokenizer
) -> Tuple[List[int], int]:
_lengths = tokenize(_sentences)
return _lengths, sum(_lengths)
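InferenceEngine maps each engine name to its implementing class, which is how start_uvicorn resolves a string flag to a constructor; get_lengths_with_tokenize returns per-sentence lengths plus their sum for the batch handler. A small sketch of both:

    from infinity_emb.transformer.utils import InferenceEngine, get_lengths_with_tokenize

    engine_cls = InferenceEngine["debugengine"].value  # resolves to DummyTransformer
    model = engine_cls()

    lengths, total = get_lengths_with_tokenize(
        ["short", "a longer sentence"], tokenize=model.tokenize_lengths
    )
    # lengths == [5, 17], total == 22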