adapt warning; update docs
michaelfeil committed Apr 8, 2024
1 parent 0a5f197 commit 55b2f56
Showing 3 changed files with 7 additions and 5 deletions.
6 changes: 4 additions & 2 deletions docs/docs/deploy.md
@@ -12,9 +12,11 @@ docker run \
--model-name-or-path $model --port $port
```

### Docker with offline mode
### Docker with offline mode and models with custom pip packages

If you want to run infinity in a location without internet access, you can pre-download the model into the Docker image.
This is also the advised route if you want to use infinity with models that require additional pip packages, such as
`nomic-ai/nomic-embed-text-v1.5`.

```bash
# clone the repo
@@ -26,7 +28,7 @@ docker buildx build --target=production-with-download \
--build-arg MODEL_NAME=michaelfeil/bge-small-en-v1.5 --build-arg ENGINE=torch \
-f Dockerfile -t infinity-model-small .
```
You can also set an argument `EXTRA_PACKAGES` if you require to `--build-arg EXTRA_PACKAGES="einsum torch_geometric"`
You can also set the build argument `EXTRA_PACKAGES` if you need to install extra packages, e.g. `--build-arg EXTRA_PACKAGES="einsum torch_geometric"`.

Rename and push it to your internal docker registry.
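The rename-and-push step above can be sketched with standard `docker tag` and `docker push` commands; the registry host `registry.example.internal` and the `ml` namespace below are placeholders, not values from the repository.

```shell
# Tag the locally built image for your internal registry (hypothetical host).
docker tag infinity-model-small registry.example.internal/ml/infinity-model-small:latest

# Push it so air-gapped hosts can pull the pre-baked model image.
docker push registry.example.internal/ml/infinity-model-small:latest
```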

4 changes: 2 additions & 2 deletions libs/infinity_emb/infinity_emb/inference/batch_handler.py
@@ -81,9 +81,9 @@ def __init__(
self._last_inference = time.perf_counter()

if batch_delay > 0.1:
logger.warn(f"high batch delay of {self._batch_delay}")
logger.warning(f"high batch delay of {self._batch_delay}")
if max_batch_size > max_queue_wait * 10:
logger.warn(
logger.warning(
f"queue_size={self.max_queue_wait} too small "
f"for batch_size={self.max_batch_size}."
" Consider increasing queue size"
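The switch from `logger.warn` to `logger.warning` in this commit is not cosmetic: `Logger.warn` is a deprecated alias in Python's standard `logging` module and emits a `DeprecationWarning` on every call. A minimal, self-contained sketch:

```python
import logging
import warnings

logger = logging.getLogger("demo")
logger.addHandler(logging.NullHandler())  # swallow the log output itself

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    logger.warn("old spelling")  # deprecated alias of logger.warning

# The alias triggers a DeprecationWarning; logger.warning does not.
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```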
@@ -681,7 +681,7 @@ def quantize(
device: str = default_device,
) -> tuple[QuantHandler, dict]:
CHECK_TORCH.mark_required()
logger.warn(
logger.warning(
f"quantization to {mode} mode currently yields incorrect results. Do not use for production."
)
precision = torch.bfloat16
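For context on the `precision = torch.bfloat16` line in the hunk above: bfloat16 keeps float32's exponent range but only 8 significand bits, so low mantissa bits are simply dropped. A torch-free sketch of that truncation (the helper name is ours, not from the repository):

```python
import struct

def truncate_to_bfloat16(x: float) -> float:
    """Simulate bfloat16 by keeping only the top 16 bits of a float32."""
    (bits,) = struct.unpack("<I", struct.pack("<f", x))
    (out,) = struct.unpack("<f", struct.pack("<I", bits & 0xFFFF0000))
    return out

assert truncate_to_bfloat16(1.0) == 1.0           # powers of two survive exactly
assert truncate_to_bfloat16(3.14159) == 3.140625  # low mantissa bits are lost
```

This loss of precision is part of why the warning above tells users not to rely on the quantized path in production.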
