update deployment docs

michaelfeil · Apr 8, 2024 · a600e83 · a600e83
1 parent cdd7a21
commit a600e83
Show file tree

Hide file tree

Showing 3 changed files with 37 additions and 3 deletions.
diff --git a/docs/docs/deploy.md b/docs/docs/deploy.md
@@ -12,6 +12,33 @@ docker run \
   --model-name-or-path $model --port $port
 ```
 
+### Docker with offline mode
+
+If you want to run infinity in a location without internet access, you can pre-download the model into the Docker image at build time.
+
+```bash
+# clone the repo and check out the release tag
+git clone https://github.com/michaelfeil/infinity
+git -C infinity checkout tags/0.0.32
+cd infinity/libs/infinity_emb
+# build download stage using docker buildx buildkit.
+docker buildx build --target=production-with-download \
+--build-arg MODEL_NAME=michaelfeil/bge-small-en-v1.5 --build-arg ENGINE=torch \
+-f Dockerfile -t infinity-model-small .
+```
+You can also set the build argument `EXTRA_PACKAGES` if you need additional pip packages installed in the image, e.g. `--build-arg EXTRA_PACKAGES="einsum torch_geometric"`.
+
+Rename and push it to your internal docker registry. 
+
+```bash
+docker tag infinity-model-small  myregistryhost:5000/myinfinity/infinity:0.0.32-small
+docker push myregistryhost:5000/myinfinity/infinity:0.0.32-small
+```
+
+Note: You can also save the Docker image directly as a `.tar` archive using `docker image save`.
+This might come in handy if you do not have a shared internal docker registry in your nuclear facility, but still want to leverage the latest semantic search.
+See https://docs.docker.com/reference/cli/docker/image/save/
+
 ### Extending the Dockerfile
 
 Launching multiple models in one dockerfile

diff --git a/libs/infinity_emb/Dockerfile b/libs/infinity_emb/Dockerfile
@@ -102,14 +102,15 @@ ENTRYPOINT ["infinity_emb"]
 # --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
 FROM tested-builder AS production-with-download
 # MODEL_NAME and ENGINE are required build args (the build aborts if either is empty); EXTRA_PACKAGES is optional
-ARG MODEL_NAME 
+ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
 ARG EXTRA_PACKAGES
-RUN if [ -z "${EXTRA_PACKAGES}" ]; ; then python -m pip install --no-cache-dir $EXTRA_PACKAGES; fi
+RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
 
-RUN infinity_emb --model-name-or-path $MODEL_NAME --engine $ENGINE --preload_only || true
+# the preload run is expected to exit with status 3 once the model is downloaded; accept that status and fail the build on any other non-zero one # TODO: better exit code
+RUN infinity_emb --model-name-or-path $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
 # Use a multi-stage build -> production version

diff --git a/libs/infinity_emb/tests/unit_test/test_infinity_server.py b/libs/infinity_emb/tests/unit_test/test_infinity_server.py
@@ -38,6 +38,12 @@ def test_cli_wrong_batch_size():
     assert log.returncode == 2
 
 
+@pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows")
+def test_cli_preload():  # checks the exit status of the CLI when invoked with --preload-only
+    log = subprocess.run(["infinity_emb", "--preload-only"])  # no other arguments are passed; output is not captured
+    assert log.returncode == 3  # status 3 is the expected result of a preload-only run
+
+
 def test_create_server():
     app = create_server(EngineArgs(engine="debugengine"))
     assert isinstance(app, FastAPI)