diff --git a/docs/docs/deploy.md b/docs/docs/deploy.md
index a1e54014..8f223c20 100644
--- a/docs/docs/deploy.md
+++ b/docs/docs/deploy.md
@@ -12,6 +12,33 @@ docker run \
 --model-name-or-path $model --port $port
 ```
 
+### Docker with offline mode
+
+If you want to run infinity in a location without internet access, you can pre-download the model into the docker image.
+
+```bash
+# clone the repo and check out the pinned release
+git clone https://github.com/michaelfeil/infinity
+cd infinity/libs/infinity_emb
+git checkout tags/0.0.32
+# build the production-with-download stage with docker buildx (BuildKit)
+docker buildx build --target=production-with-download \
+--build-arg MODEL_NAME=michaelfeil/bge-small-en-v1.5 --build-arg ENGINE=torch \
+-f Dockerfile -t infinity-model-small .
+```
+You can also install additional pip packages at build time via the `EXTRA_PACKAGES` build argument, e.g. `--build-arg EXTRA_PACKAGES="einsum torch_geometric"`.
+
+Rename the image and push it to your internal docker registry.
+
+```bash
+docker tag infinity-model-small myregistryhost:5000/myinfinity/infinity:0.0.32-small
+docker push myregistryhost:5000/myinfinity/infinity:0.0.32-small
+```
+
+Note: You can also save a docker image directly as a `.tar` file.
+This might come in handy if you do not have a shared internal docker registry in your nuclear facility, but still want to leverage the latest semantic search:
+https://docs.docker.com/reference/cli/docker/image/save/
+
 ### Extending the Dockerfile
 
 Launching multiple models in one dockerfile
diff --git a/libs/infinity_emb/Dockerfile b/libs/infinity_emb/Dockerfile
index fc8a0a38..824b8397 100644
--- a/libs/infinity_emb/Dockerfile
+++ b/libs/infinity_emb/Dockerfile
@@ -102,14 +102,15 @@ ENTRYPOINT ["infinity_emb"]
 # --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
 FROM tested-builder AS production-with-download
 # collect model name and engine from build args
-ARG MODEL_NAME 
+ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
 ARG EXTRA_PACKAGES
-RUN if [ -z "${EXTRA_PACKAGES}" ]; ; then python -m pip install --no-cache-dir $EXTRA_PACKAGES; fi
+RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
-RUN infinity_emb --model-name-or-path $MODEL_NAME --engine $ENGINE --preload_only || true
+# will exit with 3 if model is downloaded # TODO: better exit code
+RUN infinity_emb --model-name-or-path $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 
 ENTRYPOINT ["infinity_emb"]
 
 # Use a multi-stage build -> production version
diff --git a/libs/infinity_emb/tests/unit_test/test_infinity_server.py b/libs/infinity_emb/tests/unit_test/test_infinity_server.py
index 2cc0a21d..5034596d 100644
--- a/libs/infinity_emb/tests/unit_test/test_infinity_server.py
+++ b/libs/infinity_emb/tests/unit_test/test_infinity_server.py
@@ -38,6 +38,12 @@ def test_cli_wrong_batch_size():
     assert log.returncode == 2
 
 
+@pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows")
+def test_cli_preload():
+    log = subprocess.run(["infinity_emb", "--preload-only"])
+    assert log.returncode == 3
+
+
 def test_create_server():
     app = create_server(EngineArgs(engine="debugengine"))
     assert isinstance(app, FastAPI)
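
To sanity-check the offline image from the deploy.md section above on the air-gapped host, a run command along these lines should work. This is only a sketch: the registry host, tag, and port are the placeholder values from the docs, `--gpus all` applies only to GPU hosts, and `HF_HUB_OFFLINE=1` is an optional huggingface_hub setting that forbids network lookups, not something the Dockerfile configures.

```bash
# pull the image from the internal registry and serve the baked-in model
docker run -it --gpus all -p 7997:7997 \
  -e HF_HUB_OFFLINE=1 \
  myregistryhost:5000/myinfinity/infinity:0.0.32-small \
  --model-name-or-path michaelfeil/bge-small-en-v1.5 --engine torch --port 7997
```

The model name and engine should match the build args used for `production-with-download`; otherwise the server would try to fetch a model that is not in the image's cache.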
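
For the `.tar` route mentioned in the note (no shared registry), the standard `docker save` / `docker load` pair is sufficient; the tarball name below is arbitrary.

```bash
# on the machine that built the image: export it to a tarball
docker save -o infinity-model-small.tar infinity-model-small
# transfer the tarball (file share, removable media, ...) and import it on the offline host
docker load -i infinity-model-small.tar
```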
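
On the Dockerfile change: `|| [ $? -eq 3 ]` lets the preload step tolerate exactly the exit code 3 that `--preload-only` now returns (covered by the new test), while any other non-zero code still fails the build. A small shell sketch of that behaviour, with the hypothetical `simulate` standing in for the `infinity_emb` call:

```bash
# exit codes 0 and 3 pass the guard, everything else fails the RUN step
simulate() { ( exit "$1" ) || [ $? -eq 3 ]; echo "exit $1 -> guard returns $?"; }
simulate 0   # model already cached, nothing downloaded
simulate 3   # model downloaded, infinity_emb exited with 3
simulate 1   # genuine error, the docker build would abort
```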