
Commit 5c66601

Update text embedding notebooks (#528)
1 parent 5ac3052 commit 5c66601


2 files changed: +19 -37 lines changed


aws/sagemaker/large-model-inference/sample-llm/reranking_bge_reranker_v2_m3.ipynb

+10 -19

@@ -66,9 +66,9 @@
 "outputs": [],
 "source": [
 "# Choose a specific version of LMI image directly:\n",
-"# image_uri = \"763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.29.0-lmi11.0.0-cu124\"\n",
-"# image_uri = image_uris.retrieve(framework=\"djl-lmi\", region=\"us-east-1\", version=\"latest\")\n",
-"image_uri = image_uris.retrieve(framework=\"djl-lmi\", region=\"us-east-1\", version=\"0.29.0\")"
+"# image_uri = \"763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.31.0-lmi13.0.0-cu124\"\n",
+"# image_uri = image_uris.retrieve(framework=\"djl-lmi\", region=\"us-west-2\", version=\"latest\")\n",
+"image_uri = image_uris.retrieve(framework=\"djl-lmi\", region=\"us-west-2\", version=\"0.31.0\")"
 ]
 },
 {
@@ -88,23 +88,22 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# model_id = \"djl://ai.djl.huggingface.onnxruntime/BAAI/bge-reranker-v2-m3\" # use DJL model zoo model\n",
+"# model_id = \"djl://ai.djl.huggingface.rust/BAAI/bge-reranker-v2-m3\" # use DJL model zoo model\n",
 "# model_id = \"s3://YOUR_BUCKET\" # download model from your s3 bucket\n",
 "model_id = \"BAAI/bge-reranker-v2-m3\" # model will be download form Huggingface hub\n",
 "\n",
 "env = {\n",
-" # \"SERVING_BATCH_SIZE\": \"32\", # enable dynamic batch with max batch size 32\n",
+" \"SERVING_BATCH_SIZE\": \"32\", # enable dynamic batch with max batch size 32\n",
 " \"SERVING_MIN_WORKERS\": \"1\", # make sure min and max Workers are equals when deploy model on GPU\n",
 " \"SERVING_MAX_WORKERS\": \"1\",\n",
 " \"ARGS_RERANKING\": \"true\", # Use Reranking\n",
-" # \"OPTION_OPTIMIZATION=O2\", # use OnnxRuntime O2 optimization\n",
 "}\n",
 "\n",
 "model = DJLModel(\n",
 " model_id=model_id,\n",
 " task=\"text-embedding\",\n",
-" # engine=\"Rust\", # explicitly choose Rust engine (supported in LMI 0.30.0+\n",
-" image_uri=image_uri, # choose a specific version of LMI DLC image\n",
+" #engine=\"Rust\", # explicitly choose Rust engine\n",
+" #image_uri=image_uri, # choose a specific version of LMI DLC image\n",
 " env=env,\n",
 " role=role)"
 ]
@@ -242,21 +241,13 @@
 "session.delete_endpoint_config(endpoint_name)\n",
 "model.delete_model()"
 ]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "a0657f54-9b89-416d-add8-8cb068f470ca",
-"metadata": {},
-"outputs": [],
-"source": []
 }
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3 (ipykernel)",
+"display_name": "conda_python3",
 "language": "python",
-"name": "python3"
+"name": "conda_python3"
 },
 "language_info": {
 "codemirror_mode": {
@@ -268,7 +259,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.0"
+"version": "3.10.14"
 }
 },
 "nbformat": 4,

aws/sagemaker/large-model-inference/sample-llm/text_embedding_deploy_bert.ipynb

+9 -18

@@ -66,7 +66,7 @@
 "outputs": [],
 "source": [
 "# Choose a specific version of LMI image directly:\n",
-"# image_uri = \"763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124\""
+"# image_uri = \"763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.31.0-lmi13.0.0-cu124\""
 ]
 },
 {
@@ -86,21 +86,20 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# model_id = \"djl://ai.djl.huggingface.onnxruntime/BAAI/bge-base-en-v1.5\" # use DJL model zoo model\n",
+"# model_id = \"djl://ai.djl.huggingface.rust/BAAI/bge-base-en-v1.5\" # use DJL model zoo model\n",
 "# model_id = \"s3://YOUR_BUCKET\" # download model from your s3 bucket\n",
 "model_id = \"BAAI/bge-base-en-v1.5\" # model will be download form Huggingface hub\n",
 "\n",
 "env = {\n",
-" # \"SERVING_BATCH_SIZE\": \"32\", # enable dynamic batch with max batch size 32\n",
-" \"SERVING_MIN_WORKERS\": \"1\", # make sure min and max Workers are equals when deploy model on GPU\n",
+" \"SERVING_BATCH_SIZE\": \"32\", # enable dynamic batch with max batch size 32\n",
+" \"SERVING_MIN_WORKERS\": \"1\", # make sure min and max Workers are equals when deploy model on GPU\n",
 " \"SERVING_MAX_WORKERS\": \"1\",\n",
-" # \"OPTION_OPTIMIZATION=O2\", # use OnnxRuntime O2 optimization\n",
 "}\n",
 "\n",
 "model = DJLModel(\n",
 " model_id=model_id,\n",
 " task=\"text-embedding\",\n",
-" # engine=\"OnnxRuntime\", # explicitly choose OnnxRuntime engine if needed\n",
+" #engine=\"Rust\", # explicitly choose Rust engine if needed\n",
 " #image_uri=image_uri, # choose a specific version of LMI DLC image\n",
 " env=env,\n",
 " role=role)"
@@ -129,7 +128,7 @@
 "predictor = model.deploy(initial_instance_count=1,\n",
 " instance_type=instance_type,\n",
 " endpoint_name=endpoint_name,\n",
-")\n"
+")"
 ]
 },
 {
@@ -199,21 +198,13 @@
 "session.delete_endpoint_config(endpoint_name)\n",
 "model.delete_model()"
 ]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "a0657f54-9b89-416d-add8-8cb068f470ca",
-"metadata": {},
-"outputs": [],
-"source": []
 }
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3 (ipykernel)",
+"display_name": "conda_python3",
 "language": "python",
-"name": "python3"
+"name": "conda_python3"
 },
 "language_info": {
 "codemirror_mode": {
@@ -225,7 +216,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.12.3"
+"version": "3.10.14"
 }
 },
 "nbformat": 4,
