|
66 | 66 | "outputs": [],
|
67 | 67 | "source": [
|
68 | 68 | "# Choose a specific version of LMI image directly:\n",
|
69 | | - "# image_uri = \"763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.29.0-lmi11.0.0-cu124\"\n",
70 | | - "# image_uri = image_uris.retrieve(framework=\"djl-lmi\", region=\"us-east-1\", version=\"latest\")\n",
71 | | - "image_uri = image_uris.retrieve(framework=\"djl-lmi\", region=\"us-east-1\", version=\"0.29.0\")"
| 69 | + "# image_uri = \"763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.31.0-lmi13.0.0-cu124\"\n",
| 70 | + "# image_uri = image_uris.retrieve(framework=\"djl-lmi\", region=\"us-west-2\", version=\"latest\")\n",
| 71 | + "image_uri = image_uris.retrieve(framework=\"djl-lmi\", region=\"us-west-2\", version=\"0.31.0\")"
72 | 72 | ]
|
73 | 73 | },
|
74 | 74 | {
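
The hunk above pins the DJL-LMI container to version 0.31.0 in us-west-2, either via a hard-coded ECR URI or via the SDK lookup. A minimal standalone sketch of the lookup path, using only values that appear in the diff:

```python
from sagemaker import image_uris

# Resolve the DJL-LMI (Large Model Inference) DLC for a region/version pair;
# passing version="latest" picks the newest container instead of pinning one.
image_uri = image_uris.retrieve(
    framework="djl-lmi",
    region="us-west-2",
    version="0.31.0",
)
print(image_uri)
```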
|
|
88 | 88 | "metadata": {},
|
89 | 89 | "outputs": [],
|
90 | 90 | "source": [
|
91 | | - "# model_id = \"djl://ai.djl.huggingface.onnxruntime/BAAI/bge-reranker-v2-m3\" # use DJL model zoo model\n",
| 91 | + "# model_id = \"djl://ai.djl.huggingface.rust/BAAI/bge-reranker-v2-m3\" # use DJL model zoo model\n",
92 | 92 | "# model_id = \"s3://YOUR_BUCKET\" # download model from your s3 bucket\n",
|
93 | 93 | "model_id = \"BAAI/bge-reranker-v2-m3\" # model will be downloaded from the Hugging Face hub\n",
|
94 | 94 | "\n",
|
95 | 95 | "env = {\n",
|
96 | | - " # \"SERVING_BATCH_SIZE\": \"32\", # enable dynamic batch with max batch size 32\n",
| 96 | + " \"SERVING_BATCH_SIZE\": \"32\", # enable dynamic batch with max batch size 32\n",
97 | 97 | " \"SERVING_MIN_WORKERS\": \"1\", # make sure min and max workers are equal when deploying the model on GPU\n",
|
98 | 98 | " \"SERVING_MAX_WORKERS\": \"1\",\n",
|
99 | 99 | " \"ARGS_RERANKING\": \"true\", # Use Reranking\n",
|
100 | | - " # \"OPTION_OPTIMIZATION=O2\", # use OnnxRuntime O2 optimization\n",
101 | 100 | "}\n",
|
102 | 101 | "\n",
|
103 | 102 | "model = DJLModel(\n",
|
104 | 103 | " model_id=model_id,\n",
|
105 | 104 | " task=\"text-embedding\",\n",
|
106 | | - " # engine=\"Rust\", # explicitly choose Rust engine (supported in LMI 0.30.0+\n",
107 | | - " image_uri=image_uri, # choose a specific version of LMI DLC image\n",
| 105 | + " #engine=\"Rust\", # explicitly choose Rust engine\n",
| 106 | + " #image_uri=image_uri, # choose a specific version of LMI DLC image\n",
108 | 107 | " env=env,\n",
|
109 | 108 | " role=role)"
|
110 | 109 | ]
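
Taken together, the cell in the hunk above builds a reranking endpoint from BAAI/bge-reranker-v2-m3 with dynamic batching now enabled (SERVING_BATCH_SIZE is active rather than commented out). A hedged end-to-end sketch; the ml.g5.xlarge instance type and the deploy() call are illustrative assumptions, not part of the diff:

```python
from sagemaker import get_execution_role
from sagemaker.djl_inference import DJLModel

role = get_execution_role()

env = {
    "SERVING_BATCH_SIZE": "32",   # enable dynamic batching, max batch size 32
    "SERVING_MIN_WORKERS": "1",   # keep min == max workers on GPU
    "SERVING_MAX_WORKERS": "1",
    "ARGS_RERANKING": "true",     # serve the model as a reranker
}

model = DJLModel(
    model_id="BAAI/bge-reranker-v2-m3",  # downloaded from the Hugging Face hub
    task="text-embedding",
    env=env,
    role=role,
)

# Assumed instance type; any GPU instance supported by the LMI DLC would do.
predictor = model.deploy(
    instance_type="ml.g5.xlarge",
    initial_instance_count=1,
)
```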
|
|
242 | 241 | "session.delete_endpoint_config(endpoint_name)\n",
|
243 | 242 | "model.delete_model()"
|
244 | 243 | ]
|
245 | | - },
246 | | - {
247 | | - "cell_type": "code",
248 | | - "execution_count": null,
249 | | - "id": "a0657f54-9b89-416d-add8-8cb068f470ca",
250 | | - "metadata": {},
251 | | - "outputs": [],
252 | | - "source": []
253 | 244 | }
|
254 | 245 | ],
|
255 | 246 | "metadata": {
|
256 | 247 | "kernelspec": {
|
257 | | - "display_name": "Python 3 (ipykernel)",
| 248 | + "display_name": "conda_python3",
258 | 249 | "language": "python",
|
259 | | - "name": "python3"
| 250 | + "name": "conda_python3"
260 | 251 | },
|
261 | 252 | "language_info": {
|
262 | 253 | "codemirror_mode": {
|
|
268 | 259 | "name": "python",
|
269 | 260 | "nbconvert_exporter": "python",
|
270 | 261 | "pygments_lexer": "ipython3",
|
271 | | - "version": "3.11.0"
| 262 | + "version": "3.10.14"
272 | 263 | }
|
273 | 264 | },
|
274 | 265 | "nbformat": 4,
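
For completeness, the cleanup cell touched at diff line 242 sits inside the usual teardown sequence. A sketch assuming `session` is a `sagemaker.Session` and that `predictor` and `model` come from the deploy sketch earlier:

```python
import sagemaker

session = sagemaker.Session()
endpoint_name = predictor.endpoint_name  # endpoint created by model.deploy()

# Tear down in dependency order: endpoint, then its config, then the model.
session.delete_endpoint(endpoint_name)
session.delete_endpoint_config(endpoint_name)
model.delete_model()
```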
|
|