Skip to content

Commit 44db1e3

Browse files
committed
chore(service): use new Python image SDK
Signed-off-by: Aaron Pham <[email protected]>
1 parent 4749167 commit 44db1e3

File tree

5 files changed

+12
-471
lines changed

5 files changed

+12
-471
lines changed

package.json

+2-1
Original file line number · Diff line number · Diff line change
@@ -5,7 +5,8 @@
55
"scripts": {
66
"morph:docs": "pnpm --filter ./docs run quartz build --serve -v",
77
"morph:docs:build": "pnpm --filter ./docs run quartz build -v",
8-
"morph:inference": "cd ./python/asteraceae/ && uv run bentoml serve --debug",
8+
"morph:inference": "cd ./python/asteraceae/ && uvx --with-requirements requirements.txt bentoml serve service.py:Engine --debug",
9+
"morph:inference:build": "cd ./python/asteraceae/ && uvx --with-requirements requirements.txt bentoml build service.py:Engine --debug",
910
"morph:dev": "pnpm --filter morph run dev",
1011
"morph:build": "pnpm --filter morph run build"
1112
},

python/asteraceae/pyproject.toml

-29
This file was deleted.

python/asteraceae/requirements.txt

+5
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,5 @@
1+
bentoml==1.3.20
2+
flashinfer-python>=0.2.0.post2
3+
kantoku>=0.18.1
4+
openai>=1.61.0
5+
vllm==0.7.2

python/asteraceae/service.py

+5-18
Original file line number · Diff line number · Diff line change
@@ -1,11 +1,3 @@
1-
# /// script
2-
# requires-python = ">=3.11"
3-
# dependencies = [
4-
# "bentoml",
5-
# "openai",
6-
# "vllm>=0.7.0",
7-
# ]
8-
# ///
91
from __future__ import annotations
102
import logging, traceback, asyncio
113
import bentoml, fastapi, pydantic
@@ -59,16 +51,18 @@
5951
6052
Please proceed with your analysis and suggestion for the given essay excerpt."""
6153

54+
6255
class Suggestion(pydantic.BaseModel):
6356
suggestion: str
6457

58+
6559
class ServerArgs(pydantic.BaseModel):
6660
model: str
6761
disable_log_requests: bool = True
6862
disable_log_stats: bool = True
6963
max_log_len: int = 1000
7064
response_role: str = 'assistant'
71-
served_model_name: Optional[str] = None
65+
served_model_name: Optional[List[str]] = None
7266
chat_template: Optional[str] = None
7367
chat_template_content_format: Literal['auto'] = 'auto'
7468
lora_modules: Optional[List[str]] = None
@@ -101,13 +95,8 @@ class ServerArgs(pydantic.BaseModel):
10195
'access_control_expose_headers': ['Content-Length'],
10296
}
10397
},
104-
envs=[{'name': 'HF_TOKEN'}],
105-
image=bentoml.images.PythonImage(python_version='3.11')
106-
.python_packages('bentoml>=1.3.21\n')
107-
.python_packages('flashinfer-python>=0.2.0.post2\n')
108-
.python_packages('kantoku>=0.18.1\n')
109-
.python_packages('openai>=1.61.0\n')
110-
.python_packages('vllm==0.7.2\n'),
98+
envs=[{'name': 'HF_TOKEN'}, {'name': 'UV_COMPILE_BYTECODE', 'value': 1}],
99+
image=bentoml.images.PythonImage(python_version='3.11').requirements_file('requirements.txt'),
111100
)
112101
class Engine:
113102
ref = bentoml.models.HuggingFaceModel(MODEL_ID, exclude=['*.pth'])
@@ -175,5 +164,3 @@ async def suggests(
175164
yield chunk.choices[0].delta.content or ''
176165
except Exception:
177166
yield traceback.format_exc()
178-
179-
if __name__ == '__main__': Engine.serve_http(port=3000)

0 commit comments

Comments (0)