|
1 |
| -# /// script |
2 |
| -# requires-python = ">=3.11" |
3 |
| -# dependencies = [ |
4 |
| -# "bentoml", |
5 |
| -# "openai", |
6 |
| -# "vllm>=0.7.0", |
7 |
| -# ] |
8 |
| -# /// |
9 | 1 | from __future__ import annotations
|
10 | 2 | import logging, traceback, asyncio
|
11 | 3 | import bentoml, fastapi, pydantic
|
|
59 | 51 |
|
60 | 52 | Please proceed with your analysis and suggestion for the given essay excerpt."""
|
61 | 53 |
|
| 54 | + |
62 | 55 | class Suggestion(pydantic.BaseModel):
|
63 | 56 | suggestion: str
|
64 | 57 |
|
| 58 | + |
65 | 59 | class ServerArgs(pydantic.BaseModel):
|
66 | 60 | model: str
|
67 | 61 | disable_log_requests: bool = True
|
68 | 62 | disable_log_stats: bool = True
|
69 | 63 | max_log_len: int = 1000
|
70 | 64 | response_role: str = 'assistant'
|
71 |
| - served_model_name: Optional[str] = None |
| 65 | + served_model_name: Optional[List[str]] = None |
72 | 66 | chat_template: Optional[str] = None
|
73 | 67 | chat_template_content_format: Literal['auto'] = 'auto'
|
74 | 68 | lora_modules: Optional[List[str]] = None
|
@@ -101,13 +95,8 @@ class ServerArgs(pydantic.BaseModel):
|
101 | 95 | 'access_control_expose_headers': ['Content-Length'],
|
102 | 96 | }
|
103 | 97 | },
|
104 |
| - envs=[{'name': 'HF_TOKEN'}], |
105 |
| - image=bentoml.images.PythonImage(python_version='3.11') |
106 |
| - .python_packages('bentoml>=1.3.21\n') |
107 |
| - .python_packages('flashinfer-python>=0.2.0.post2\n') |
108 |
| - .python_packages('kantoku>=0.18.1\n') |
109 |
| - .python_packages('openai>=1.61.0\n') |
110 |
| - .python_packages('vllm==0.7.2\n'), |
| 98 | + envs=[{'name': 'HF_TOKEN'}, {'name': 'UV_COMPILE_BYTECODE', 'value': 1}], |
| 99 | + image=bentoml.images.PythonImage(python_version='3.11').requirements_file('requirements.txt'), |
111 | 100 | )
|
112 | 101 | class Engine:
|
113 | 102 | ref = bentoml.models.HuggingFaceModel(MODEL_ID, exclude=['*.pth'])
|
@@ -175,5 +164,3 @@ async def suggests(
|
175 | 164 | yield chunk.choices[0].delta.content or ''
|
176 | 165 | except Exception:
|
177 | 166 | yield traceback.format_exc()
|
178 |
| - |
179 |
| -if __name__ == '__main__': Engine.serve_http(port=3000) |
0 commit comments