Skip to content

Commit

Permalink
fix v1 error
Browse files (browse the repository at this point in the history)
  • Loading branch information
ShangmingCai committed Feb 11, 2025
1 parent 10c20c2 commit 6cdcc91
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 20 deletions.
33 changes: 23 additions & 10 deletions vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from vllm.sequence import Logprob
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
from vllm.transformers_utils.tokenizers import maybe_serialize_tool_calls
from vllm.v1.engine.async_llm import AsyncLLM

logger = init_logger(__name__)

Expand Down Expand Up @@ -243,16 +244,28 @@ async def create_chat_completion(
params=sampling_params,
)
else:
generator = self.engine_client.generate(
engine_prompt,
sampling_params,
request_id,
lora_request=lora_request,
trace_headers=trace_headers,
prompt_adapter_request=prompt_adapter_request,
kv_transfer_params=kv_transfer_params,
priority=request.priority,
)
# Note(shangming): the v1 engine (AsyncLLM) does not support KV transfer
# yet, so kv_transfer_params is not passed to generate() in that case.
if isinstance(self.engine_client, AsyncLLM):
generator = self.engine_client.generate(
engine_prompt,
sampling_params,
request_id,
lora_request=lora_request,
trace_headers=trace_headers,
prompt_adapter_request=prompt_adapter_request,
priority=request.priority,
)
else:
generator = self.engine_client.generate(
engine_prompt,
sampling_params,
request_id,
lora_request=lora_request,
trace_headers=trace_headers,
prompt_adapter_request=prompt_adapter_request,
kv_transfer_params=kv_transfer_params,
priority=request.priority,
)

generators.append(generator)
except ValueError as e:
Expand Down
33 changes: 23 additions & 10 deletions vllm/entrypoints/openai/serving_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from vllm.sequence import Logprob
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import merge_async_iterators
from vllm.v1.engine.async_llm import AsyncLLM

logger = init_logger(__name__)

Expand Down Expand Up @@ -157,16 +158,28 @@ async def create_completion(
params=sampling_params,
)
else:
generator = self.engine_client.generate(
engine_prompt,
sampling_params,
request_id_item,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
trace_headers=trace_headers,
kv_transfer_params=kv_transfer_params,
priority=request.priority,
)
# Note(shangming): the v1 engine (AsyncLLM) does not support KV transfer
# yet, so kv_transfer_params is not passed to generate() in that case.
if isinstance(self.engine_client, AsyncLLM):
generator = self.engine_client.generate(
engine_prompt,
sampling_params,
request_id_item,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
trace_headers=trace_headers,
priority=request.priority,
)
else:
generator = self.engine_client.generate(
engine_prompt,
sampling_params,
request_id_item,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request,
trace_headers=trace_headers,
kv_transfer_params=kv_transfer_params,
priority=request.priority,
)

generators.append(generator)
except ValueError as e:
Expand Down

0 comments on commit 6cdcc91

Please sign in to comment.