@@ -607,65 +607,6 @@ async def chat_completion_stream_generator(
         yield f"data: {json.dumps(chunk.model_dump(exclude_unset=True), ensure_ascii=False)}\n\n"
     yield "data: [DONE]\n\n"
 
-async def generate_completion_stream_generator(
-    request: CompletionRequest, n: int, worker_addr: str
-):
-    model_name = request.model
-    id = f"cmpl-{shortuuid.random()}"
-    finish_stream_events = []
-    for text in request.prompt:
-        for i in range(n):
-            previous_text = ""
-            gen_params = await get_gen_params(
-                request.model,
-                worker_addr,
-                text,
-                temperature=request.temperature,
-                top_p=request.top_p,
-                top_k=request.top_k,
-                presence_penalty=request.presence_penalty,
-                frequency_penalty=request.frequency_penalty,
-                max_tokens=request.max_tokens,
-                logprobs=request.logprobs,
-                echo=request.echo,
-                stop=request.stop,
-            )
-            async for content in generate_completion_stream(gen_params, worker_addr):
-                if content["error_code"] != 0:
-                    yield f"data: {json.dumps(chunk.model_dump(exclude_unset=True), ensure_ascii=False)}\n\n"
-                    yield "data: [DONE]\n\n"
-                    return
-                decoded_unicode = content["text"].replace("\ufffd", "")
-                delta_text = decoded_unicode[len(previous_text) :]
-                previous_text = (
-                    decoded_unicode
-                    if len(decoded_unicode) > len(previous_text)
-                    else previous_text
-                )
-                # todo: index is not apparent
-                choice_data = CompletionResponseStreamChoice(
-                    index=i,
-                    text=delta_text,
-                    logprobs=create_openai_logprobs(content.get("logprobs", None)),
-                    finish_reason=content.get("finish_reason", None),
-                )
-                chunk = CompletionStreamResponse(
-                    id=id,
-                    object="text_completion",
-                    choices=[choice_data],
-                    model=model_name,
-                )
-                if len(delta_text) == 0:
-                    if content.get("finish_reason", None) is not None:
-                        finish_stream_events.append(chunk)
-                    continue
-                yield f"data: {json.dumps(chunk.model_dump(exclude_unset=True), ensure_ascii=False)}\n\n"
-    # There is not "content" field in the last delta message, so exclude_none to exclude field "content".
-    for finish_chunk in finish_stream_events:
-        yield f"data: {json.dumps(chunk.model_dump(exclude_unset=True), ensure_ascii=False)}\n\n"
-    yield "data: [DONE]\n\n"
-
-
 async def generate_completion_stream(payload: Dict[str, Any], worker_addr: str):
     controller_address = app_settings.controller_address
     async with httpx.AsyncClient() as client:
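For reference, the generators in this file emit a plain server-sent-events stream: each chunk is written out as a "data: {...}" line and the stream is terminated by "data: [DONE]". Below is a minimal client sketch for consuming such a stream; the base_url, the "/v1/completions" endpoint path, and the payload shape are assumptions for illustration, not part of this diff.

import json
import httpx

async def consume_completion_stream(base_url: str, payload: dict):
    # Sketch only: base_url, endpoint path, and payload fields are assumed,
    # not taken from this diff. The "data: ..." / "data: [DONE]" framing
    # matches the yields in the generators above.
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST", f"{base_url}/v1/completions", json=payload
        ) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                data = line[len("data: ") :]
                if data == "[DONE]":
                    break
                chunk = json.loads(data)
                # Each chunk carries one incremental text delta per choice.
                yield chunk["choices"][0].get("text", "")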