chat_completion_proxy.py
"""Key idea: create drop-in replacement for agent's ChatCompletion call that runs on an OpenLLM backend"""
import os
import requests
import json
from .webui.api import get_webui_completion
from .lmstudio.api import get_lmstudio_completion
from .llm_chat_completion_wrappers import airoboros, dolphin, zephyr
from .utils import DotDict
HOST = os.getenv("OPENAI_API_BASE")
HOST_TYPE = os.getenv("BACKEND_TYPE") # default None == ChatCompletion
DEBUG = False
DEFAULT_WRAPPER = airoboros.Airoboros21InnerMonologueWrapper()
def get_chat_completion(
    model,  # only used to pick a prompt wrapper; generation runs on whatever model your backend serves
    messages,
    functions,
    function_call="auto",
):
    if HOST is None:
        raise ValueError("The OPENAI_API_BASE environment variable is not defined. Please set it in your environment.")
    if HOST_TYPE is None:
        raise ValueError("The BACKEND_TYPE environment variable is not defined. Please set it in your environment.")
    if function_call != "auto":
        raise ValueError(f"function_call == {function_call} not supported (auto only)")

    # Pick the prompt wrapper that matches the model's expected prompt format
    if model == "airoboros-l2-70b-2.1":
        llm_wrapper = airoboros.Airoboros21InnerMonologueWrapper()
    elif model == "dolphin-2.1-mistral-7b":
        llm_wrapper = dolphin.Dolphin21MistralWrapper()
    elif model == "zephyr-7B-alpha" or model == "zephyr-7B-beta":
        llm_wrapper = zephyr.ZephyrMistralInnerMonologueWrapper()
    else:
        # Warn the user that we're using the fallback
        print("Warning: no wrapper specified for local LLM, using the default wrapper")
        llm_wrapper = DEFAULT_WRAPPER
    # First step: turn the message sequence into a prompt that the model expects
    prompt = llm_wrapper.chat_completion_to_prompt(messages, functions)
    if DEBUG:
        print(prompt)

    # Run the prompt through the selected local backend
    try:
        if HOST_TYPE == "webui":
            result = get_webui_completion(prompt)
        elif HOST_TYPE == "lmstudio":
            result = get_lmstudio_completion(prompt)
        else:
            print(f"Warning: unrecognized BACKEND_TYPE ({HOST_TYPE}), defaulting to webui")
            result = get_webui_completion(prompt)
    except requests.exceptions.ConnectionError as e:
        raise ValueError(f"Was unable to connect to host {HOST}") from e

    if result is None or result == "":
        raise Exception(f"Got back an empty response string from {HOST}")
    chat_completion_result = llm_wrapper.output_to_chat_completion_response(result)
    if DEBUG:
        print(json.dumps(chat_completion_result, indent=2))

    # unpack with response.choices[0].message.content
    response = DotDict(
        {
            "model": None,
            "choices": [
                DotDict(
                    {
                        "message": DotDict(chat_completion_result),
                        "finish_reason": "stop",  # TODO vary based on backend response
                    }
                )
            ],
            "usage": DotDict(
                {
                    # TODO fix, actually use real info
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                    "total_tokens": 0,
                }
            ),
        }
    )
    return response
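

# --- Hypothetical usage sketch (not part of the original module) ---
# A minimal example of how an agent-side caller might invoke this proxy. It assumes:
#   * OPENAI_API_BASE points at a running local backend and BACKEND_TYPE is "webui" or "lmstudio",
#   * the module is run via `python -m <package>.chat_completion_proxy` so the relative imports resolve,
#   * `messages` / `functions` follow the OpenAI ChatCompletion schema that the prompt wrappers expect.
if __name__ == "__main__":
    example_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    example_functions = []  # no function/tool schemas in this minimal sketch

    completion = get_chat_completion(
        model="dolphin-2.1-mistral-7b",  # selects the Dolphin wrapper above
        messages=example_messages,
        functions=example_functions,
    )
    # Unpack the same way the agent would (see the comment above the DotDict packaging)
    print(completion.choices[0].message.content)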