swarmauri · cobycloud · Sep 26, 2024 · Sep 26, 2024 · Sep 26, 2024
diff --git a/pkgs/swarmauri/swarmauri/llms/concrete/DeepInfraModel.py b/pkgs/swarmauri/swarmauri/llms/concrete/DeepInfraModel.py
@@ -15,56 +15,57 @@ class DeepInfraModel(LLMBase):
     """
 
     api_key: str
-    allowed_models: List[str] = ['01-ai/Yi-34B-Chat',
-     'Austism/chronos-hermes-13b-v2',
-     'Gryphe/MythoMax-L2-13b',
-     'Gryphe/MythoMax-L2-13b-turbo',
-     'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1',
-     'Phind/Phind-CodeLlama-34B-v2',
-     'Qwen/Qwen2-72B-Instruct',
-     'Qwen/Qwen2-7B-Instruct',
-     'Qwen/Qwen2.5-72B-Instruct',
-     'Sao10K/L3-70B-Euryale-v2.1',
-     'Sao10K/L3.1-70B-Euryale-v2.2',
-     'bigcode/starcoder2-15b',
-     'bigcode/starcoder2-15b-instruct-v0.1',
-     'codellama/CodeLlama-34b-Instruct-hf',
-     'codellama/CodeLlama-70b-Instruct-hf',
-     'cognitivecomputations/dolphin-2.6-mixtral-8x7b',
-     'cognitivecomputations/dolphin-2.9.1-llama-3-70b',
-     'databricks/dbrx-instruct',
-     'deepinfra/airoboros-70b',
-     'google/codegemma-7b-it',
-     'google/gemma-1.1-7b-it',
-     'google/gemma-2-27b-it',
-     'google/gemma-2-9b-it',
-     'lizpreciatior/lzlv_70b_fp16_hf',
-     'mattshumer/Reflection-Llama-3.1-70B',
-     'mattshumer/Reflection-Llama-3.1-70B',
-     'meta-llama/Llama-2-13b-chat-hf',
-     'meta-llama/Llama-2-70b-chat-hf',
-     'meta-llama/Llama-2-7b-chat-hf',
-     'meta-llama/Meta-Llama-3-70B-Instruct',
-     'meta-llama/Meta-Llama-3-8B-Instruct',
-     'meta-llama/Meta-Llama-3.1-405B-Instruct',
-     'meta-llama/Meta-Llama-3.1-70B-Instruct',
-     'meta-llama/Meta-Llama-3.1-8B-Instruct',
-     'microsoft/Phi-3-medium-4k-instruct',
-     'microsoft/WizardLM-2-7B',
-     'microsoft/WizardLM-2-8x22B',
-     'mistralai/Mistral-7B-Instruct-v0.1',
-     'mistralai/Mistral-7B-Instruct-v0.2',
-     'mistralai/Mistral-7B-Instruct-v0.3',
-     'mistralai/Mistral-Nemo-Instruct-2407',
-     'mistralai/Mixtral-8x22B-Instruct-v0.1',
-     'mistralai/Mixtral-8x22B-v0.1',
-     'mistralai/Mixtral-8x22B-v0.1',
-     'mistralai/Mixtral-8x7B-Instruct-v0.1',
-     'nvidia/Nemotron-4-340B-Instruct',
-     'openbmb/MiniCPM-Llama3-V-2_5',
-     'openchat/openchat-3.6-8b',
-     'openchat/openchat_3.5'
-     ]
+    allowed_models: List[str] = [
+        "01-ai/Yi-34B-Chat",
+        "Gryphe/MythoMax-L2-13b",  # not consistent with results
+        "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
+        "Phind/Phind-CodeLlama-34B-v2",
+        "Qwen/Qwen2-72B-Instruct",
+        "Qwen/Qwen2-7B-Instruct",
+        "Qwen/Qwen2.5-72B-Instruct",
+        "Sao10K/L3-70B-Euryale-v2.1",
+        "Sao10K/L3.1-70B-Euryale-v2.2",
+        "bigcode/starcoder2-15b",
+        "bigcode/starcoder2-15b-instruct-v0.1",
+        "codellama/CodeLlama-34b-Instruct-hf",
+        "codellama/CodeLlama-70b-Instruct-hf",
+        "cognitivecomputations/dolphin-2.6-mixtral-8x7b",
+        "cognitivecomputations/dolphin-2.9.1-llama-3-70b",
+        "databricks/dbrx-instruct",
+        "google/codegemma-7b-it",
+        "google/gemma-1.1-7b-it",
+        "google/gemma-2-27b-it",
+        "google/gemma-2-9b-it",
+        "lizpreciatior/lzlv_70b_fp16_hf",  # not consistent with results
+        "mattshumer/Reflection-Llama-3.1-70B",
+        "mattshumer/Reflection-Llama-3.1-70B",
+        "meta-llama/Llama-2-13b-chat-hf",
+        "meta-llama/Llama-2-70b-chat-hf",
+        "meta-llama/Llama-2-7b-chat-hf",
+        "meta-llama/Meta-Llama-3-70B-Instruct",
+        "meta-llama/Meta-Llama-3-8B-Instruct",
+        "meta-llama/Meta-Llama-3.1-405B-Instruct",
+        "meta-llama/Meta-Llama-3.1-70B-Instruct",
+        "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        "microsoft/Phi-3-medium-4k-instruct",
+        "microsoft/WizardLM-2-7B",
+        "microsoft/WizardLM-2-8x22B",
+        "mistralai/Mistral-7B-Instruct-v0.1",
+        "mistralai/Mistral-7B-Instruct-v0.2",
+        "mistralai/Mistral-7B-Instruct-v0.3",
+        "mistralai/Mistral-Nemo-Instruct-2407",
+        "mistralai/Mixtral-8x22B-Instruct-v0.1",
+        "mistralai/Mixtral-8x22B-v0.1",
+        "mistralai/Mixtral-8x22B-v0.1",
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "nvidia/Nemotron-4-340B-Instruct",
+        "openbmb/MiniCPM-Llama3-V-2_5",
+        "openchat/openchat-3.6-8b",
+        "openchat/openchat_3.5",  # not compliant with system context
+        # "deepinfra/airoboros-70b", # deprecated: https://deepinfra.com/deepinfra/airoboros-70b
+        # "Gryphe/MythoMax-L2-13b-turbo",  # deprecated: https://deepinfra.com/Gryphe/MythoMax-L2-13b-turbo/api
+        # "Austism/chronos-hermes-13b-v2",  # being deprecated: https://deepinfra.com/Austism/chronos-hermes-13b-v2/api
+    ]
 
     name: str = "Qwen/Qwen2-72B-Instruct"
     type: Literal["DeepInfraModel"] = "DeepInfraModel"

diff --git a/pkgs/swarmauri/swarmauri/llms/concrete/OpenAIModel.py b/pkgs/swarmauri/swarmauri/llms/concrete/OpenAIModel.py
@@ -24,19 +24,19 @@ class OpenAIModel(LLMBase):
         "gpt-3.5-turbo",
         "gpt-4o-mini",
         "chatgpt-4o-latest",
-        "gpt-3.5-turbo-instruct",
-        "o1-preview",
-        "o1-mini",
         "gpt-4o-2024-05-13",
         "gpt-4o-2024-08-06",
         "gpt-4o-mini-2024-07-18",
-        "o1-preview-2024-09-12",
-        "o1-mini-2024-09-12",
         "gpt-4-turbo-2024-04-09",
         "gpt-4-0125-preview",
         "gpt-4-0613",
-        "gpt-4-0314",
         "gpt-3.5-turbo-0125",
+        # "gpt-3.5-turbo-instruct",  # not supported by /v1/chat/completions; only supports /v1/completions
+        # "o1-preview",   # Does not support max_tokens and temperature
+        # "o1-mini",      # Does not support max_tokens and temperature
+        # "o1-preview-2024-09-12", # Does not support max_tokens and temperature
+        # "o1-mini-2024-09-12",   # Does not support max_tokens and temperature
+        # "gpt-4-0314",  # deprecated
     ]
     name: str = "gpt-3.5-turbo"
     type: Literal["OpenAIModel"] = "OpenAIModel"
@@ -57,7 +57,7 @@ def predict(
         temperature=0.7,
         max_tokens=256,
         enable_json=False,
-        stop: List[str] = None,
+        stop: List[str] = ["\n"],
     ):
         """
         Generate predictions using the OpenAI model.
@@ -67,6 +67,9 @@ def predict(
         - temperature (float): Sampling temperature.
         - max_tokens (int): Maximum number of tokens to generate.
         - enable_json (bool): Format response as JSON.
+        - stop (List[str]): List of sequences at which to stop generation.
+                Passing None causes some models (such as chatgpt-4o-latest)
+                to fail with a status 400 error.
 
         Returns:
         - The generated message content.

diff --git a/pkgs/swarmauri/swarmauri/llms/concrete/PerplexityModel.py b/pkgs/swarmauri/swarmauri/llms/concrete/PerplexityModel.py
@@ -16,19 +16,20 @@ class PerplexityModel(LLMBase):
 
     api_key: str
     allowed_models: List[str] = [
-        "llama-3-sonar-small-32k-chat",
-        "llama-3-sonar-small-32k-online",
-        "llama-3-sonar-large-32k-chat",
-        "llama-3-sonar-large-32k-online",
-        "llama-3-8b-instruct",
-        "llama-3-70b-instruct",
-        "llama-3.1-70b-instruct",
         "llama-3.1-sonar-small-128k-online",
         "llama-3.1-sonar-large-128k-online",
         "llama-3.1-sonar-huge-128k-online",
         "llama-3.1-sonar-small-128k-chat",
         "llama-3.1-sonar-large-128k-chat",
         "llama-3.1-8b-instruct",
+        "llama-3.1-70b-instruct",
+        # deprecated: https://docs.perplexity.ai/changelog/changelog#introducing-new-and-improved-sonar-models
+        # "llama-3-sonar-small-32k-chat",
+        # "llama-3-sonar-small-32k-online",
+        # "llama-3-sonar-large-32k-chat",
+        # "llama-3-sonar-large-32k-online",
+        # "llama-3-8b-instruct",
+        # "llama-3-70b-instruct",
     ]
     name: str = "llama-3.1-70b-instruct"
     type: Literal["PerplexityModel"] = "PerplexityModel"