diff --git a/app/core.py b/app/core.py index 44d94be..9f03411 100644 --- a/app/core.py +++ b/app/core.py @@ -20,8 +20,8 @@ def __init__(self): self.llm = None try: self.llm = LLM() - except OpenAIError as _: - self.status_queue.put('Set your OpenAPI API Key in Settings and Restart the App') + except OpenAIError as e: + self.status_queue.put(f'Set your OpenAI API Key in Settings and Restart the App. Error: {e}') def execute_user_request(self, user_request: str) -> None: self.stop_previous_request() diff --git a/app/llm.py b/app/llm.py index 9c84023..e90bcc5 100644 --- a/app/llm.py +++ b/app/llm.py @@ -1,15 +1,13 @@ -import json -import os from pathlib import Path from typing import Any -from openai import ChatCompletion -from openai import OpenAI - +from models.factory import ModelFactory from utils import local_info from utils.screen import Screen from utils.settings import Settings +DEFAULT_MODEL_NAME = 'gpt-4o' + class LLM: """ @@ -43,106 +41,58 @@ class LLM: "done": ... } - function is the function name to call in the executor. + function is the function name to call in the executor. parameters are the parameters of the above function. human_readable_justification is what we can use to debug in case program fails somewhere or to explain to user why we're doing what we're doing. done is null if user request is not complete, and it's a string when it's complete that either contains the information that the user asked for, or just acknowledges completion of the user requested task. This is going to be communicated to the user if it's present. - - Note: Use code below to check whether gpt4v has assistant support yet. - from openai import OpenAI - client = OpenAI() - assistant = client.beta.assistants.create( - name="bot", - instructions="bot", - model="gpt-4-vision-preview", - tools=[{"type": "code_interpreter"}] - ) """ def __init__(self): - settings_dict: dict[str, str] = Settings().get_dict() + self.settings_dict: dict[str, str] = Settings().get_dict() + model_name, base_url, api_key = self.get_settings_values() + + self.model_name = model_name + context = self.read_context_txt_file() + + self.model = ModelFactory.create_model(self.model_name, base_url, api_key, context) - base_url = settings_dict.get('base_url', 'https://api.openai.com/v1/').rstrip('/') + '/' - api_key = settings_dict.get('api_key') - if api_key: - os.environ["OPENAI_API_KEY"] = api_key + def get_settings_values(self) -> tuple[str, str, str]: + model_name = self.settings_dict.get('model') + if not model_name: + model_name = DEFAULT_MODEL_NAME + base_url = self.settings_dict.get('base_url', '') + if not base_url: + base_url = 'https://api.openai.com/v1/' + base_url = base_url.rstrip('/') + '/' + + api_key = self.settings_dict.get('api_key') + + return model_name, base_url, api_key + + def read_context_txt_file(self) -> str: + # Construct context for the assistant by reading context.txt and adding extra system information + context = '' path_to_context_file = Path(__file__).resolve().parent.joinpath('resources', 'context.txt') with open(path_to_context_file, 'r') as file: - self.context = file.read() + context += file.read() - self.context += f' Locally installed apps are {",".join(local_info.locally_installed_apps)}.' - self.context += f' OS is {local_info.operating_system}.' - self.context += f' Primary screen size is {Screen().get_size()}.\n' + context += f' Locally installed apps are {",".join(local_info.locally_installed_apps)}.' + context += f' OS is {local_info.operating_system}.'
+ context += f' Primary screen size is {Screen().get_size()}.\n' - if 'default_browser' in settings_dict.keys() and settings_dict['default_browser']: - self.context += f'\nDefault browser is {settings_dict["default_browser"]}.' + if 'default_browser' in self.settings_dict.keys() and self.settings_dict['default_browser']: + context += f'\nDefault browser is {self.settings_dict["default_browser"]}.' - if 'custom_llm_instructions' in settings_dict: - self.context += f'\nCustom user-added info: {settings_dict["custom_llm_instructions"]}.' + if 'custom_llm_instructions' in self.settings_dict: + context += f'\nCustom user-added info: {self.settings_dict["custom_llm_instructions"]}.' - self.client = OpenAI() - - self.model = settings_dict.get('model') - if not self.model: - self.model = 'gpt-4-vision-preview' - self.client = OpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url) + return context def get_instructions_for_objective(self, original_user_request: str, step_num: int = 0) -> dict[str, Any]: - message: list[dict[str, Any]] = self.create_message_for_llm(original_user_request, step_num) - llm_response = self.send_message_to_llm(message) - json_instructions: dict[str, Any] = self.convert_llm_response_to_json(llm_response) - - return json_instructions - - def create_message_for_llm(self, original_user_request, step_num) -> list[dict[str, Any]]: - base64_img: str = Screen().get_screenshot_in_base64() - - request_data: str = json.dumps({ - 'original_user_request': original_user_request, - 'step_num': step_num - }) - - # We have to add context every request for now which is expensive because our chosen model doesn't have a - # stateful/Assistant mode yet. - message = [ - {'type': 'text', 'text': self.context + request_data}, - {'type': 'image_url', - 'image_url': { - 'url': f'data:image/jpeg;base64,{base64_img}' - } - } - ] - - return message - - def send_message_to_llm(self, message) -> ChatCompletion: - response = self.client.chat.completions.create( - model=self.model, - messages=[ - { - 'role': 'user', - 'content': message, - } - ], - max_tokens=800, - ) - return response - - def convert_llm_response_to_json(self, llm_response: ChatCompletion) -> dict[str, Any]: - llm_response_data: str = llm_response.choices[0].message.content.strip() - - # Our current LLM model does not guarantee a JSON response hence we manually parse the JSON part of the response - # Check for updates here - https://platform.openai.com/docs/guides/text-generation/json-mode - start_index = llm_response_data.find('{') - end_index = llm_response_data.rfind('}') - - try: - json_response = json.loads(llm_response_data[start_index:end_index + 1].strip()) - except Exception as e: - print(f'Error while parsing JSON response - {e}') - json_response = {} - - return json_response + return self.model.get_instructions_for_objective(original_user_request, step_num) + + def cleanup(self): + self.model.cleanup() diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/models/factory.py b/app/models/factory.py new file mode 100644 index 0000000..cee1983 --- /dev/null +++ b/app/models/factory.py @@ -0,0 +1,13 @@ +from models.gpt4o import GPT4o +from models.gpt4v import GPT4v + + +class ModelFactory: + @staticmethod + def create_model(model_name, *args): + if model_name == 'gpt-4o': + return GPT4o(model_name, *args) + elif model_name == 'gpt-4-vision-preview' or model_name == 'gpt-4-turbo': + return GPT4v(model_name, *args) + else: + raise ValueError(f'Unsupported 
model type {model_name}. Create entry in app/models/') diff --git a/app/models/gpt4o.py b/app/models/gpt4o.py new file mode 100644 index 0000000..1ccc4f8 --- /dev/null +++ b/app/models/gpt4o.py @@ -0,0 +1,132 @@ +import json +import time +from typing import Any + +from models.model import Model +from openai.types.beta.threads.message import Message +from utils.screen import Screen + + +# TODO +# [ ] Function calling with assistants api - https://platform.openai.com/docs/assistants/tools/function-calling/quickstart + +class GPT4o(Model): + def __init__(self, model_name, base_url, api_key, context): + super().__init__(model_name, base_url, api_key, context) + + # GPT4o has Assistant Mode enabled that we can utilize to make Open Interface be more contextually aware + self.assistant = self.client.beta.assistants.create( + name='Open Interface Backend', + instructions=self.context, + # tools=[], + model='gpt-4o', + ) + + self.thread = self.client.beta.threads.create() + + # IDs of images uploaded to OpenAI for use with the assistants API, can be cleaned up once thread is no longer needed + self.list_of_image_ids = [] + + def get_instructions_for_objective(self, original_user_request: str, step_num: int = 0) -> dict[str, Any]: + # Upload screenshot to OpenAI - Note: Don't delete files from openai while the thread is active + openai_screenshot_file_id = self.upload_screenshot_and_get_file_id() + + self.list_of_image_ids.append(openai_screenshot_file_id) + + # Format user request to send to LLM + formatted_user_request = self.format_user_request_for_llm(original_user_request, step_num, + openai_screenshot_file_id) + + # Read response + llm_response = self.send_message_to_llm(formatted_user_request) + json_instructions: dict[str, Any] = self.convert_llm_response_to_json_instructions(llm_response) + + return json_instructions + + def send_message_to_llm(self, formatted_user_request) -> Message: + message = self.client.beta.threads.messages.create( + thread_id=self.thread.id, + role='user', + content=formatted_user_request + ) + + run = self.client.beta.threads.runs.create_and_poll( + thread_id=self.thread.id, + assistant_id=self.assistant.id, + instructions='' + ) + + while run.status != 'completed': + print(f'Waiting for response, sleeping for 1. run.status={run.status}') + time.sleep(1) + + if run.status == 'failed': + print(f'failed run run.required_action:{run.required_action} run.last_error: {run.last_error}\n\n') + return None + + if run.status == 'completed': + # NOTE: Apparently right now the API doesn't have a way to retrieve just the last message??? 
+ # So instead you get all messages and take the latest one + response = self.client.beta.threads.messages.list( + thread_id=self.thread.id + ) + + return response.data[0] + else: + print('Run did not complete successfully.') + return None + + def upload_screenshot_and_get_file_id(self): + # Files are used to upload documents like images that can be used with features like Assistants + # Assistants API cannot take base64 images like chat.completions API + filepath = Screen().get_screenshot_file() + + response = self.client.files.create( + file=open(filepath, 'rb'), + purpose='vision' + ) + return response.id + + def format_user_request_for_llm(self, original_user_request, step_num, openai_screenshot_file_id) -> list[ + dict[str, Any]]: + request_data: str = json.dumps({ + 'original_user_request': original_user_request, + 'step_num': step_num + }) + + content = [ + { + 'type': 'text', + 'text': request_data + }, + { + 'type': 'image_file', + 'image_file': { + 'file_id': openai_screenshot_file_id + } + } + ] + + return content + + def convert_llm_response_to_json_instructions(self, llm_response: Message) -> dict[str, Any]: + llm_response_data: str = llm_response.content[0].text.value.strip() + + # Our current LLM model does not guarantee a JSON response hence we manually parse the JSON part of the response + # Check for updates here - https://platform.openai.com/docs/guides/text-generation/json-mode + start_index = llm_response_data.find('{') + end_index = llm_response_data.rfind('}') + + try: + json_response = json.loads(llm_response_data[start_index:end_index + 1].strip()) + except Exception as e: + print(f'Error while parsing JSON response - {e}') + json_response = {} + + return json_response + + def cleanup(self): + # Note: Cannot delete screenshots while the thread is active. Cleanup during shut down. + for id in self.list_of_image_ids: + self.client.files.delete(id) + self.thread = self.client.beta.threads.create() # Using old thread even by accident would cause Image errors diff --git a/app/models/gpt4v.py b/app/models/gpt4v.py new file mode 100644 index 0000000..14b8673 --- /dev/null +++ b/app/models/gpt4v.py @@ -0,0 +1,64 @@ +import json +from typing import Any + +from models.model import Model +from openai import ChatCompletion +from utils.screen import Screen + + +class GPT4v(Model): + def get_instructions_for_objective(self, original_user_request: str, step_num: int = 0) -> dict[str, Any]: + message: list[dict[str, Any]] = self.format_user_request_for_llm(original_user_request, step_num) + llm_response = self.send_message_to_llm(message) + json_instructions: dict[str, Any] = self.convert_llm_response_to_json_instructions(llm_response) + return json_instructions + + def format_user_request_for_llm(self, original_user_request, step_num) -> list[dict[str, Any]]: + base64_img: str = Screen().get_screenshot_in_base64() + + request_data: str = json.dumps({ + 'original_user_request': original_user_request, + 'step_num': step_num + }) + + # We have to add context every request for now which is expensive because our chosen model doesn't have a + # stateful/Assistant mode yet. 
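+ # The screenshot is sent inline below as a base64 data URL; the GPT4o (Assistants API) path instead uploads the image file and passes its file_id.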
+ message = [ + {'type': 'text', 'text': self.context + request_data}, + {'type': 'image_url', + 'image_url': { + 'url': f'data:image/jpeg;base64,{base64_img}' + } + } + ] + + return message + + def send_message_to_llm(self, message) -> ChatCompletion: + response = self.client.chat.completions.create( + model=self.model_name, + messages=[ + { + 'role': 'user', + 'content': message, + } + ], + max_tokens=800, + ) + return response + + def convert_llm_response_to_json_instructions(self, llm_response: ChatCompletion) -> dict[str, Any]: + llm_response_data: str = llm_response.choices[0].message.content.strip() + + # Our current LLM model does not guarantee a JSON response hence we manually parse the JSON part of the response + # Check for updates here - https://platform.openai.com/docs/guides/text-generation/json-mode + start_index = llm_response_data.find('{') + end_index = llm_response_data.rfind('}') + + try: + json_response = json.loads(llm_response_data[start_index:end_index + 1].strip()) + except Exception as e: + print(f'Error while parsing JSON response - {e}') + json_response = {} + + return json_response diff --git a/app/models/model.py b/app/models/model.py new file mode 100644 index 0000000..1ba9b91 --- /dev/null +++ b/app/models/model.py @@ -0,0 +1,28 @@ +import os +from typing import Any + +from openai import OpenAI + + +class Model: + def __init__(self, model_name, base_url, api_key, context): + self.model_name = model_name + self.base_url = base_url + self.api_key = api_key + self.context = context + self.client = OpenAI(api_key=api_key, base_url=base_url) + + if api_key: + os.environ['OPENAI_API_KEY'] = api_key + + def get_instructions_for_objective(self, *args) -> dict[str, Any]: + pass + + def format_user_request_for_llm(self, *args): + pass + + def convert_llm_response_to_json_instructions(self, *args) -> dict[str, Any]: + pass + + def cleanup(self, *args): + pass diff --git a/app/resources/context.txt b/app/resources/context.txt index d2a0bc5..f542546 100644 --- a/app/resources/context.txt +++ b/app/resources/context.txt @@ -43,12 +43,12 @@ In addition to pyautogui, you can also call sleep(seconds) to wait for apps, web Here are some directions based on your past behavior to make you better: 1. If you think a task is complete, don't keep enqueuing more steps. Just fill the "done" parameter with value. This is very important. -2. Use pyautogui's keyDown and keyUp functions for more precise control to ensure reliability. +2. Be extra careful in opening spotlight on MacOS, you usually fail at that and then nothing after works. 3. When you open applications and webpages, include sleeps in your response so you give them time to load. 4. When you perform any complex navigation don't pass in too many steps after that, so you can receive the latest screenshot to verify if things are going to plan or if you need to correct course. 5. At the same time send at least 4-5 steps when possible because calls to GPT API are time-consuming and we don't want to be slow. 6. Break down your response into very simple steps. This is very important. -7. Try your best relying more on keyboard commands than mouse, because even being off by a couple pixels with mouse clicks makes you miss the button you are trying to click. +7. Do not use pyautogui's mouse commands. Completely rely on keyboard functions. You do extremely poorly with mouse navigation. 8. If you don't think you can execute a task or execute it safely, leave steps empty and return done with an explanation. 9. 
Very importantly don't respond in anything but JSON. 10. Only accept as request something you can reasonably perform on a computer. @@ -58,7 +58,8 @@ Here are some directions based on your past behavior to make you better: 14. pyautogui.press("enter") is not the same as pyautogui.write("\n") - please do not interchange them. 15. Try going to links directly instead of searching for them. This is very important. 16. Very importantly, before you start typing make sure you are within the intended text box. Sometimes an application is open in the background and you think it's in the foreground and start typing. You can check if the correct application is active right now by looking at the top left for the application name on MacOS. -17. Try not switching applications with keyboard shortcuts, except always launch applications with spotlight on MacOS. +17. Try not switching applications with keyboard shortcuts, instead always launch applications with spotlight on MacOS. +18. Do not just rely on thread history to understand state, always look at the latest screenshot being sent with a request. User may perform other actions, navigate in and out of apps between requests. ALWAYS look at state of the system with the screenshot provided. Lastly, do not ever, ever do anything to hurt the user or the computer system - do not perform risky deletes, or any other similar actions. @@ -133,50 +134,78 @@ class Interpreter: class LLM: def __init__(self): self.client = OpenAI() - self.model = "gpt-4-vision-preview" + self.model = "gpt-4o" with open('context.txt', 'r') as file: self.context = file.read() self.context += f"\nDefault browser is {local_info.default_browser}." self.context += f" Locally installed apps are {','.join(local_info.locally_installed_apps)}." self.context += f" Primary screen size is {Screen().get_size()}.\n" + self.assistant = self.client.beta.assistants.create( + name="Open Interface Backend", + instructions=self.context, + model="gpt-4o", + ) + self.thread = self.client.beta.threads.create() def get_instructions_for_objective(self, original_user_request, step_num=0): - message = self.create_message_for_llm(original_user_request, step_num) - llm_response = self.send_message_to_llm(message) - json_instructions = self.convert_llm_response_to_json(llm_response) + openai_file_id_for_screenshot, temp_filename = self.upload_screenshot_and_get_file_id() + formatted_user_request = self.format_user_request_for_llm(original_user_request, step_num, + openai_file_id_for_screenshot) + llm_response = self.send_message_to_llm_v2(formatted_user_request) + json_instructions: dict[str, Any] = self.convert_llm_response_to_json_v2(llm_response) return json_instructions - def create_message_for_llm(self, original_user_request, step_num): - base64_img = Screen().get_screenshot_in_base64() - request_data = json.dumps({ - "original_user_request": original_user_request, - "step_num": step_num + def format_user_request_for_llm(self, original_user_request, step_num, openai_file_id_for_screenshot) -> list[ + dict[str, Any]]: + request_data: str = json.dumps({ + 'original_user_request': original_user_request, + 'step_num': step_num }) - message = [ - {"type": "text", "text": self.context + request_data}, - {"type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{base64_img}" - } - } - ] - return message - def send_message_to_llm(self, message): - response = self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "user", - "content": message, + content = [ + { + 'type': 
'text', + 'text': request_data + }, + { + 'type': 'image_file', + 'image_file': { + 'file_id': openai_file_id_for_screenshot } - ], - max_tokens=800, + } + ] + return content + def send_message_to_llm_v2(self, formatted_user_request) -> Message: + message = self.client.beta.threads.messages.create( + thread_id=self.thread.id, + role="user", + content=formatted_user_request ) - return response - def convert_llm_response_to_json(self, llm_response): - llm_response_data = llm_response.choices[0].message.content.strip() - # Our current LLM model does not guarantee a JSON response, hence we manually parse the JSON part of the response - start_index = llm_response_data.find("{") - end_index = llm_response_data.rfind("}") - json_response = eval(llm_response_data[start_index:end_index + 1]) + run = self.client.beta.threads.runs.create_and_poll( + thread_id=self.thread.id, + assistant_id=self.assistant.id, + instructions='' + ) + while run.status != 'completed': + print(f'Waiting for response, sleeping for 1. run.status={run.status}') + time.sleep(1) + if run.status == 'failed': + print(f'failed run run.required_action:{run.required_action} run.last_error: {run.last_error}\n\n') + return None + if run.status == 'completed': + # NOTE: Apparently right now the API doesn't have a way to retrieve just the last message??? + # So instead you get all messages and take the latest one + response = self.client.beta.threads.messages.list( + thread_id=self.thread.id) + return response.data[0] + else: + print("Run did not complete successfully.") + return None + def convert_llm_response_to_json_v2(self, llm_response: ChatCompletion) -> dict[str, Any]: + llm_response_data: str = llm_response.content[0].text.value.strip() + start_index = llm_response_data.find('{') + end_index = llm_response_data.rfind('}') + try: + json_response = json.loads(llm_response_data[start_index:end_index + 1].strip()) + except Exception as e: + print(f'Error while parsing JSON response - {e}') + json_response = {} return json_response - End of code diff --git a/app/ui.py b/app/ui.py index cac7658..c10ea3b 100644 --- a/app/ui.py +++ b/app/ui.py @@ -8,6 +8,7 @@ import speech_recognition as sr from PIL import Image, ImageTk +from llm import DEFAULT_MODEL_NAME from utils.settings import Settings from version import version @@ -45,8 +46,29 @@ def __init__(self, parent): self.base_url_entry.insert(0, settings_dict['base_url']) if 'model' in settings_dict: self.model_entry.insert(0, settings_dict['model']) + self.model_var.set(settings_dict.get('model', 'custom')) + else: + self.model_entry.insert(0, DEFAULT_MODEL_NAME) + self.model_var.set(DEFAULT_MODEL_NAME) def create_widgets(self) -> None: + # Radio buttons for model selection + tk.Label(self, text='Select Model:').pack(pady=10, padx=10) + self.model_var = tk.StringVar(value='custom') # default selection + + # Create a frame to hold the radio buttons + radio_frame = ttk.Frame(self) + radio_frame.pack(padx=20, pady=10) # Add padding around the frame + + models = [ + ('GPT-4v (Most Accurate, Slowest)', 'gpt-4-vision-preview'), + ('GPT-4o (Medium Accurate, Medium Fast)', 'gpt-4o'), + ('GPT-4-Turbo (Least Accurate, Fastest)', 'gpt-4-turbo'), + ('Custom (Specify Settings Below)', 'custom') + ] + for text, value in models: + ttk.Radiobutton(radio_frame, text=text, value=value, variable=self.model_var).pack(anchor=tk.W) + label_base_url = tk.Label(self, text='Custom OpenAI-Like API Model Base URL') label_base_url.pack(pady=10) @@ -68,10 +90,10 @@ def create_widgets(self) -> None: def 
save_button(self) -> None: base_url = self.base_url_entry.get().strip() - model = self.model_entry.get().strip() + model = self.model_var.get() if self.model_var.get() != 'custom' else self.model_entry.get().strip() settings_dict = { - "base_url": base_url, - "model": model, + 'base_url': base_url, + 'model': model, } self.settings.save_settings_to_file(settings_dict) @@ -148,7 +170,7 @@ def create_widgets(self) -> None: link_label.pack() link_label.bind('<Button-1>', lambda e: open_link( 'https://github.com/AmberSahdev/Open-Interface?tab=readme-ov-file#setup-%EF%B8%8F')) - + # Check for updates Label update_label = tk.Label(self, text='Check for Updates', fg='#499CE4', font=('Helvetica', 10)) update_label.pack() @@ -279,7 +301,8 @@ def voice_input(self) -> None: recognizer = sr.Recognizer() with sr.Microphone() as source: self.update_message('Listening...') - recognizer.adjust_for_ambient_noise(source) # This might also help with asking for mic permissions on Macs + # This might also help with asking for mic permissions on Macs + recognizer.adjust_for_ambient_noise(source) try: audio = recognizer.listen(source, timeout=4) try: diff --git a/app/utils/screen.py b/app/utils/screen.py index 305e212..338251d 100644 --- a/app/utils/screen.py +++ b/app/utils/screen.py @@ -1,8 +1,11 @@ import base64 import io +import os +import tempfile import pyautogui from PIL import Image +from utils.settings import Settings class Screen: @@ -16,12 +19,29 @@ def get_screenshot(self) -> Image.Image: return img def get_screenshot_in_base64(self) -> str: + # Base64 images work with ChatCompletions API but not Assistants API + img_bytes = self.get_screenshot_as_file_object() + encoded_image = base64.b64encode(img_bytes.read()).decode('utf-8') + return encoded_image + + def get_screenshot_as_file_object(self): + # In memory files don't work with OpenAI Assistants API because of missing filename attribute img_bytes = io.BytesIO() img = self.get_screenshot() - img.save(img_bytes, format='PNG') # Save the screenshot to an in-memory file + img.save(img_bytes, format='PNG') # Save the screenshot to an in-memory file.
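+ # Rewind the buffer so the subsequent read() returns the image bytes from the start.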
img_bytes.seek(0) + return img_bytes - # Encode this image file in base64 - encoded_image = base64.b64encode(img_bytes.read()).decode('utf-8') + def get_temp_filename_for_current_screenshot(self): + with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile: + screenshot = self.get_screenshot() + screenshot.save(tmpfile.name) + return tmpfile.name - return encoded_image + def get_screenshot_file(self): + # Gonna always keep a screenshot.png in ~/.open-interface/ because file objects, temp files, every other way has an error + filename = 'screenshot.png' + filepath = os.path.join(Settings().get_settings_directory_path(), filename) + img = self.get_screenshot() + img.save(filepath) + return filepath diff --git a/app/utils/settings.py b/app/utils/settings.py index f3f2dd7..99b703b 100644 --- a/app/utils/settings.py +++ b/app/utils/settings.py @@ -6,10 +6,13 @@ class Settings: def __init__(self): - self.settings_file_path = str(Path.home()) + "/.open-interface/settings.json" + self.settings_file_path = self.get_settings_directory_path() + 'settings.json' os.makedirs(os.path.dirname(self.settings_file_path), exist_ok=True) self.settings = self.load_settings_from_file() + def get_settings_directory_path(self): + return str(Path.home()) + '/.open-interface/' + def get_dict(self) -> dict[str, str]: return self.settings diff --git a/build.py b/build.py index 6453e6b..3da23e6 100644 --- a/build.py +++ b/build.py @@ -45,8 +45,8 @@ def build(signing_key=None): - input("Did you remember to increment version.py? " + str(version)) - app_name = "Open\\ Interface" + input('Did you remember to increment version.py? ' + str(version)) + app_name = 'Open\\ Interface' compile(signing_key) @@ -166,7 +166,7 @@ def zip(): return zip_name -if __name__ == "__main__": +if __name__ == '__main__': apple_code_signing_key = None if len(sys.argv) > 1: apple_code_signing_key = sys.argv[1] # Developer ID Application: ... (...) diff --git a/requirements.txt b/requirements.txt index 8a2887e..14c823a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ httpcore==1.0.2 httpx==0.26.0 idna==3.7 MouseInfo==0.1.3 -openai==1.9.0 +openai==1.34.0 pillow==10.3.0 PyAudio==0.2.14 PyAutoGUI==0.9.54 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/simple_test.py b/tests/simple_test.py new file mode 100644 index 0000000..f32c616 --- /dev/null +++ b/tests/simple_test.py @@ -0,0 +1,31 @@ +import os +import sys +import threading +import time + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../app'))) + +from app import App + +from multiprocessing import freeze_support + + +def main(): + # Says hi, waits 12 seconds, requests to open chrome + app = App() + threading.Thread(target=put_requests_in_app, args=(app,), daemon=True).start() + app.run() + return + + +def put_requests_in_app(app): + app.ui.main_window.user_request_queue.put('hi there') + time.sleep(12) + app.ui.main_window.user_request_queue.put('open chrome') + + +if __name__ == '__main__': + freeze_support() # As required by pyinstaller https://www.pyinstaller.org/en/stable/common-issues-and-pitfalls.html#multi-processing + main() + sys.exit(0)
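
Side note on extending the new model layer: ModelFactory.create_model raises 'Unsupported model type ... Create entry in app/models/' for names it does not recognize, so adding another backend means subclassing Model (or reusing GPT4v for any OpenAI-compatible chat-completions endpoint) and adding a branch in the factory. A minimal sketch follows; the Llava class, the app/models/llava.py filename, and the startswith('llava') branch are illustrative assumptions, not part of this diff.

# Hypothetical example: app/models/llava.py
from models.gpt4v import GPT4v


class Llava(GPT4v):
    # An OpenAI-compatible chat-completions backend (e.g. a local server exposing /v1) can reuse
    # GPT4v's request formatting and JSON parsing unchanged; model_name, base_url, api_key, and
    # context are already threaded through Model.__init__ by ModelFactory.
    pass


# Hypothetical branch in ModelFactory.create_model (app/models/factory.py):
#     elif model_name.startswith('llava'):
#         return Llava(model_name, *args)

With an entry like that in place, a user could point base_url at the compatible server in Settings and enter the custom model name, and LLM would route requests through the new class the same way 'gpt-4o' routes to GPT4o and 'gpt-4-vision-preview' routes to GPT4v.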