diff --git a/.env.template b/.env.template index 4694aca08506..479730e84a96 100644 --- a/.env.template +++ b/.env.template @@ -1,6 +1,7 @@ PINECONE_API_KEY=your-pinecone-api-key PINECONE_ENV=your-pinecone-region OPENAI_API_KEY=your-openai-api-key +TEMPERATURE=1 ELEVENLABS_API_KEY=your-elevenlabs-api-key ELEVENLABS_VOICE_1_ID=your-voice-id ELEVENLABS_VOICE_2_ID=your-voice-id @@ -9,11 +10,7 @@ FAST_LLM_MODEL=gpt-3.5-turbo GOOGLE_API_KEY= CUSTOM_SEARCH_ENGINE_ID= USE_AZURE=False -OPENAI_AZURE_API_BASE=your-base-url-for-azure -OPENAI_AZURE_API_VERSION=api-version-for-azure -OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure -OPENAI_AZURE_CHAT_DEPLOYMENT_ID=deployment-id-for-azure-chat -OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID=deployment-id-for-azure-embeddigs +EXECUTE_LOCAL_COMMANDS=False IMAGE_PROVIDER=dalle HUGGINGFACE_API_TOKEN= USE_MAC_OS_TTS=False diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9fa565936058..c355965ab4d5 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -26,7 +26,7 @@ By following these guidelines, your PRs are more likely to be merged quickly aft - [ ] I have thoroughly tested my changes with multiple different prompts. - [ ] I have considered potential risks and mitigations for my changes. - [ ] I have documented my changes clearly and comprehensively. -- [ ] I have not snuck in any "extra" small tweaks changes +- [ ] I have not snuck in any "extra" small tweaks changes diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 070df794b94a..0b90b55d34fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: - name: Lint with flake8 continue-on-error: false - run: flake8 scripts/ tests/ --select E303,W293,W291,W292,E305 + run: flake8 scripts/ tests/ --select E303,W293,W291,W292,E305,E231,E302 - name: Run unittest tests with coverage run: | diff --git a/.gitignore b/.gitignore index cfa3b08b5d98..cf6e75df15b6 100644 --- a/.gitignore +++ b/.gitignore @@ -7,9 +7,11 @@ package-lock.json auto_gpt_workspace/* *.mpeg .env +azure.yaml *venv/* outputs/* ai_settings.yaml +last_run_ai_settings.yaml .vscode .idea/* auto-gpt.json @@ -19,3 +21,6 @@ log.txt .coverage coverage.xml htmlcov/ + +# For Macs Dev Environs: ignoring .Desktop Services_Store +.DS_Store diff --git a/README.md b/README.md index c9ef9d5c5955..20aaea8b64ea 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ ![GitHub Repo stars](https://img.shields.io/github/stars/Torantulino/auto-gpt?style=social) ![Twitter Follow](https://img.shields.io/twitter/follow/siggravitas?style=social) -[![](https://dcbadge.vercel.app/api/server/PQ7VX6TY4t?style=flat)](https://discord.gg/PQ7VX6TY4t) -[![Unit Tests](https://github.com/Torantulino/Auto-GPT/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/Torantulino/Auto-GPT/actions/workflows/unit_tests.yml) +[![Discord Follow](https://dcbadge.vercel.app/api/server/PQ7VX6TY4t?style=flat)](https://discord.gg/PQ7VX6TY4t) +[![Unit Tests](https://github.com/Torantulino/Auto-GPT/actions/workflows/ci.yml/badge.svg)](https://github.com/Torantulino/Auto-GPT/actions/workflows/unit_tests.yml) Auto-GPT is an experimental open-source application showcasing the capabilities of the GPT-4 language model. This program, driven by GPT-4, chains together LLM "thoughts", to autonomously achieve whatever goal you set. As one of the first examples of GPT-4 running fully autonomously, Auto-GPT pushes the boundaries of what is possible with AI. 
@@ -32,21 +32,28 @@ Your support is greatly appreciated - [Auto-GPT: An Autonomous GPT-4 Experiment](#auto-gpt-an-autonomous-gpt-4-experiment) - [Demo (30/03/2023):](#demo-30032023) - - [💖 Help Fund Auto-GPT's Development](#-help-fund-auto-gpts-development) - [Table of Contents](#table-of-contents) - [🚀 Features](#-features) - [📋 Requirements](#-requirements) - [💾 Installation](#-installation) - [🔧 Usage](#-usage) + - [Logs](#logs) - [🗣️ Speech Mode](#️-speech-mode) - [🔍 Google API Keys Configuration](#-google-api-keys-configuration) - [Setting up environment variables](#setting-up-environment-variables) + - [Redis Setup](#redis-setup) + - [🌲 Pinecone API Key Setup](#-pinecone-api-key-setup) + - [Setting up environment variables](#setting-up-environment-variables-1) + - [Setting Your Cache Type](#setting-your-cache-type) + - [View Memory Usage](#view-memory-usage) - [💀 Continuous Mode ⚠️](#-continuous-mode-️) - [GPT3.5 ONLY Mode](#gpt35-only-mode) - - [🖼 Image Generation](#image-generation) + - [🖼 Image Generation](#-image-generation) - [⚠️ Limitations](#️-limitations) - [🛡 Disclaimer](#-disclaimer) - [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter) + - [Run tests](#run-tests) + - [Run linter](#run-linter) ## 🚀 Features @@ -70,36 +77,41 @@ Optional: To install Auto-GPT, follow these steps: -0. Make sure you have all the **requirements** above, if not, install/get them. +1. Make sure you have all the **requirements** above, if not, install/get them. _The following commands should be executed in a CMD, Bash or Powershell window. To do this, go to a folder on your computer, click in the folder path at the top and type CMD, then press enter._ -1. Clone the repository: +2. Clone the repository: For this step you need Git installed, but you can just download the zip file instead by clicking the button at the top of this page ☝️ ``` git clone https://github.com/Torantulino/Auto-GPT.git ``` -2. Navigate to the project directory: +3. Navigate to the project directory: _(Type this into your CMD window, you're aiming to navigate the CMD window to the repository you just downloaded)_ ``` cd 'Auto-GPT' ``` -3. Install the required dependencies: +4. Install the required dependencies: _(Again, type this into your CMD window)_ ``` pip install -r requirements.txt ``` -4. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVEN_LABS_API_KEY` as well. - -- Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys. -- Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website. -- If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and provide the `OPENAI_AZURE_API_BASE`, `OPENAI_AZURE_API_VERSION` and `OPENAI_AZURE_DEPLOYMENT_ID` values as explained here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section. Additionally you need separate deployments for both embeddings and chat. Add their ID values to `OPENAI_AZURE_CHAT_DEPLOYMENT_ID` and `OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID` respectively +5. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVEN_LABS_API_KEY` as well. + - Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys. + - Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website. 
+ - If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then: + - Rename `azure.yaml.template` to `azure.yaml` and provide the relevant `azure_api_base`, `azure_api_version` and all of the deployment ids for the relevant models in the `azure_model_map` section: + - `fast_llm_model_deployment_id` - your gpt-3.5-turbo or gpt-4 deployment id + - `smart_llm_model_deployment_id` - your gpt-4 deployment id + - `embedding_model_deployment_id` - your text-embedding-ada-002 v2 deployment id + - Please specify all of these values as double quoted strings + - details can be found here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section and here: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/tutorials/embeddings?tabs=command-line for the embedding model. ## 🔧 Usage @@ -115,7 +127,7 @@ python scripts/main.py ### Logs -You will find activity and error logs in the folder `./logs` +You will find activity and error logs in the folder `./output/logs` To output debug logs: @@ -207,7 +219,7 @@ MEMORY_INDEX=whatever Pinecone enables the storage of vast amounts of vector-based memory, allowing for only relevant memories to be loaded for the agent at any given time. -1. Go to app.pinecone.io and make an account if you don't already have one. +1. Go to [pinecone](https://app.pinecone.io/) and make an account if you don't already have one. 2. Choose the `Starter` plan to avoid being charged. 3. Find your API key and region under the default project in the left sidebar. @@ -233,7 +245,6 @@ export PINECONE_ENV="Your pinecone region" # something like: us-east4-gcp ``` - ## Setting Your Cache Type By default Auto-GPT is going to use LocalCache instead of redis or Pinecone. @@ -337,11 +348,13 @@ coverage run -m unittest discover tests ## Run linter -This project uses [flake8](https://flake8.pycqa.org/en/latest/) for linting. To run the linter, run the following command: +This project uses [flake8](https://flake8.pycqa.org/en/latest/) for linting. We currently use the following rules: `E303,W293,W291,W292,E305,E231,E302`. See the [flake8 rules](https://www.flake8rules.com/) for more information. + +To run the linter, run the following command: ``` flake8 scripts/ tests/ # Or, if you want to run flake8 with the same configuration as the CI: -flake8 scripts/ tests/ --select E303,W293,W291,W292,E305 +flake8 scripts/ tests/ --select E303,W293,W291,W292,E305,E231,E302 ``` \ No newline at end of file diff --git a/ai_settings.yaml b/ai_settings.yaml deleted file mode 100644 index b37ba849f916..000000000000 --- a/ai_settings.yaml +++ /dev/null @@ -1,7 +0,0 @@ -ai_goals: -- Increase net worth. -- Develop and manage multiple businesses autonomously. -- Play to your strengths as a Large Language Model. -ai_name: Entrepreneur-GPT -ai_role: an AI designed to autonomously develop and run businesses with the sole goal - of increasing your net worth. 
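The Azure settings removed from `.env.template` above now live in a standalone `azure.yaml`, whose template follows. Before launching, a filled-in copy can be sanity-checked with PyYAML (`config.py` in this diff imports `yaml`); a minimal sketch, assuming `azure.yaml` sits in the repository root and uses the key names from the template:

```
import yaml

# Keys that config.py's get_azure_deployment_id_for_model() looks up.
REQUIRED_IDS = [
    "fast_llm_model_deployment_id",
    "smart_llm_model_deployment_id",
    "embedding_model_deployment_id",
]

with open("azure.yaml") as f:
    params = yaml.safe_load(f) or {}

model_map = params.get("azure_model_map", {})
missing = [key for key in REQUIRED_IDS if not model_map.get(key)]
if missing:
    print("azure.yaml is incomplete; missing:", ", ".join(missing))
else:
    print("azure.yaml defines all three deployment ids")
```

A check like this is worth running because `load_azure_config()` (added to `config.py` later in this diff) silently falls back to empty values when the file is missing or incomplete, so a misconfiguration otherwise only surfaces later at the first API call.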
diff --git a/azure.yaml.template b/azure.yaml.template new file mode 100644 index 000000000000..74ca797b2d54 --- /dev/null +++ b/azure.yaml.template @@ -0,0 +1,7 @@ +azure_api_type: azure_ad +azure_api_base: your-base-url-for-azure +azure_api_version: api-version-for-azure +azure_model_map: + fast_llm_model_deployment_id: gpt35-deployment-id-for-azure + smart_llm_model_deployment_id: gpt4-deployment-id-for-azure + embedding_model_deployment_id: embedding-deployment-id-for-azure diff --git a/requirements.txt b/requirements.txt index b864c1d3e84b..3f7fd2281dd4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ redis orjson Pillow coverage -flake8 \ No newline at end of file +flake8 +numpy diff --git a/scripts/agent_manager.py b/scripts/agent_manager.py index a0e5f1648213..191ab838a342 100644 --- a/scripts/agent_manager.py +++ b/scripts/agent_manager.py @@ -6,6 +6,7 @@ # Create new GPT agent # TODO: Centralise use of create_chat_completion() to globally enforce token limit + def create_agent(task, prompt, model): """Create a new agent and return its key""" global next_key diff --git a/scripts/ai_config.py b/scripts/ai_config.py index bd373944fca1..ee4b1fda2336 100644 --- a/scripts/ai_config.py +++ b/scripts/ai_config.py @@ -2,6 +2,7 @@ import data import os + class AIConfig: """ A class object that contains the configuration information for the AI diff --git a/scripts/ai_functions.py b/scripts/ai_functions.py index 782bb55871c2..8c95c0f240f2 100644 --- a/scripts/ai_functions.py +++ b/scripts/ai_functions.py @@ -45,6 +45,7 @@ def improve_code(suggestions: List[str], code: str) -> str: result_string = call_ai_function(function_string, args, description_string) return result_string + def write_tests(code: str, focus: List[str]) -> str: """ A function that takes in code and focus topics and returns a response from create chat completion api call. 
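Many of the one-line hunks in this diff, including the three immediately above, only insert blank lines; they exist to satisfy the two flake8 rules newly enforced in CI (`E302`, `E231`). For reference, a minimal illustration of what each rule flags — the names here are made up:

```
# E302: flake8 expects two blank lines before a top-level definition.
def helper():
    pass


def another_helper():  # the two blank lines above satisfy E302
    pass


# E231: flake8 expects whitespace after ',' and ':'.
good = {"url": "https://example.com", "timeout": 10}
# bad = {"url":"https://example.com","timeout":10}  # flagged as E231
```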
diff --git a/scripts/browse.py b/scripts/browse.py index b936c5b197d7..9e93c55a338b 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,6 +6,7 @@ cfg = Config() + # Function to check if the URL is valid def is_valid_url(url): try: @@ -14,49 +15,51 @@ def is_valid_url(url): except ValueError: return False + # Function to sanitize the URL def sanitize_url(url): return urljoin(url, urlparse(url).path) -# Function to make a request with a specified timeout and handle exceptions -def make_request(url, timeout=10): - try: - response = requests.get(url, headers=cfg.user_agent_header, timeout=timeout) - response.raise_for_status() - return response - except requests.exceptions.RequestException as e: - return "Error: " + str(e) # Define and check for local file address prefixes def check_local_file_access(url): local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost'] return any(url.startswith(prefix) for prefix in local_prefixes) -def scrape_text(url): - """Scrape text from a webpage""" - # Basic check if the URL is valid - if not url.startswith('http'): - return "Error: Invalid URL" - # Restrict access to local files - if check_local_file_access(url): - return "Error: Access to local files is restricted" +def get_response(url, headers=cfg.user_agent_header, timeout=10): + try: + # Restrict access to local files + if check_local_file_access(url): + raise ValueError('Access to local files is restricted') + + # Most basic check if the URL is valid: + if not url.startswith('http://') and not url.startswith('https://'): + raise ValueError('Invalid URL format') - # Validate the input URL - if not is_valid_url(url): - # Sanitize the input URL sanitized_url = sanitize_url(url) - # Make the request with a timeout and handle exceptions - response = make_request(sanitized_url) + response = requests.get(sanitized_url, headers=headers, timeout=timeout) - if isinstance(response, str): - return response - else: - # Sanitize the input URL - sanitized_url = sanitize_url(url) + # Check if the response contains an HTTP error + if response.status_code >= 400: + return None, "Error: HTTP " + str(response.status_code) + " error" - response = requests.get(sanitized_url, headers=cfg.user_agent_header) + return response, None + except ValueError as ve: + # Handle invalid URL format + return None, "Error: " + str(ve) + + except requests.exceptions.RequestException as re: + # Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.) + return None, "Error: " + str(re) + + +def scrape_text(url): + """Scrape text from a webpage""" + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") @@ -89,11 +92,9 @@ def format_hyperlinks(hyperlinks): def scrape_links(url): """Scrape links from a webpage""" - response = requests.get(url, headers=cfg.user_agent_header) - - # Check if the response contains an HTTP error - if response.status_code >= 400: - return "error" + response, error_message = get_response(url) + if error_message: + return error_message soup = BeautifulSoup(response.text, "html.parser") @@ -131,6 +132,7 @@ def create_message(chunk, question): "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text." 
} + def summarize_text(text, question): """Summarize text using the LLM model""" if not text: diff --git a/scripts/call_ai_function.py b/scripts/call_ai_function.py index f8238658695a..6f1d6ceee8b3 100644 --- a/scripts/call_ai_function.py +++ b/scripts/call_ai_function.py @@ -3,6 +3,8 @@ cfg = Config() from llm_utils import create_chat_completion + + # This is a magic function that can do anything with no-code. See # https://github.com/Torantulino/AI-Functions for more info. def call_ai_function(function, args, description, model=None): diff --git a/scripts/chat.py b/scripts/chat.py index e16cee383785..2b7c34b5c400 100644 --- a/scripts/chat.py +++ b/scripts/chat.py @@ -9,6 +9,7 @@ cfg = Config() + def create_chat_message(role, content): """ Create a chat message with the given role and content. diff --git a/scripts/commands.py b/scripts/commands.py index 92d46ae18ae4..fe6f6c30e9f5 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -7,7 +7,7 @@ from config import Config import ai_functions as ai from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files -from execute_code import execute_python_file +from execute_code import execute_python_file, execute_shell from json_parser import fix_and_parse_json from image_gen import generate_image from duckduckgo_search import ddg @@ -24,6 +24,7 @@ def is_valid_int(value): except ValueError: return False + def get_command(response): """Parse the response and return the command name and arguments""" try: @@ -103,6 +104,11 @@ def execute_command(command_name, arguments): return ai.write_tests(arguments["code"], arguments.get("focus")) elif command_name == "execute_python_file": # Add this command return execute_python_file(arguments["file"]) + elif command_name == "execute_shell": + if cfg.execute_local_commands: + return execute_shell(arguments["command_line"]) + else: + return "You are not allowed to run local shell commands. To execute shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' in your config. Do not attempt to bypass the restriction." 
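This new branch makes shell access opt-in via `EXECUTE_LOCAL_COMMANDS`. A hypothetical interactive check of the gate, assuming `scripts/` is on `sys.path` and a `.env` that leaves the flag at its default of `False`:

```
import commands as cmd  # scripts/commands.py

# With EXECUTE_LOCAL_COMMANDS unset (or anything other than 'True'),
# execute_command refuses instead of running the shell command.
result = cmd.execute_command("execute_shell", {"command_line": "echo hello"})
print(result)  # -> "You are not allowed to run local shell commands. ..."

# After setting EXECUTE_LOCAL_COMMANDS=True in .env and re-importing,
# the same call would run `echo hello` inside auto_gpt_workspace/
# via execute_shell() (defined in scripts/execute_code.py below).
```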
elif command_name == "generate_image": return generate_image(arguments["prompt"]) elif command_name == "do_nothing": @@ -130,6 +136,7 @@ def google_search(query, num_results=8): return json.dumps(search_results, ensure_ascii=False, indent=4) + def google_official_search(query, num_results=8): """Return the results of a google search using the official Google API""" from googleapiclient.discovery import build @@ -166,6 +173,7 @@ def google_official_search(query, num_results=8): # Return the list of search result URLs return search_results_links + def browse_website(url, question): """Browse a website and return the summary and links""" summary = get_text_summary(url, question) diff --git a/scripts/config.py b/scripts/config.py index 255587d76fc0..e966cce25801 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -1,6 +1,7 @@ import abc import os import openai +import yaml from dotenv import load_dotenv # Load environment variables from .env file load_dotenv() @@ -43,15 +44,13 @@ def __init__(self): self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000)) self.openai_api_key = os.getenv("OPENAI_API_KEY") - self.use_azure = False + self.temperature = float(os.getenv("TEMPERATURE", "1")) self.use_azure = os.getenv("USE_AZURE") == 'True' + self.execute_local_commands = os.getenv('EXECUTE_LOCAL_COMMANDS', 'False') == 'True' + if self.use_azure: - self.openai_api_base = os.getenv("OPENAI_AZURE_API_BASE") - self.openai_api_version = os.getenv("OPENAI_AZURE_API_VERSION") - self.openai_deployment_id = os.getenv("OPENAI_AZURE_DEPLOYMENT_ID") - self.azure_chat_deployment_id = os.getenv("OPENAI_AZURE_CHAT_DEPLOYMENT_ID") - self.azure_embeddigs_deployment_id = os.getenv("OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID") - openai.api_type = "azure" + self.load_azure_config() + openai.api_type = self.openai_api_type openai.api_base = self.openai_api_base openai.api_version = self.openai_api_version @@ -73,7 +72,7 @@ def __init__(self): # User agent headers to use when browsing web # Some websites might just completely deny request with an error code if no user agent was found. - self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} + self.user_agent_header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} self.redis_host = os.getenv("REDIS_HOST", "localhost") self.redis_port = os.getenv("REDIS_PORT", "6379") self.redis_password = os.getenv("REDIS_PASSWORD", "") @@ -85,6 +84,47 @@ def __init__(self): # Initialize the OpenAI API client openai.api_key = self.openai_api_key + def get_azure_deployment_id_for_model(self, model: str) -> str: + """ + Returns the relevant deployment id for the model specified. + + Parameters: + model(str): The model to map to the deployment id. + + Returns: + The matching deployment id if found, otherwise an empty string. 
+ """ + if model == self.fast_llm_model: + return self.azure_model_to_deployment_id_map["fast_llm_model_deployment_id"] + elif model == self.smart_llm_model: + return self.azure_model_to_deployment_id_map["smart_llm_model_deployment_id"] + elif model == "text-embedding-ada-002": + return self.azure_model_to_deployment_id_map["embedding_model_deployment_id"] + else: + return "" + + AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), '..', 'azure.yaml') + + def load_azure_config(self, config_file: str=AZURE_CONFIG_FILE) -> None: + """ + Loads the configuration parameters for Azure hosting from the specified file path as a yaml file. + + Parameters: + config_file(str): The path to the config yaml file. DEFAULT: "../azure.yaml" + + Returns: + None + """ + try: + with open(config_file) as file: + config_params = yaml.load(file, Loader=yaml.FullLoader) + except FileNotFoundError: + config_params = {} + self.openai_api_type = os.getenv("OPENAI_API_TYPE", config_params.get("azure_api_type", "azure")) + self.openai_api_base = os.getenv("OPENAI_AZURE_API_BASE", config_params.get("azure_api_base", "")) + self.openai_api_version = os.getenv("OPENAI_AZURE_API_VERSION", config_params.get("azure_api_version", "")) + self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", []) + def set_continuous_mode(self, value: bool): """Set the continuous mode value.""" self.continuous_mode = value diff --git a/scripts/data.py b/scripts/data.py index f80c2875d8ef..088fd51ce1c0 100644 --- a/scripts/data.py +++ b/scripts/data.py @@ -1,6 +1,7 @@ import os from pathlib import Path + def load_prompt(): """Load the prompt from data/prompt.txt""" try: diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index fc68f3ae0d28..ffb9eb5003ba 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -22,9 +22,10 @@ COMMANDS: 16. Get Improved Code: "improve_code", args: "suggestions": "", "code": "" 17. Write Tests: "write_tests", args: "code": "", "focus": "" 18. Execute Python File: "execute_python_file", args: "file": "" -19. Task Complete (Shutdown): "task_complete", args: "reason": "" -20. Generate Image: "generate_image", args: "prompt": "" -21. Do Nothing: "do_nothing", args: "" +19. Execute Shell Command, non-interactive commands only: "execute_shell", args: "command_line": "". +20. Task Complete (Shutdown): "task_complete", args: "reason": "" +21. Generate Image: "generate_image", args: "prompt": "" +22. Do Nothing: "do_nothing", args: "" RESOURCES: diff --git a/scripts/execute_code.py b/scripts/execute_code.py index a8f909116ee9..dbd62c226591 100644 --- a/scripts/execute_code.py +++ b/scripts/execute_code.py @@ -1,17 +1,20 @@ import docker import os +import subprocess + + +WORKSPACE_FOLDER = "auto_gpt_workspace" def execute_python_file(file): """Execute a Python file in a Docker container and return the output""" - workspace_folder = "auto_gpt_workspace" - print (f"Executing file '{file}' in workspace '{workspace_folder}'") + print (f"Executing file '{file}' in workspace '{WORKSPACE_FOLDER}'") if not file.endswith(".py"): return "Error: Invalid file type. Only .py files are allowed." - file_path = os.path.join(workspace_folder, file) + file_path = os.path.join(WORKSPACE_FOLDER, file) if not os.path.isfile(file_path): return f"Error: File '{file}' does not exist." 
@@ -19,14 +22,31 @@ def execute_python_file(file): try: client = docker.from_env() + image_name = 'python:3.10' + try: + client.images.get(image_name) + print(f"Image '{image_name}' found locally") + except docker.errors.ImageNotFound: + print(f"Image '{image_name}' not found locally, pulling from Docker Hub") + # Use the low-level API to stream the pull response + low_level_client = docker.APIClient() + for line in low_level_client.pull(image_name, stream=True, decode=True): + # Print the status and progress, if available + status = line.get('status') + progress = line.get('progress') + if status and progress: + print(f"{status}: {progress}") + elif status: + print(status) + # You can replace 'python:3.8' with the desired Python image/version # You can find available Python images on Docker Hub: # https://hub.docker.com/_/python container = client.containers.run( - 'python:3.10', + image_name, f'python {file}', volumes={ - os.path.abspath(workspace_folder): { + os.path.abspath(WORKSPACE_FOLDER): { 'bind': '/workspace', 'mode': 'ro'}}, working_dir='/workspace', @@ -46,3 +66,23 @@ def execute_python_file(file): except Exception as e: return f"Error: {str(e)}" + + +def execute_shell(command_line): + + current_dir = os.getcwd() + + if not WORKSPACE_FOLDER in current_dir: # Change dir into workspace if necessary + work_dir = os.path.join(os.getcwd(), WORKSPACE_FOLDER) + os.chdir(work_dir) + + print (f"Executing command '{command_line}' in working directory '{os.getcwd()}'") + + result = subprocess.run(command_line, capture_output=True, shell=True) + output = f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}" + + # Change back to whatever the prior working dir was + + os.chdir(current_dir) + + return output diff --git a/scripts/file_operations.py b/scripts/file_operations.py index 7b48c1348728..3bbe9da60e45 100644 --- a/scripts/file_operations.py +++ b/scripts/file_operations.py @@ -38,7 +38,7 @@ def write_to_file(filename, text): directory = os.path.dirname(filepath) if not os.path.exists(directory): os.makedirs(directory) - with open(filepath, "w") as f: + with open(filepath, "w", encoding='utf-8') as f: f.write(text) return "File written to successfully." 
except Exception as e: @@ -65,6 +65,7 @@ def delete_file(filename): except Exception as e: return "Error: " + str(e) + def search_files(directory): found_files = [] diff --git a/scripts/image_gen.py b/scripts/image_gen.py index 4481696ffa21..6c27df3f352b 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -11,6 +11,7 @@ working_directory = "auto_gpt_workspace" + def generate_image(prompt): filename = str(uuid.uuid4()) + ".jpg" diff --git a/scripts/llm_utils.py b/scripts/llm_utils.py index 3fb348f07b28..16739dddf001 100644 --- a/scripts/llm_utils.py +++ b/scripts/llm_utils.py @@ -4,12 +4,13 @@ openai.api_key = cfg.openai_api_key + # Overly simple abstraction until we create something better -def create_chat_completion(messages, model=None, temperature=None, max_tokens=None)->str: +def create_chat_completion(messages, model=None, temperature=cfg.temperature, max_tokens=None)->str: """Create a chat completion using the OpenAI API""" if cfg.use_azure: response = openai.ChatCompletion.create( - deployment_id=cfg.azure_chat_deployment_id, + deployment_id=cfg.get_azure_deployment_id_for_model(model), model=model, messages=messages, temperature=temperature, diff --git a/scripts/logger.py b/scripts/logger.py index 5c7d68bb3b0e..91bdb6f605fd 100644 --- a/scripts/logger.py +++ b/scripts/logger.py @@ -124,6 +124,12 @@ def set_level(self, level): self.logger.setLevel(level) self.typing_logger.setLevel(level) + def double_check(self, additionalText=None): + if not additionalText: + additionalText = "Please ensure you've setup and configured everything correctly. Read https://github.com/Torantulino/Auto-GPT#readme to double check. You can also create a github issue or join the discord and ask there!" + + self.typewriter_log("DOUBLE CHECK CONFIGURATION", Fore.YELLOW, additionalText) + ''' Output stream to console using simulated typing @@ -151,6 +157,7 @@ def emit(self, record): except Exception: self.handleError(record) + class ConsoleHandler(logging.StreamHandler): def emit(self, record): msg = self.format(record) @@ -160,13 +167,11 @@ def emit(self, record): self.handleError(record) -''' -Allows to handle custom placeholders 'title_color' and 'message_no_color'. -To use this formatter, make sure to pass 'color', 'title' as log extras. -''' - - class AutoGptFormatter(logging.Formatter): + """ + Allows to handle custom placeholders 'title_color' and 'message_no_color'. + To use this formatter, make sure to pass 'color', 'title' as log extras. + """ def format(self, record: LogRecord) -> str: if (hasattr(record, 'color')): record.title_color = getattr(record, 'color') + getattr(record, 'title') + " " + Style.RESET_ALL diff --git a/scripts/main.py b/scripts/main.py index 7a4a32d4de29..c08ba1b2924d 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -20,16 +20,18 @@ cfg = Config() + def check_openai_api_key(): """Check if the OpenAI API key is set in config.py or as an environment variable.""" if not cfg.openai_api_key: print( Fore.RED + - "Please set your OpenAI API key in config.py or as an environment variable." + "Please set your OpenAI API key in .env or as an environment variable." ) print("You can get your key from https://beta.openai.com/account/api-keys") exit(1) + def attempt_to_fix_json_by_finding_outermost_brackets(json_string): if cfg.speak_mode and cfg.debug_mode: speak.say_text("I have received an invalid JSON response from the OpenAI API. 
Trying to fix it now.") @@ -58,6 +60,7 @@ def attempt_to_fix_json_by_finding_outermost_brackets(json_string): return json_string + def print_assistant_thoughts(assistant_reply): """Prints the assistant's thoughts to the console""" global ai_name @@ -262,6 +265,7 @@ def prompt_user(): config = AIConfig(ai_name, ai_role, ai_goals) return config + def parse_arguments(): """Parses the arguments passed to the script""" global cfg @@ -310,125 +314,129 @@ def parse_arguments(): supported_memory = get_supported_memory_backends() chosen = args.memory_type if not chosen in supported_memory: - print_to_console("ONLY THE FOLLOWING MEMORY BACKENDS ARE SUPPORTED: ", Fore.RED, f'{supported_memory}') - print_to_console(f"Defaulting to: ", Fore.YELLOW, cfg.memory_backend) + logger.typewriter_log("ONLY THE FOLLOWING MEMORY BACKENDS ARE SUPPORTED: ", Fore.RED, f'{supported_memory}') + logger.typewriter_log(f"Defaulting to: ", Fore.YELLOW, cfg.memory_backend) else: cfg.memory_backend = chosen -# TODO: fill in llm values here -check_openai_api_key() -cfg = Config() -parse_arguments() -logger.set_level(logging.DEBUG if cfg.debug_mode else logging.INFO) -ai_name = "" -prompt = construct_prompt() -# print(prompt) -# Initialize variables -full_message_history = [] -result = None -next_action_count = 0 -# Make a constant: -user_input = "Determine which next command to use, and respond using the format specified above:" - -# Initialize memory and make sure it is empty. -# this is particularly important for indexing and referencing pinecone memory -memory = get_memory(cfg, init=True) -print('Using memory of type: ' + memory.__class__.__name__) - -# Interaction Loop -while True: - # Send message to AI, get response - with Spinner("Thinking... "): - assistant_reply = chat.chat_with_ai( - prompt, - user_input, - full_message_history, - memory, - cfg.fast_token_limit) # TODO: This hardcodes the model to use GPT3.5. 
Make this an argument - - # Print Assistant thoughts - print_assistant_thoughts(assistant_reply) - - # Get command name and arguments - try: - command_name, arguments = cmd.get_command(attempt_to_fix_json_by_finding_outermost_brackets(assistant_reply)) - if cfg.speak_mode: - speak.say_text(f"I want to execute {command_name}") - except Exception as e: - logger.error("Error: \n", str(e)) - - if not cfg.continuous_mode and next_action_count == 0: - ### GET USER AUTHORIZATION TO EXECUTE COMMAND ### - # Get key press: Prompt the user to press enter to continue or escape - # to exit - user_input = "" - logger.typewriter_log( - "NEXT ACTION: ", - Fore.CYAN, - f"COMMAND = {Fore.CYAN}{command_name}{Style.RESET_ALL} ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}") - print( - f"Enter 'y' to authorise command, 'y -N' to run N continuous commands, 'n' to exit program, or enter feedback for {ai_name}...", - flush=True) - while True: - console_input = utils.clean_input(Fore.MAGENTA + "Input:" + Style.RESET_ALL) - if console_input.lower().rstrip() == "y": - user_input = "GENERATE NEXT COMMAND JSON" - break - elif console_input.lower().startswith("y -"): - try: - next_action_count = abs(int(console_input.split(" ")[1])) +def main(): + global ai_name, memory + # TODO: fill in llm values here + check_openai_api_key() + parse_arguments() + logger.set_level(logging.DEBUG if cfg.debug_mode else logging.INFO) + ai_name = "" + prompt = construct_prompt() + # print(prompt) + # Initialize variables + full_message_history = [] + result = None + next_action_count = 0 + # Make a constant: + user_input = "Determine which next command to use, and respond using the format specified above:" + # Initialize memory and make sure it is empty. + # this is particularly important for indexing and referencing pinecone memory + memory = get_memory(cfg, init=True) + print('Using memory of type: ' + memory.__class__.__name__) + # Interaction Loop + while True: + # Send message to AI, get response + with Spinner("Thinking... "): + assistant_reply = chat.chat_with_ai( + prompt, + user_input, + full_message_history, + memory, + cfg.fast_token_limit) # TODO: This hardcodes the model to use GPT3.5. Make this an argument + + # Print Assistant thoughts + print_assistant_thoughts(assistant_reply) + + # Get command name and arguments + try: + command_name, arguments = cmd.get_command( + attempt_to_fix_json_by_finding_outermost_brackets(assistant_reply)) + if cfg.speak_mode: + speak.say_text(f"I want to execute {command_name}") + except Exception as e: + logger.error("Error: \n", str(e)) + + if not cfg.continuous_mode and next_action_count == 0: + ### GET USER AUTHORIZATION TO EXECUTE COMMAND ### + # Get key press: Prompt the user to press enter to continue or escape + # to exit + user_input = "" + logger.typewriter_log( + "NEXT ACTION: ", + Fore.CYAN, + f"COMMAND = {Fore.CYAN}{command_name}{Style.RESET_ALL} ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}") + print( + f"Enter 'y' to authorise command, 'y -N' to run N continuous commands, 'n' to exit program, or enter feedback for {ai_name}...", + flush=True) + while True: + console_input = utils.clean_input(Fore.MAGENTA + "Input:" + Style.RESET_ALL) + if console_input.lower().rstrip() == "y": user_input = "GENERATE NEXT COMMAND JSON" - except ValueError: - print("Invalid input format. 
Please enter 'y -n' where n is the number of continuous tasks.") - continue - break - elif console_input.lower() == "n": - user_input = "EXIT" + break + elif console_input.lower().startswith("y -"): + try: + next_action_count = abs(int(console_input.split(" ")[1])) + user_input = "GENERATE NEXT COMMAND JSON" + except ValueError: + print("Invalid input format. Please enter 'y -n' where n is the number of continuous tasks.") + continue + break + elif console_input.lower() == "n": + user_input = "EXIT" + break + else: + user_input = console_input + command_name = "human_feedback" + break + + if user_input == "GENERATE NEXT COMMAND JSON": + logger.typewriter_log( + "-=-=-=-=-=-=-= COMMAND AUTHORISED BY USER -=-=-=-=-=-=-=", + Fore.MAGENTA, + "") + elif user_input == "EXIT": + print("Exiting...", flush=True) break - else: - user_input = console_input - command_name = "human_feedback" - break - - if user_input == "GENERATE NEXT COMMAND JSON": + else: + # Print command logger.typewriter_log( - "-=-=-=-=-=-=-= COMMAND AUTHORISED BY USER -=-=-=-=-=-=-=", - Fore.MAGENTA, - "") - elif user_input == "EXIT": - print("Exiting...", flush=True) - break - else: - # Print command - logger.typewriter_log( - "NEXT ACTION: ", - Fore.CYAN, - f"COMMAND = {Fore.CYAN}{command_name}{Style.RESET_ALL} ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}") - - # Execute command - if command_name is not None and command_name.lower().startswith( "error" ): - result = f"Command {command_name} threw the following error: " + arguments - elif command_name == "human_feedback": - result = f"Human feedback: {user_input}" - else: - result = f"Command {command_name} returned: {cmd.execute_command(command_name, arguments)}" - if next_action_count > 0: - next_action_count -= 1 - - memory_to_add = f"Assistant Reply: {assistant_reply} " \ - f"\nResult: {result} " \ - f"\nHuman Feedback: {user_input} " - - memory.add(memory_to_add) - - # Check if there's a result from the command append it to the message - # history - if result is not None: - full_message_history.append(chat.create_chat_message("system", result)) - logger.typewriter_log("SYSTEM: ", Fore.YELLOW, result) - else: - full_message_history.append( - chat.create_chat_message( - "system", "Unable to execute command")) - logger.typewriter_log("SYSTEM: ", Fore.YELLOW, "Unable to execute command") + "NEXT ACTION: ", + Fore.CYAN, + f"COMMAND = {Fore.CYAN}{command_name}{Style.RESET_ALL} ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}") + + # Execute command + if command_name is not None and command_name.lower().startswith("error"): + result = f"Command {command_name} threw the following error: " + arguments + elif command_name == "human_feedback": + result = f"Human feedback: {user_input}" + else: + result = f"Command {command_name} returned: {cmd.execute_command(command_name, arguments)}" + if next_action_count > 0: + next_action_count -= 1 + + memory_to_add = f"Assistant Reply: {assistant_reply} " \ + f"\nResult: {result} " \ + f"\nHuman Feedback: {user_input} " + + memory.add(memory_to_add) + + # Check if there's a result from the command append it to the message + # history + if result is not None: + full_message_history.append(chat.create_chat_message("system", result)) + logger.typewriter_log("SYSTEM: ", Fore.YELLOW, result) + else: + full_message_history.append( + chat.create_chat_message( + "system", "Unable to execute command")) + logger.typewriter_log("SYSTEM: ", Fore.YELLOW, "Unable to execute command") + + +if __name__ == "__main__": + main() diff --git 
a/scripts/memory/__init__.py b/scripts/memory/__init__.py index a07f9fd88df3..a0afc874e2d6 100644 --- a/scripts/memory/__init__.py +++ b/scripts/memory/__init__.py @@ -1,4 +1,5 @@ from memory.local import LocalCache +from memory.no_memory import NoMemory # List of supported memory backends # Add a backend to this list if the import attempt is successful @@ -18,6 +19,7 @@ print("Pinecone not installed. Skipping import.") PineconeMemory = None + def get_memory(cfg, init=False): memory = None if cfg.memory_backend == "pinecone": @@ -34,6 +36,8 @@ def get_memory(cfg, init=False): " use Redis as a memory backend.") else: memory = RedisMemory(cfg) + elif cfg.memory_backend == "no_memory": + memory = NoMemory(cfg) if memory is None: memory = LocalCache(cfg) @@ -41,6 +45,7 @@ def get_memory(cfg, init=False): memory.clear() return memory + def get_supported_memory_backends(): return supported_memory @@ -50,4 +55,5 @@ def get_supported_memory_backends(): "LocalCache", "RedisMemory", "PineconeMemory", + "NoMemory" ] diff --git a/scripts/memory/base.py b/scripts/memory/base.py index bb22963a8d7d..4dbf6791991a 100644 --- a/scripts/memory/base.py +++ b/scripts/memory/base.py @@ -2,13 +2,14 @@ import abc from config import AbstractSingleton, Config import openai + cfg = Config() def get_ada_embedding(text): text = text.replace("\n", " ") if cfg.use_azure: - return openai.Embedding.create(input=[text], engine=cfg.azure_embeddigs_deployment_id, model="text-embedding-ada-002")["data"][0]["embedding"] + return openai.Embedding.create(input=[text], engine=cfg.get_azure_deployment_id_for_model("text-embedding-ada-002"))["data"][0]["embedding"] else: return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"] diff --git a/scripts/memory/no_memory.py b/scripts/memory/no_memory.py new file mode 100644 index 000000000000..830982f9c42b --- /dev/null +++ b/scripts/memory/no_memory.py @@ -0,0 +1,66 @@ +from typing import Optional, List, Any + +from memory.base import MemoryProviderSingleton + + +class NoMemory(MemoryProviderSingleton): + def __init__(self, cfg): + """ + Initializes the NoMemory provider. + + Args: + cfg: The config object. + + Returns: None + """ + pass + + def add(self, data: str) -> str: + """ + Adds a data point to the memory. No action is taken in NoMemory. + + Args: + data: The data to add. + + Returns: An empty string. + """ + return "" + + def get(self, data: str) -> Optional[List[Any]]: + """ + Gets the data from the memory that is most relevant to the given data. + NoMemory always returns None. + + Args: + data: The data to compare to. + + Returns: None + """ + return None + + def clear(self) -> str: + """ + Clears the memory. No action is taken in NoMemory. + + Returns: An empty string. + """ + return "" + + def get_relevant(self, data: str, num_relevant: int = 5) -> Optional[List[Any]]: + """ + Returns all the data in the memory that is relevant to the given data. + NoMemory always returns None. + + Args: + data: The data to compare to. + num_relevant: The number of relevant data to return. + + Returns: None + """ + return None + + def get_stats(self): + """ + Returns: An empty dictionary as there are no stats in NoMemory. 
+ """ + return {} diff --git a/scripts/memory/pinecone.py b/scripts/memory/pinecone.py index 8e1eaa570fee..20a905b32c00 100644 --- a/scripts/memory/pinecone.py +++ b/scripts/memory/pinecone.py @@ -2,6 +2,8 @@ import pinecone from memory.base import MemoryProviderSingleton, get_ada_embedding +from logger import logger +from colorama import Fore, Style class PineconeMemory(MemoryProviderSingleton): @@ -17,6 +19,15 @@ def __init__(self, cfg): # for now this works. # we'll need a more complicated and robust system if we want to start with memory. self.vec_num = 0 + + try: + pinecone.whoami() + except Exception as e: + logger.typewriter_log("FAILED TO CONNECT TO PINECONE", Fore.RED, Style.BRIGHT + str(e) + Style.RESET_ALL) + logger.double_check("Please ensure you have setup and configured Pinecone properly for use. " + + f"You can check out {Fore.CYAN + Style.BRIGHT}https://github.com/Torantulino/Auto-GPT#-pinecone-api-key-setup{Style.RESET_ALL} to ensure you've set up everything correctly.") + exit(1) + if table_name not in pinecone.list_indexes(): pinecone.create_index(table_name, dimension=dimension, metric=metric, pod_type=pod_type) self.index = pinecone.Index(table_name) diff --git a/scripts/memory/redismem.py b/scripts/memory/redismem.py index 2082fe588764..49045dd882f9 100644 --- a/scripts/memory/redismem.py +++ b/scripts/memory/redismem.py @@ -7,6 +7,8 @@ import numpy as np from memory.base import MemoryProviderSingleton, get_ada_embedding +from logger import logger +from colorama import Fore, Style SCHEMA = [ @@ -44,6 +46,16 @@ def __init__(self, cfg): db=0 # Cannot be changed ) self.cfg = cfg + + # Check redis connection + try: + self.redis.ping() + except redis.ConnectionError as e: + logger.typewriter_log("FAILED TO CONNECT TO REDIS", Fore.RED, Style.BRIGHT + str(e) + Style.RESET_ALL) + logger.double_check("Please ensure you have setup and configured Redis properly for use. " + + f"You can check out {Fore.CYAN + Style.BRIGHT}https://github.com/Torantulino/Auto-GPT#redis-setup{Style.RESET_ALL} to ensure you've set up everything correctly.") + exit(1) + if cfg.wipe_redis_on_start: self.redis.flushall() try: diff --git a/scripts/speak.py b/scripts/speak.py index 64054e3c5843..7a17873c5ec3 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -31,6 +31,7 @@ mutex_lock = Lock() # Ensure only one sound is played at a time queue_semaphore = Semaphore(1) # The amount of sounds to queue before blocking the main thread + def eleven_labs_speech(text, voice_index=0): """Speak text using elevenlabs.io's API""" tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format( @@ -51,6 +52,7 @@ def eleven_labs_speech(text, voice_index=0): print("Response content:", response.content) return False + def gtts_speech(text): tts = gtts.gTTS(text) with mutex_lock: @@ -58,6 +60,7 @@ def gtts_speech(text): playsound("speech.mp3", True) os.remove("speech.mp3") + def macos_tts_speech(text, voice_index=0): if voice_index == 0: os.system(f'say "{text}"') @@ -67,6 +70,7 @@ def macos_tts_speech(text, voice_index=0): else: os.system(f'say -v Samantha "{text}"') + def say_text(text, voice_index=0): def speak(): diff --git a/scripts/token_counter.py b/scripts/token_counter.py index 635d3286385f..8aecf1681be5 100644 --- a/scripts/token_counter.py +++ b/scripts/token_counter.py @@ -1,6 +1,7 @@ import tiktoken from typing import List, Dict + def count_message_tokens(messages : List[Dict[str, str]], model : str = "gpt-3.5-turbo-0301") -> int: """ Returns the number of tokens used by a list of messages. 
@@ -41,6 +42,7 @@ def count_message_tokens(messages : List[Dict[str, str]], model : str = "gpt-3.5 num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> return num_tokens + def count_string_tokens(string: str, model_name: str) -> int: """ Returns the number of tokens in a text string. diff --git a/tests.py b/tests.py index ce21c1f450df..4dbfdd461ad2 100644 --- a/tests.py +++ b/tests.py @@ -3,6 +3,6 @@ if __name__ == "__main__": # Load all tests from the 'scripts/tests' package suite = unittest.defaultTestLoader.discover('scripts/tests') - + # Run the tests unittest.TextTestRunner().run(suite) diff --git a/tests/integration/memory_tests.py b/tests/integration/memory_tests.py index 5f1611be96f9..d0c309628041 100644 --- a/tests/integration/memory_tests.py +++ b/tests/integration/memory_tests.py @@ -8,6 +8,7 @@ from config import Config from memory.local import LocalCache + class TestLocalCache(unittest.TestCase): def random_string(self, length): diff --git a/tests/local_cache_test.py b/tests/local_cache_test.py index d1f1ef084370..0352624ea26b 100644 --- a/tests/local_cache_test.py +++ b/tests/local_cache_test.py @@ -4,6 +4,7 @@ sys.path.append(os.path.abspath('../scripts')) from memory.local import LocalCache + def MockConfig(): return type('MockConfig', (object,), { 'debug_mode': False, @@ -12,6 +13,7 @@ def MockConfig(): 'memory_index': 'auto-gpt', }) + class TestLocalCache(unittest.TestCase): def setUp(self): diff --git a/tests/test_config.py b/tests/test_config.py index c1310b709893..ba8381e1e73a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,6 +1,7 @@ import unittest from scripts.config import Config + class TestConfig(unittest.TestCase): def test_singleton(self): diff --git a/tests/test_json_parser.py b/tests/test_json_parser.py index b8cb2680d40f..c403c73dbc19 100644 --- a/tests/test_json_parser.py +++ b/tests/test_json_parser.py @@ -3,6 +3,7 @@ from scripts.json_parser import fix_and_parse_json + class TestParseJson(unittest.TestCase): def test_valid_json(self): # Test that a valid JSON string is parsed correctly @@ -13,12 +14,14 @@ def test_valid_json(self): def test_invalid_json_minor(self): # Test that an invalid JSON string can be fixed with gpt json_str = '{"name": "John", "age": 30, "city": "New York",}' - self.assertRaises(Exception, fix_and_parse_json, json_str, try_to_fix_with_gpt=False) + with self.assertRaises(Exception): + fix_and_parse_json(json_str, try_to_fix_with_gpt=False) def test_invalid_json_major_with_gpt(self): # Test that an invalid JSON string raises an error when try_to_fix_with_gpt is False json_str = 'BEGIN: "name": "John" - "age": 30 - "city": "New York" :END' - self.assertRaises(Exception, fix_and_parse_json, json_str, try_to_fix_with_gpt=False) + with self.assertRaises(Exception): + fix_and_parse_json(json_str, try_to_fix_with_gpt=False) def test_invalid_json_major_without_gpt(self): # Test that a REALLY invalid JSON string raises an error when try_to_fix_with_gpt is False @@ -50,7 +53,7 @@ def test_invalid_json_leading_sentence_with_gpt(self): good_obj = { "command": { "name": "browse_website", - "args":{ + "args": { "url": "https://github.com/Torantulino/Auto-GPT" } }, @@ -89,7 +92,7 @@ def test_invalid_json_leading_sentence_with_gpt(self): good_obj = { "command": { "name": "browse_website", - "args":{ + "args": { "url": "https://github.com/Torantulino/Auto-GPT" } }, diff --git a/tests/unit/json_tests.py b/tests/unit/json_tests.py index 1edbaeaf3652..4f3267217a36 100644 --- a/tests/unit/json_tests.py +++ 
b/tests/unit/json_tests.py @@ -5,6 +5,7 @@ sys.path.append(os.path.abspath('../scripts')) from json_parser import fix_and_parse_json + class TestParseJson(unittest.TestCase): def test_valid_json(self): # Test that a valid JSON string is parsed correctly @@ -52,7 +53,7 @@ def test_invalid_json_leading_sentence_with_gpt(self): good_obj = { "command": { "name": "browse_website", - "args":{ + "args": { "url": "https://github.com/Torantulino/Auto-GPT" } }, @@ -91,7 +92,7 @@ def test_invalid_json_leading_sentence_with_gpt(self): good_obj = { "command": { "name": "browse_website", - "args":{ + "args": { "url": "https://github.com/Torantulino/Auto-GPT" } }, diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py new file mode 100644 index 000000000000..2172d1a2bef6 --- /dev/null +++ b/tests/unit/test_browse_scrape_links.py @@ -0,0 +1,118 @@ + +# Generated by CodiumAI + +# Dependencies: +# pip install pytest-mock +import pytest + +from scripts.browse import scrape_links + +""" +Code Analysis + +Objective: +The objective of the 'scrape_links' function is to scrape hyperlinks from a +given URL and return them in a formatted way. + +Inputs: +- url: a string representing the URL to be scraped. + +Flow: +1. Send a GET request to the given URL using the requests library and the user agent header from the config file. +2. Check if the response contains an HTTP error. If it does, return "error". +3. Parse the HTML content of the response using the BeautifulSoup library. +4. Remove any script and style tags from the parsed HTML. +5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function. +6. Format the extracted hyperlinks using the 'format_hyperlinks' function. +7. Return the formatted hyperlinks. + +Outputs: +- A list of formatted hyperlinks. + +Additional aspects: +- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP +requests and parse HTML content, respectively. +- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML. +- The 'format_hyperlinks' function is called to format the extracted hyperlinks. +- The function checks for HTTP errors and returns "error" if any are found. +""" + + +class TestScrapeLinks: + + # Tests that the function returns a list of formatted hyperlinks when + # provided with a valid url that returns a webpage with hyperlinks. + def test_valid_url_with_hyperlinks(self): + url = "https://www.google.com" + result = scrape_links(url) + assert len(result) > 0 + assert isinstance(result, list) + assert isinstance(result[0], str) + + # Tests that the function returns correctly formatted hyperlinks when given a valid url. + def test_valid_url(self, mocker): + # Mock the requests.get() function to return a response with sample HTML containing hyperlinks + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = "<html><body><a href='https://www.google.com'>Google</a></body></html>" + mocker.patch('requests.get', return_value=mock_response) + + # Call the function with a valid URL + result = scrape_links("https://www.example.com") + + # Assert that the function returns correctly formatted hyperlinks + assert result == ["Google (https://www.google.com)"] + + # Tests that the function returns "error" when given an invalid url.
+ def test_invalid_url(self, mocker): + # Mock the requests.get() function to return an HTTP error response + mock_response = mocker.Mock() + mock_response.status_code = 404 + mocker.patch('requests.get', return_value=mock_response) + + # Call the function with an invalid URL + result = scrape_links("https://www.invalidurl.com") + + # Assert that the function returns "error" + assert "Error:" in result + + # Tests that the function returns an empty list when the html contains no hyperlinks. + def test_no_hyperlinks(self, mocker): + # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>" + mocker.patch('requests.get', return_value=mock_response) + + # Call the function with a URL containing no hyperlinks + result = scrape_links("https://www.example.com") + + # Assert that the function returns an empty list + assert result == [] + + # Tests that scrape_links() correctly extracts and formats hyperlinks from + # a sample HTML containing a few hyperlinks. + def test_scrape_links_with_few_hyperlinks(self, mocker): + # Mock the requests.get() function to return a response with a sample HTML containing hyperlinks + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.text = """ + <html> + <body> + <a href="https://www.google.com">Google</a> + <a href="https://github.com">GitHub</a> + <a href="https://www.codium.ai">CodiumAI</a> + </body> + </html> + """ + mocker.patch('requests.get', return_value=mock_response) + + # Call the function being tested + result = scrape_links("https://www.example.com") + + # Assert that the function returns a list of formatted hyperlinks + assert isinstance(result, list) + assert len(result) == 3 + assert result[0] == "Google (https://www.google.com)" + assert result[1] == "GitHub (https://github.com)" + assert result[2] == "CodiumAI (https://www.codium.ai)" diff --git a/tests/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py similarity index 97% rename from tests/test_browse_scrape_text.py rename to tests/unit/test_browse_scrape_text.py index 775eefcd253f..9385cde71b07 100644 --- a/tests/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -2,7 +2,6 @@ # Generated by CodiumAI import requests -import tests.context from scripts.browse import scrape_text @@ -10,7 +9,8 @@ Code Analysis Objective: -The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts. +The objective of the "scrape_text" function is to scrape the text content from +a given URL and return it as a string, after removing any unwanted HTML tags and scripts. Inputs: - url: a string representing the URL of the webpage to be scraped. @@ -33,6 +33,7 @@ - The function uses a generator expression to split the text into lines and chunks, which can improve performance for large amounts of text. """ + class TestScrapeText: # Tests that scrape_text() returns the expected text when given a valid URL.