From 23ceac0fe0d43c6ff2e7e5fec6d9f7db1fe07b3f Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Mon, 25 Sep 2023 21:42:24 +0000 Subject: [PATCH] cleanup --- notebook/agentchat_RetrieveChat.ipynb | 2 +- notebook/agentchat_groupchat_research.ipynb | 4 +- notebook/agentchat_human_feedback.ipynb | 2 +- notebook/agentchat_planning.ipynb | 4 +- notebook/agentchat_stream.ipynb | 2 +- notebook/agentchat_two_users.ipynb | 74 +- notebook/agentchat_web_info.ipynb | 2 +- notebook/research/autogen_code.ipynb | 790 -------------------- notebook/research/math_level5counting.ipynb | 784 ------------------- 9 files changed, 42 insertions(+), 1622 deletions(-) delete mode 100644 notebook/research/autogen_code.ipynb delete mode 100644 notebook/research/math_level5counting.ipynb diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index 169e6b1a3d7c..43c6ccd91785 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -79,7 +79,7 @@ "import autogen\n", "\n", "config_list = autogen.config_list_from_json(\n", - " env_or_file=\".config.local\",\n", + " env_or_file=\"OAI_CONFIG_LIST\",\n", " file_location=\".\",\n", " filter_dict={\n", " \"model\": {\n", diff --git a/notebook/agentchat_groupchat_research.ipynb b/notebook/agentchat_groupchat_research.ipynb index 599309f1c689..8d27935ae1e6 100644 --- a/notebook/agentchat_groupchat_research.ipynb +++ b/notebook/agentchat_groupchat_research.ipynb @@ -43,7 +43,7 @@ "source": [ "## Set your API Endpoint\n", "\n", - "The [`config_list_from_json`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file." + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file." 
] }, { @@ -67,7 +67,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "It first looks for environment variable \"OAI_CONFIG_LIST\" which needs to be a valid json string. If that variable is not found, it then looks for a json file named \"OAI_CONFIG_LIST\". It filters the configs by models (you can filter by other keys as well). Only the gpt-4-32k models are kept in the list based on the filter condition.\n", + "It first looks for environment variable \"OAI_CONFIG_LIST\" which needs to be a valid json string. If that variable is not found, it then looks for a json file named \"OAI_CONFIG_LIST\". It filters the configs by models (you can filter by other keys as well).\n", "\n", "The config list looks like the following:\n", "```python\n", diff --git a/notebook/agentchat_human_feedback.ipynb b/notebook/agentchat_human_feedback.ipynb index 232462ad7e66..ddf9921be928 100644 --- a/notebook/agentchat_human_feedback.ipynb +++ b/notebook/agentchat_human_feedback.ipynb @@ -55,7 +55,7 @@ "source": [ "## Set your API Endpoint\n", "\n", - "The [`config_list_from_json`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file." + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file." ] }, { diff --git a/notebook/agentchat_planning.ipynb b/notebook/agentchat_planning.ipynb index 93513fb6415f..8b0d822be49b 100644 --- a/notebook/agentchat_planning.ipynb +++ b/notebook/agentchat_planning.ipynb @@ -55,11 +55,11 @@ "source": [ "## Set your API Endpoint\n", "\n", - "* The [`config_list_openai_aoai`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_openai_aoai) function tries to create a list of configurations using Azure OpenAI endpoints and OpenAI endpoints. 
It assumes the api keys and api bases are stored in the corresponding environment variables or local txt files:\n", + "* The [`config_list_openai_aoai`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_openai_aoai) function tries to create a list of configurations using Azure OpenAI endpoints and OpenAI endpoints. It assumes the api keys and api bases are stored in the corresponding environment variables or local txt files:\n", " - OpenAI API key: os.environ[\"OPENAI_API_KEY\"] or `openai_api_key_file=\"key_openai.txt\"`.\n", " - Azure OpenAI API key: os.environ[\"AZURE_OPENAI_API_KEY\"] or `aoai_api_key_file=\"key_aoai.txt\"`. Multiple keys can be stored, one per line.\n", " - Azure OpenAI API base: os.environ[\"AZURE_OPENAI_API_BASE\"] or `aoai_api_base_file=\"base_aoai.txt\"`. Multiple bases can be stored, one per line.\n", - "* The [`config_list_from_json`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file. It first looks for environment variable `env_or_file` which needs to be a valid json string. If that variable is not found, it then looks for a json file with the same name. It filters the configs by filter_dict.\n", + "* The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file. It first looks for an environment variable with a specified name. The value of the environment variable needs to be a valid json string. If that variable is not found, it then looks for a json file with the same name. It filters the configs by filter_dict.\n", "\n", "It's OK to have only the OpenAI API key, or only the Azure OpenAI API key + base. 
If you open this notebook in colab, you can upload your files by clicking the file icon on the left panel and then choose \"upload file\" icon.\n" ] diff --git a/notebook/agentchat_stream.ipynb b/notebook/agentchat_stream.ipynb index bc2ee88f1c7b..a0edb1ca3216 100644 --- a/notebook/agentchat_stream.ipynb +++ b/notebook/agentchat_stream.ipynb @@ -55,7 +55,7 @@ "source": [ "## Set your API Endpoint\n", "\n", - "The [`config_list_from_json`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file.\n" + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file.\n" ] }, { diff --git a/notebook/agentchat_two_users.ipynb b/notebook/agentchat_two_users.ipynb index 1c20a58b19fc..07164504b8b6 100644 --- a/notebook/agentchat_two_users.ipynb +++ b/notebook/agentchat_two_users.ipynb @@ -5,7 +5,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\"Open" + "\"Open" ] }, { @@ -19,15 +19,15 @@ "source": [ "# Auto Generated Agent Chat: Collaborative Task Solving with Multiple Agents and Human Users\n", "\n", - "`flaml.autogen` offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framwork allows tool use and human participance through multi-agent conversation. Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", + "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation. 
Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", "\n", "In this notebook, we demonstrate an application involving multiple agents and human users to work together and accomplish a task. `AssistantAgent` is an LLM-based agent that can write Python code (in a Python coding block) for a user to execute for a given task. `UserProxyAgent` is an agent which serves as a proxy for a user to execute the code written by `AssistantAgent`. We create multiple `UserProxyAgent` instances which can represent different human users.\n", "\n", "## Requirements\n", "\n", - "FLAML requires `Python>=3.8`. To run this notebook example, please install flaml with the [autogen] option:\n", + "AutoGen requires `Python>=3.8`. To run this notebook example, please install:\n", "```bash\n", - "pip install flaml[autogen]\n", + "pip install pyautogen\n", "```" ] }, @@ -44,7 +44,7 @@ }, "outputs": [], "source": [ - "# %pip install flaml[autogen]~=2.0.2" + "# %pip install pyautogen~=0.1.1" ] }, { @@ -54,13 +54,35 @@ "source": [ "## Set your API Endpoint\n", "\n", - "* The [`config_list_openai_aoai`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_openai_aoai) function tries to create a list of configurations using Azure OpenAI endpoints and OpenAI endpoints. It assumes the api keys and api bases are stored in the corresponding environment variables or local txt files:\n", - " - OpenAI API key: os.environ[\"OPENAI_API_KEY\"] or `openai_api_key_file=\"key_openai.txt\"`.\n", - " - Azure OpenAI API key: os.environ[\"AZURE_OPENAI_API_KEY\"] or `aoai_api_key_file=\"key_aoai.txt\"`. Multiple keys can be stored, one per line.\n", - " - Azure OpenAI API base: os.environ[\"AZURE_OPENAI_API_BASE\"] or `aoai_api_base_file=\"base_aoai.txt\"`. 
Multiple bases can be stored, one per line.\n", - "* The [`config_list_from_json`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file. It first looks for environment variable `env_or_file` which needs to be a valid json string. If that variable is not found, it then looks for a json file with the same name. It filters the configs by filter_dict.\n", + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file.\n", "\n", - "It's OK to have only the OpenAI API key, or only the Azure OpenAI API key + base. If you open this notebook in colab, you can upload your files by clicking the file icon on the left panel and then choose \"upload file\" icon.\n" + "It first looks for an environment variable of a specified name (\"OAI_CONFIG_LIST\" in this example) which needs to be a valid json string. If that variable is not found, it then looks for a json file with the same name. 
It filters the configs by models (you can filter by other keys as well).\n", + "\n", + "The json looks like the following:\n", + "```json\n", + "[\n", + " {\n", + " \"model\": \"gpt-4\",\n", + " \"api_key\": \"\"\n", + " },\n", + " {\n", + " \"model\": \"gpt-4\",\n", + " \"api_key\": \"\",\n", + " \"api_base\": \"\",\n", + " \"api_type\": \"azure\",\n", + " \"api_version\": \"2023-06-01-preview\"\n", + " },\n", + " {\n", + " \"model\": \"gpt-4-32k\",\n", + " \"api_key\": \"\",\n", + " \"api_base\": \"\",\n", + " \"api_type\": \"azure\",\n", + " \"api_version\": \"2023-06-01-preview\"\n", + " }\n", + "]\n", + "```\n", + "\n", + "If you open this notebook in colab, you can upload your files by clicking the file icon on the left panel and then choose \"upload file\" icon.\n" ] }, { @@ -69,7 +91,7 @@ "metadata": {}, "outputs": [], "source": [ - "from flaml import autogen\n", + "import autogen\n", "\n", "config_list = autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", @@ -84,34 +106,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The config list looks like the following:\n", - "```python\n", - "config_list = [\n", - " {\n", - " 'model': 'gpt-4',\n", - " 'api_key': '',\n", - " }, # OpenAI API endpoint for gpt-4\n", - " {\n", - " 'model': 'gpt-4',\n", - " 'api_key': '',\n", - " 'api_base': '',\n", - " 'api_type': 'azure',\n", - " 'api_version': '2023-06-01-preview',\n", - " }, # Azure OpenAI API endpoint for gpt-4\n", - " {\n", - " 'model': 'gpt-4-32k',\n", - " 'api_key': '',\n", - " 'api_base': '',\n", - " 'api_type': 'azure',\n", - " 'api_version': '2023-06-01-preview',\n", - " }, # Azure OpenAI API endpoint for gpt-4-32k\n", - "]\n", - "```\n", - "\n", - "If you open this notebook in colab, you can upload your files by clicking the file icon on the left panel and then choose \"upload file\" icon.\n", - "\n", - "You can set the value of config_list in other ways you prefer, e.g., loading from a YAML file.\n", - "\n", "## Construct Agents\n", 
"\n", "We define `ask_expert` function to start a conversation between two agents and return a summary of the result. We construct an assistant agent named \"assistant_for_expert\" and a user proxy agent named \"expert\". We specify `human_input_mode` as \"ALWAYS\" in the user proxy agent, which will always ask for feedback from the expert user." diff --git a/notebook/agentchat_web_info.ipynb b/notebook/agentchat_web_info.ipynb index 6ec0b06719a0..986a5615c504 100644 --- a/notebook/agentchat_web_info.ipynb +++ b/notebook/agentchat_web_info.ipynb @@ -59,7 +59,7 @@ "source": [ "## Set your API Endpoint\n", "\n", - "The [`config_list_from_json`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file.\n" + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file.\n" ] }, { diff --git a/notebook/research/autogen_code.ipynb b/notebook/research/autogen_code.ipynb deleted file mode 100644 index be6c31b3cbf8..000000000000 --- a/notebook/research/autogen_code.ipynb +++ /dev/null @@ -1,790 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "Copyright (c) Microsoft Corporation. All rights reserved. \n", - "\n", - "Licensed under the MIT License.\n", - "\n", - "# Use AutoGen to Optimize Code Generation Performance\n", - "\n", - "In this notebook, we optimize OpenAI models for code generation. 
We use [the HumanEval benchmark](https://huggingface.co/datasets/openai_humaneval) released by OpenAI for synthesizing programs from docstrings.\n", - "\n", - "Related link: [Blogpost](https://microsoft.github.io/autogen/blog/2023/05/18/GPT-adaptive-humaneval) based on this experiment.\n", - "\n", - "## Requirements\n", - "\n", - "AutoGen requires `Python>=3.8`. To run this notebook example, please install:\n", - "```bash\n", - "pip install flaml[autogen]==1.2.2\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-24T23:25:36.910966Z", - "iopub.status.busy": "2023-02-24T23:25:36.910473Z", - "iopub.status.idle": "2023-02-24T23:25:36.914554Z", - "shell.execute_reply": "2023-02-24T23:25:36.914030Z" - } - }, - "outputs": [], - "source": [ - "# %pip install flaml[autogen]==1.2.2 datasets" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set your OpenAI key:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-24T23:25:36.917301Z", - "iopub.status.busy": "2023-02-24T23:25:36.917011Z", - "iopub.status.idle": "2023-02-24T23:25:36.923156Z", - "shell.execute_reply": "2023-02-24T23:25:36.922619Z" - } - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if \"OPENAI_API_KEY\" not in os.environ:\n", - " os.environ[\"OPENAI_API_KEY\"] = \"\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you use Azure OpenAI, uncomment the following:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-24T23:25:36.925804Z", - "iopub.status.busy": "2023-02-24T23:25:36.925423Z", - "iopub.status.idle": "2023-02-24T23:25:36.928191Z", - "shell.execute_reply": "2023-02-24T23:25:36.927673Z" - } - }, - "outputs": [], - "source": [ - "# import openai\n", - "# 
openai.api_type = \"azure\"\n", - "# openai.api_base = \"https://.openai.azure.com/\"\n", - "# openai.api_version = \"2023-03-15-preview\" # change if necessary" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load dataset\n", - "\n", - "First, we load the humaneval dataset. The dataset contains 164 examples. In each example, the \"prompt\" is the prompt string for eliciting the code generation (renamed into \"definition\"), \"test\" is the Python code for unit test for the example, and \"entry_point\" is the function name to be tested." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-24T23:25:36.931255Z", - "iopub.status.busy": "2023-02-24T23:25:36.930838Z", - "iopub.status.idle": "2023-02-24T23:25:39.148799Z", - "shell.execute_reply": "2023-02-24T23:25:39.148113Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset openai_humaneval (/home/vscode/.cache/huggingface/datasets/openai_humaneval/openai_humaneval/1.0.0/2955cebd73602e828fa8c0a424c594e5fab4ec863b316ca98f3d8fdb6a626e75)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1fdc8853bf2a4aecaa2cd024ad99b5a2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00=3.8`. 
To run this notebook example, please install:\n", - "```bash\n", - "pip install flaml[openai]==1.2.2\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:52.317406Z", - "iopub.status.busy": "2023-02-13T23:40:52.316561Z", - "iopub.status.idle": "2023-02-13T23:40:52.321193Z", - "shell.execute_reply": "2023-02-13T23:40:52.320628Z" - } - }, - "outputs": [], - "source": [ - "# %pip install flaml[openai]==1.2.2 datasets" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set your OpenAI key:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:52.324240Z", - "iopub.status.busy": "2023-02-13T23:40:52.323783Z", - "iopub.status.idle": "2023-02-13T23:40:52.330570Z", - "shell.execute_reply": "2023-02-13T23:40:52.329750Z" - } - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if \"OPENAI_API_KEY\" not in os.environ:\n", - " os.environ[\"OPENAI_API_KEY\"] = \"\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Uncomment the following to use Azure OpenAI:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:52.333547Z", - "iopub.status.busy": "2023-02-13T23:40:52.333249Z", - "iopub.status.idle": "2023-02-13T23:40:52.336508Z", - "shell.execute_reply": "2023-02-13T23:40:52.335858Z" - } - }, - "outputs": [], - "source": [ - "# import openai\n", - "# openai.api_type = \"azure\"\n", - "# openai.api_base = \"https://.openai.azure.com/\"\n", - "# openai.api_version = \"2023-03-15-preview\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load dataset\n", - "\n", - "First, we load the competition_math dataset. We use a random sample of 50 examples for testing." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:52.339977Z", - "iopub.status.busy": "2023-02-13T23:40:52.339556Z", - "iopub.status.idle": "2023-02-13T23:40:54.603349Z", - "shell.execute_reply": "2023-02-13T23:40:54.602630Z" - } - }, - "outputs": [], - "source": [ - "import datasets\n", - "\n", - "seed = 41\n", - "data = datasets.load_dataset(\"competition_math\")\n", - "train_data = data[\"train\"].shuffle(seed=seed)\n", - "test_data = data[\"test\"].shuffle(seed=seed)\n", - "n_tune_data = 20\n", - "tune_data = [\n", - " {\n", - " \"problem\": train_data[x][\"problem\"],\n", - " \"solution\": train_data[x][\"solution\"],\n", - " }\n", - " for x in range(len(train_data)) if train_data[x][\"level\"] == \"Level 5\" and train_data[x][\"type\"] == \"Counting & Probability\"\n", - "][:n_tune_data]\n", - "test_data = [\n", - " {\n", - " \"problem\": test_data[x][\"problem\"],\n", - " \"solution\": test_data[x][\"solution\"],\n", - " }\n", - " for x in range(len(test_data)) if test_data[x][\"level\"] == \"Level 5\" and test_data[x][\"type\"] == \"Counting & Probability\"\n", - "]\n", - "print(len(tune_data), len(test_data))\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "Check a tuning example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:54.607152Z", - "iopub.status.busy": "2023-02-13T23:40:54.606441Z", - "iopub.status.idle": "2023-02-13T23:40:54.610504Z", - "shell.execute_reply": "2023-02-13T23:40:54.609759Z" - }, - "slideshow": { - "slide_type": "subslide" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "print(tune_data[1][\"problem\"])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is one example of the canonical solution:" - ] 
- }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:54.613590Z", - "iopub.status.busy": "2023-02-13T23:40:54.613168Z", - "iopub.status.idle": "2023-02-13T23:40:54.616873Z", - "shell.execute_reply": "2023-02-13T23:40:54.616193Z" - } - }, - "outputs": [], - "source": [ - "print(tune_data[1][\"solution\"])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Success Metric\n", - "\n", - "For each math task, we use voting to select a response with the most common answers out of all the generated responses. If it has an equivalent answer to the canonical solution, we consider the task as successfully solved. Then we can optimize the mean success rate of a collection of tasks." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:54.626998Z", - "iopub.status.busy": "2023-02-13T23:40:54.626593Z", - "iopub.status.idle": "2023-02-13T23:40:54.631383Z", - "shell.execute_reply": "2023-02-13T23:40:54.630770Z" - } - }, - "outputs": [], - "source": [ - "from flaml.autogen.math_utils import eval_math_responses" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "### Import the oai subpackage from flaml.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:54.634335Z", - "iopub.status.busy": "2023-02-13T23:40:54.633929Z", - "iopub.status.idle": "2023-02-13T23:40:56.105700Z", - "shell.execute_reply": "2023-02-13T23:40:56.105085Z" - }, - "slideshow": { - "slide_type": "slide" - } - }, - "outputs": [], - "source": [ - "from flaml.autogen import oai" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For (local) reproducibility and cost 
efficiency, we cache responses from OpenAI." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:56.109177Z", - "iopub.status.busy": "2023-02-13T23:40:56.108624Z", - "iopub.status.idle": "2023-02-13T23:40:56.112651Z", - "shell.execute_reply": "2023-02-13T23:40:56.112076Z" - }, - "slideshow": { - "slide_type": "slide" - } - }, - "outputs": [], - "source": [ - "oai.ChatCompletion.set_cache(seed)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This will create a disk cache in \".cache/{seed}\". You can change `cache_path` in `set_cache()`. The cache for different seeds are stored separately." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2023-02-13T23:40:56.115383Z", - "iopub.status.busy": "2023-02-13T23:40:56.114975Z", - "iopub.status.idle": "2023-02-13T23:41:55.045654Z", - "shell.execute_reply": "2023-02-13T23:41:55.044973Z" - } - }, - "outputs": [], - "source": [ - "prompt = \"{problem} Solve the problem carefully. Simplify your answer as much as possible. Put the final answer in \\\\boxed{{}}.\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Evaluate the success rate on the test data\n", - "\n", - "You can use `oai.ChatCompletion.test` to evaluate the performance of an entire dataset with a config." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "\n", - "config_n1 = {\"model\": 'gpt-4', \"prompt\": prompt, \"max_tokens\": 600, \"n\": 1}\n", - "n1_result = oai.ChatCompletion.test(test_data[:50], eval_math_responses, **config_n1)\n", - "print(n1_result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "oai.ChatCompletion.request_timeout = 120\n", - "config_n10 = {\"model\": 'gpt-4', \"prompt\": prompt, \"max_tokens\": 600, \"n\": 10}\n", - "n10_result = oai.ChatCompletion.test(test_data[:50], eval_math_responses, logging_level=logging.INFO, **config_n10)\n", - "print(n10_result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config_n30 = {\"model\": 'gpt-4', \"prompt\": prompt, \"max_tokens\": 600, \"n\": 30}\n", - "n30_result = oai.ChatCompletion.test(test_data[:50], eval_math_responses, logging_level=logging.INFO, **config_n30)\n", - "print(n30_result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import defaultdict\n", - "import matplotlib.pyplot as plt\n", - "\n", - "prompts = [\"{problem} Solve the problem carefully. Simplify your answer as much as possible. 
Put the final answer in \\\\boxed{{}}.\"]\n", - "markers = [\"o\", \"s\", \"D\", \"v\", \"p\", \"h\", \"d\", \"P\", \"X\", \"H\", \"8\", \"4\", \"3\", \"2\", \"1\", \"x\", \"+\", \">\", \"<\", \"^\", \"v\", \"1\", \"2\", \"3\", \"4\", \"8\", \"s\", \"p\", \"*\", \"h\", \"H\", \"d\", \"D\", \"|\", \"_\"]\n", - "for j, n in enumerate([10, 30]):\n", - " config = {\"model\": 'gpt-4', \"prompt\": prompts[0], \"max_tokens\": 600, \"n\": n}\n", - " metrics = []\n", - " x, y = [], []\n", - " votes_success = defaultdict(lambda: [0, 0])\n", - " for i, data_i in enumerate(test_data[:50]):\n", - " response = oai.ChatCompletion.create(context=data_i, allow_format_str_template=True, **config)\n", - " responses = oai.ChatCompletion.extract_text(response)\n", - " metrics.append(eval_math_responses(responses, **data_i))\n", - " votes = metrics[-1][\"votes\"]\n", - " success = metrics[-1][\"success_vote\"]\n", - " votes_success[votes][0] += 1\n", - " votes_success[votes][1] += success\n", - " for votes in votes_success:\n", - " x.append(votes)\n", - " y.append(votes_success[votes][1] / votes_success[votes][0])\n", - "\n", - " plt.scatter(x, y, marker=markers[j])\n", - " plt.xlabel(\"top vote\")\n", - " plt.ylabel(\"success rate\")\n", - "plt.legend([\"n=10\", \"n=30\"])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "vscode": { - "interpreter": { - "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" - } - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": { - "2d910cfd2d2a4fc49fc30fbbdc5576a7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": 
"LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "2.0.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "2.0.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border_bottom": null, - "border_left": null, - "border_right": null, - "border_top": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "454146d0f7224f038689031002906e6f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "2.0.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "2.0.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_e4ae2b6f5a974fd4bafb6abb9d12ff26", - "IPY_MODEL_577e1e3cc4db4942b0883577b3b52755", - "IPY_MODEL_b40bdfb1ac1d4cffb7cefcb870c64d45" - ], - "layout": "IPY_MODEL_dc83c7bff2f241309537a8119dfc7555", - "tabbable": null, - "tooltip": null - } - }, - "577e1e3cc4db4942b0883577b3b52755": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - 
"model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "2.0.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "2.0.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_allow_html": false, - "layout": "IPY_MODEL_2d910cfd2d2a4fc49fc30fbbdc5576a7", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_74a6ba0c3cbc4051be0a83e152fe1e62", - "tabbable": null, - "tooltip": null, - "value": 1 - } - }, - "6086462a12d54bafa59d3c4566f06cb2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "2.0.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "2.0.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border_bottom": null, - "border_left": null, - "border_right": null, - "border_top": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "74a6ba0c3cbc4051be0a83e152fe1e62": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "2.0.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "2.0.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "7d3f3d9e15894d05a4d188ff4f466554": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HTMLStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "2.0.0", - "_model_name": "HTMLStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "2.0.0", - "_view_name": "StyleView", - "background": null, - "description_width": "", - "font_size": null, - "text_color": null - } - }, - "b40bdfb1ac1d4cffb7cefcb870c64d45": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "2.0.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "2.0.0", - "_view_name": "HTMLView", - "description": "", - "description_allow_html": false, - "layout": "IPY_MODEL_f1355871cc6f4dd4b50d9df5af20e5c8", - "placeholder": "​", - "style": "IPY_MODEL_ca245376fd9f4354af6b2befe4af4466", - "tabbable": null, - "tooltip": null, - "value": " 1/1 [00:00<00:00, 44.69it/s]" - } - }, - "ca245376fd9f4354af6b2befe4af4466": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HTMLStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "2.0.0", - "_model_name": "HTMLStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "2.0.0", - "_view_name": "StyleView", - "background": null, - "description_width": "", - "font_size": null, - "text_color": null - } - }, - "dc83c7bff2f241309537a8119dfc7555": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "2.0.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "2.0.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border_bottom": null, - "border_left": null, - "border_right": null, - "border_top": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e4ae2b6f5a974fd4bafb6abb9d12ff26": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "2.0.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "2.0.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "2.0.0", - "_view_name": "HTMLView", - "description": "", - "description_allow_html": false, - "layout": 
"IPY_MODEL_6086462a12d54bafa59d3c4566f06cb2", - "placeholder": "​", - "style": "IPY_MODEL_7d3f3d9e15894d05a4d188ff4f466554", - "tabbable": null, - "tooltip": null, - "value": "100%" - } - }, - "f1355871cc6f4dd4b50d9df5af20e5c8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "2.0.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "2.0.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "2.0.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border_bottom": null, - "border_left": null, - "border_right": null, - "border_top": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - } - }, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}