Skip to content

Commit

Permalink
Add example for question answering over documents with OpenAI Functio…
Browse files Browse the repository at this point in the history
…n Agent (langchain-ai#6448)

This PR adds an example of doing question answering over documents using
OpenAI Function Agents.

#### Who can review?

@hwchase17

---------

Co-authored-by: Harrison Chase <[email protected]>
  • Loading branch information
homanp and hwchase17 authored Jun 20, 2023
1 parent 68a675c commit d4e8e0f
Showing 1 changed file with 183 additions and 0 deletions.
183 changes: 183 additions & 0 deletions docs/extras/modules/agents/toolkits/document_comparison_toolkit.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ec1d7a9a",
"metadata": {},
"source": [
"# Document Comparison\n",
"\n",
"This notebook shows how to use an agent to compare two documents.\n",
"\n",
"The high level idea is we will create a question-answering chain for each document, and then use that "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "8632a37c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
" warnings.warn(\n"
]
}
],
"source": [
"from pydantic import BaseModel, Field\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.agents import Tool\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import FAISS\n",
"from langchain.document_loaders import PyPDFLoader\n",
"from langchain.chains import RetrievalQA"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "64f19917",
"metadata": {},
"outputs": [],
"source": [
"class DocumentInput(BaseModel):\n",
" question: str = Field()\n",
"\n",
"\n",
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
"\n",
"tools = []\n",
"files = [\n",
" # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
" {\n",
" \"name\": \"alphabet-earnings\", \n",
" \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
" }, \n",
" # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
" {\n",
" \"name\": \"tesla-earnings\", \n",
" \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\"\n",
" }\n",
"]\n",
"\n",
"for file in files:\n",
" loader = PyPDFLoader(file[\"path\"])\n",
" pages = loader.load_and_split()\n",
" text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
" docs = text_splitter.split_documents(pages)\n",
" embeddings = OpenAIEmbeddings()\n",
" retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
" \n",
" # Wrap retrievers in a Tool\n",
" tools.append(\n",
" Tool(\n",
" args_schema=DocumentInput,\n",
" name=file[\"name\"], \n",
" description=f\"useful when you want to answer questions about {file['name']}\",\n",
" func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)\n",
" )\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "eca02549",
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import initialize_agent\n",
"from langchain.agents import AgentType"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c4d56c25",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `alphabet-earnings` with `{'question': 'revenue'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3m{'query': 'revenue', 'result': 'The revenue for Alphabet Inc. in the first quarter of 2023 was $69,787 million.'}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
"Invoking: `tesla-earnings` with `{'question': 'revenue'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[33;1m\u001b[1;3m{'query': 'revenue', 'result': 'Total revenue for Q1-2023 was $23.3 billion.'}\u001b[0m\u001b[32;1m\u001b[1;3mAlphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'did alphabet or tesla have more revenue?',\n",
" 'output': 'Alphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.'}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm = ChatOpenAI(\n",
" temperature=0,\n",
" model=\"gpt-3.5-turbo-0613\", \n",
")\n",
"\n",
"agent = initialize_agent(\n",
" agent=AgentType.OPENAI_FUNCTIONS,\n",
" tools=tools,\n",
" llm=llm,\n",
" verbose=True,\n",
")\n",
"\n",
"agent({\"input\": \"did alphabet or tesla have more revenue?\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6db4c853",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit d4e8e0f

Please sign in to comment.