forked from langchain-ai/langchain
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add example for question answering over documents with OpenAI Functio…
…n Agent (langchain-ai#6448) This PR adds an example of doing question answering over documents using OpenAI Function Agents. #### Who can review? @hwchase17 --------- Co-authored-by: Harrison Chase <[email protected]>
- Loading branch information
Showing
1 changed file
with
183 additions
and
0 deletions.
There are no files selected for viewing
183 changes: 183 additions & 0 deletions
183
docs/extras/modules/agents/toolkits/document_comparison_toolkit.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "ec1d7a9a", | ||
"metadata": {}, | ||
"source": [ | ||
"# Document Comparison\n", | ||
"\n", | ||
"This notebook shows how to use an agent to compare two documents.\n", | ||
"\n", | ||
"The high-level idea is that we will create a question-answering chain for each document, and then give those chains to an agent as tools so that it can query each document and compare the answers." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "8632a37c", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n", | ||
" warnings.warn(\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from pydantic import BaseModel, Field\n", | ||
"\n", | ||
"from langchain.chat_models import ChatOpenAI\n", | ||
"from langchain.agents import Tool\n", | ||
"from langchain.embeddings.openai import OpenAIEmbeddings\n", | ||
"from langchain.text_splitter import CharacterTextSplitter\n", | ||
"from langchain.vectorstores import FAISS\n", | ||
"from langchain.document_loaders import PyPDFLoader\n", | ||
"from langchain.chains import RetrievalQA" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "64f19917", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Argument schema shared by every document tool: a single free-text question.\n", | ||
"class DocumentInput(BaseModel):\n", | ||
"    question: str = Field()\n", | ||
"\n", | ||
"\n", | ||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n", | ||
"\n", | ||
"# Documents to compare. The paths below are machine-specific: download the PDFs\n", | ||
"# from the URLs in the comments and point each \"path\" at your local copy.\n", | ||
"files = [\n", | ||
"    # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n", | ||
"    {\n", | ||
"        \"name\": \"alphabet-earnings\",\n", | ||
"        \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n", | ||
"    },\n", | ||
"    # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n", | ||
"    {\n", | ||
"        \"name\": \"tesla-earnings\",\n", | ||
"        \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\",\n", | ||
"    },\n", | ||
"]\n", | ||
"\n", | ||
"# Neither the splitter nor the embedding client depends on the file being\n", | ||
"# processed, so build them once instead of on every loop iteration.\n", | ||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", | ||
"embeddings = OpenAIEmbeddings()\n", | ||
"\n", | ||
"tools = []\n", | ||
"for file in files:\n", | ||
"    loader = PyPDFLoader(file[\"path\"])\n", | ||
"    pages = loader.load_and_split()\n", | ||
"    docs = text_splitter.split_documents(pages)\n", | ||
"    retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n", | ||
"\n", | ||
"    # Wrap this document's retrieval QA chain in a Tool the agent can call.\n", | ||
"    tools.append(\n", | ||
"        Tool(\n", | ||
"            args_schema=DocumentInput,\n", | ||
"            name=file[\"name\"],\n", | ||
"            description=f\"useful when you want to answer questions about {file['name']}\",\n", | ||
"            func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever),\n", | ||
"        )\n", | ||
"    )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "eca02549", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain.agents import initialize_agent\n", | ||
"from langchain.agents import AgentType" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "c4d56c25", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"\n", | ||
"\u001b[1m> Entering new chain...\u001b[0m\n", | ||
"\u001b[32;1m\u001b[1;3m\n", | ||
"Invoking: `alphabet-earnings` with `{'question': 'revenue'}`\n", | ||
"\n", | ||
"\n", | ||
"\u001b[0m\u001b[36;1m\u001b[1;3m{'query': 'revenue', 'result': 'The revenue for Alphabet Inc. in the first quarter of 2023 was $69,787 million.'}\u001b[0m\u001b[32;1m\u001b[1;3m\n", | ||
"Invoking: `tesla-earnings` with `{'question': 'revenue'}`\n", | ||
"\n", | ||
"\n", | ||
"\u001b[0m\u001b[33;1m\u001b[1;3m{'query': 'revenue', 'result': 'Total revenue for Q1-2023 was $23.3 billion.'}\u001b[0m\u001b[32;1m\u001b[1;3mAlphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.\u001b[0m\n", | ||
"\n", | ||
"\u001b[1m> Finished chain.\u001b[0m\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"{'input': 'did alphabet or tesla have more revenue?',\n", | ||
" 'output': 'Alphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.'}" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# Chat model that drives the agent (temperature 0).\n", | ||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n", | ||
"\n", | ||
"# OpenAI-functions agent with one tool per document; verbose=True prints\n", | ||
"# each tool invocation as the chain runs.\n", | ||
"agent = initialize_agent(\n", | ||
"    tools=tools,\n", | ||
"    llm=llm,\n", | ||
"    agent=AgentType.OPENAI_FUNCTIONS,\n", | ||
"    verbose=True,\n", | ||
")\n", | ||
"\n", | ||
"# Ask a question that needs information from both documents.\n", | ||
"agent({\"input\": \"did alphabet or tesla have more revenue?\"})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6db4c853", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |