forked from langchain-ai/chat-langchain
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 638f0ee
Showing
6 changed files
with
554 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import datetime | ||
import os | ||
|
||
import gradio as gr | ||
import langchain | ||
import weaviate | ||
from langchain.vectorstores import Weaviate | ||
|
||
from chain import get_new_chain1 | ||
|
||
# Address of the Weaviate instance; the app cannot start without it.
WEAVIATE_URL = os.environ["WEAVIATE_URL"]


def get_weaviate_store():
    """Return a LangChain ``Weaviate`` store over the "Paragraph" class.

    The Weaviate client is created against ``WEAVIATE_URL`` and sends the
    ``OPENAI_API_KEY`` environment variable in the ``X-OpenAI-Api-Key``
    header. The store reads text from the "content" property and also
    requests the "source" attribute.

    Raises:
        KeyError: if ``OPENAI_API_KEY`` is not set in the environment.
    """
    openai_headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
    weaviate_client = weaviate.Client(
        url=WEAVIATE_URL,
        additional_headers=openai_headers,
    )
    store = Weaviate(weaviate_client, "Paragraph", "content", attributes=["source"])
    return store


# Shared store, built once at import time and reused for every chain.
vectorstore = get_weaviate_store()
|
||
|
||
def set_openai_api_key(api_key, agent):
    """Build a question-answering chain using the API key pasted by the user.

    The key is placed in ``os.environ["OPENAI_API_KEY"]`` only while the chain
    is being constructed and is blanked again afterwards, even when
    construction fails, so one user's key is never left in the process
    environment.

    Args:
        api_key: Contents of the API-key textbox. A falsy value (empty string
            or ``None``) means no chain is built.
        agent: Current agent state supplied by the Gradio event wiring; it is
            not used here.

    Returns:
        The chain returned by ``get_new_chain1(vectorstore)``, or ``None``
        when ``api_key`` is falsy.
    """
    if not api_key:
        return None
    os.environ["OPENAI_API_KEY"] = api_key
    try:
        return get_new_chain1(vectorstore)
    finally:
        # Always clear the key, including when chain construction raises.
        os.environ["OPENAI_API_KEY"] = ""
|
||
|
||
def chat(inp, history, agent):
    """Answer one user message and append the exchange to the chat history.

    Args:
        inp: The user's message.
        history: List of ``(user_message, answer)`` pairs accumulated so far,
            or a falsy value when the conversation has not started yet.
        agent: Callable taking ``{"question": ..., "chat_history": ...}`` and
            returning a mapping with an ``"answer"`` key, or ``None`` when no
            API key has been provided yet.

    Returns:
        A ``(history, history)`` tuple: the same list twice, once for the
        chatbot display and once for the stored state.
    """
    history = history or []
    if agent is None:
        # No chain yet: tell the user how to proceed instead of failing.
        history.append((inp, "Please paste your OpenAI key to use"))
        return history, history

    print(f"\n==== date/time: {datetime.datetime.now()} ====")
    print(f"inp: {inp}")
    # The agent sees the history as it was before this exchange is appended.
    output = agent({"question": inp, "chat_history": history})
    history.append((inp, output["answer"]))
    print(history)
    return history, history
|
||
|
||
# Gradio front end: an API-key box, the chat window, a question box with a
# Send button, example questions and two informational footers.
# NOTE(review): the nesting of components inside the rows is reconstructed
# from the statement order; confirm against the intended layout.
with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
    with gr.Row():
        gr.Markdown("<h3><center>LangChain AI</center></h3>")

        openai_api_key_textbox = gr.Textbox(
            type="password",
            lines=1,
            show_label=False,
            placeholder="Paste your OpenAI API key (sk-...)",
        )

    chatbot = gr.Chatbot()

    with gr.Row():
        message = gr.Textbox(
            lines=1,
            label="What's your question?",
            placeholder="What's the answer to life, the universe, and everything?",
        )
        send_button = gr.Button(value="Send", variant="secondary")
        submit = send_button.style(full_width=False)

    # Clicking an example copies it into the question box.
    example_questions = [
        "What are agents?",
        "How do I summarize a long document?",
        "What types of memory exist?",
    ]
    gr.Examples(examples=example_questions, inputs=message)

    gr.HTML(
        "\nThis simple application is an implementation of ChatGPT but over an external dataset (in this case, the LangChain documentation)."
    )

    gr.HTML(
        "<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>"
    )

    # `state` carries the chat history; `agent_state` carries the chain
    # produced by set_openai_api_key.
    state = gr.State()
    agent_state = gr.State()

    # The Send button and pressing Enter in the question box both run chat().
    submit.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
    message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])

    # A change to the key box passes its contents to set_openai_api_key.
    openai_api_key_textbox.change(
        set_openai_api_key,
        inputs=[openai_api_key_textbox, agent_state],
        outputs=[agent_state],
    )

block.launch(debug=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import json | ||
import os | ||
import pathlib | ||
from typing import Dict, List, Tuple | ||
|
||
import weaviate | ||
from langchain import OpenAI, PromptTemplate | ||
from langchain.chains import LLMChain | ||
from langchain.chains.base import Chain | ||
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain | ||
from langchain.chains.conversation.memory import ConversationBufferMemory | ||
from langchain.chains.question_answering import load_qa_chain | ||
from langchain.embeddings import OpenAIEmbeddings | ||
from langchain.prompts import FewShotPromptTemplate, PromptTemplate | ||
from langchain.prompts.example_selector import \ | ||
SemanticSimilarityExampleSelector | ||
from langchain.vectorstores import FAISS, Weaviate | ||
from pydantic import BaseModel | ||
|
||
# Weaviate connection used by the example store in this module. Both
# environment variables are read at import time; a missing one raises KeyError.
WEAVIATE_URL = os.environ["WEAVIATE_URL"]
client = weaviate.Client(
    additional_headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]},
    url=WEAVIATE_URL,
)
|
||
# Rendering of a single few-shot example: prior chat, the follow-up input and
# the standalone question it should be rewritten to.
_eg_template = (
    "## Example:\n"
    "Chat History:\n"
    "{chat_history}\n"
    "Follow Up Input: {question}\n"
    "Standalone question: {answer}"
)
_eg_prompt = PromptTemplate(
    input_variables=["chat_history", "question", "answer"],
    template=_eg_template,
)

# Instruction placed before the examples, and the final slot for the real
# conversation, which ends where the model should write the standalone question.
_prefix = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to LangChain."""
_suffix = (
    "## Example:\n"
    "Chat History:\n"
    "{chat_history}\n"
    "Follow Up Input: {question}\n"
    "Standalone question:"
)

# Examples are stored in the "Rephrase" class in Weaviate; four are selected
# per prompt (k=4).
eg_store = Weaviate(
    client,
    "Rephrase",
    "content",
    attributes=["question", "answer", "chat_history"],
)
example_selector = SemanticSimilarityExampleSelector(k=4, vectorstore=eg_store)
prompt = FewShotPromptTemplate(
    input_variables=["question", "chat_history"],
    example_prompt=_eg_prompt,
    example_selector=example_selector,
    prefix=_prefix,
    suffix=_suffix,
)

# Chain that turns (chat history, follow-up) into a standalone question.
llm = OpenAI(model_name="text-davinci-003", temperature=0)
key_word_extractor = LLMChain(prompt=prompt, llm=llm)
|
||
|
||
class CustomChain(Chain, BaseModel):
    """Conversational question answering over a Weaviate vector store.

    A call rewrites the follow-up question into a standalone one (when there
    is chat history), retrieves the four most similar documents, and asks
    ``chain`` to combine them into an answer.
    """

    # Vector store queried with the (possibly rephrased) question.
    vstore: Weaviate
    # Chain that combines the retrieved documents and the question into an answer.
    chain: BaseCombineDocumentsChain

    @property
    def input_keys(self) -> List[str]:
        """Keys that must be present in the inputs; ``chat_history`` is optional."""
        return ["question"]

    @property
    def output_keys(self) -> List[str]:
        """Keys present in the mapping returned by ``_call``."""
        return ["answer"]

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Answer ``inputs["question"]``, using ``inputs["chat_history"]`` if given.

        Args:
            inputs: Mapping with a ``"question"`` entry and, optionally, a
                ``"chat_history"`` list of ``(human, assistant)`` pairs.

        Returns:
            ``{"answer": <text produced by the combine-documents chain>}``.
        """
        question = inputs["question"]
        # "chat_history" is not listed in input_keys, so it may be missing;
        # treat a missing or empty history as the start of a conversation.
        chat_history_str = _get_chat_history(inputs.get("chat_history") or [])
        if chat_history_str:
            # Fold the conversation context into a self-contained question.
            new_question = key_word_extractor.run(
                question=question, chat_history=chat_history_str
            )
        else:
            new_question = question
        print(new_question)
        docs = self.vstore.similarity_search(new_question, k=4)
        # Copy so the caller's dict is not modified.
        new_inputs = inputs.copy()
        new_inputs["question"] = new_question
        new_inputs["chat_history"] = chat_history_str
        answer, _ = self.chain.combine_docs(docs, **new_inputs)
        return {"answer": answer}
|
||
|
||
def get_new_chain1(vectorstore) -> Chain:
    """Create the documentation question-answering chain for ``vectorstore``.

    Args:
        vectorstore: Store that the returned chain searches for context.

    Returns:
        A ``CustomChain`` wrapping a "stuff" QA chain built on an OpenAI
        ``text-davinci-003`` model with temperature 0.
    """
    # How each retrieved document is rendered inside the context section.
    document_prompt = PromptTemplate(
        input_variables=["page_content", "source"],
        template=">Example:\nContent:\n---------\n{page_content}\n----------\nSource: {source}",
    )

    # Instructions for the answering model; {context} receives the rendered
    # documents and {question} the (possibly rephrased) user question.
    answer_template = """You are an AI assistant for the open source library LangChain. The documentation is located at https://langchain.readthedocs.io.
You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the documentation.
You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
If the question includes a request for code, provide a code block directly from the documentation.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about LangChain, politely inform them that you are tuned to only answer questions about LangChain.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
    answer_prompt = PromptTemplate(
        input_variables=["question", "context"],
        template=answer_template,
    )

    completion_llm = OpenAI(
        temperature=0, model_name="text-davinci-003", max_tokens=-1
    )
    doc_chain = load_qa_chain(
        completion_llm,
        chain_type="stuff",
        prompt=answer_prompt,
        document_prompt=document_prompt,
    )
    return CustomChain(chain=doc_chain, vstore=vectorstore)
|
||
|
||
def _get_chat_history(chat_history: List[Tuple[str, str]]): | ||
buffer = "" | ||
for human_s, ai_s in chat_history: | ||
human = f"Human: " + human_s | ||
ai = f"Assistant: " + ai_s | ||
buffer += "\n" + "\n".join([human, ai]) | ||
return buffer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
"""Load html from files, clean up, split, ingest into Weaviate.""" | ||
import os | ||
from pathlib import Path | ||
|
||
import weaviate | ||
from bs4 import BeautifulSoup | ||
from langchain.text_splitter import CharacterTextSplitter | ||
|
||
|
||
def clean_data(data):
    """Return the text of the page's ``<main id="main-content">`` element.

    Args:
        data: HTML source of one documentation page.

    Returns:
        The element's text with empty lines removed, joined by newlines.

    Raises:
        IndexError: if the page has no ``<main id="main-content">`` element.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(data, "html.parser")
    text = soup.find_all("main", {"id": "main-content"})[0].get_text()
    return "\n".join(line for line in text.split("\n") if line)
|
||
|
||
# Collect the cleaned text of every scraped file together with its path.
docs = []
metadatas = []
for p in Path("langchain.readthedocs.io/en/latest/").rglob("*"):
    if p.is_dir():
        continue
    # Decode as UTF-8 explicitly instead of relying on the locale's default
    # encoding, which differs between machines.
    docs.append(clean_data(p.read_text(encoding="utf-8")))
    metadatas.append({"source": p})

# Split on newlines into chunks of at most 1000 characters with a
# 200-character overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

documents = text_splitter.create_documents(docs, metadatas=metadatas)
|
||
|
||
# Connect to Weaviate; both environment variables are required.
WEAVIATE_URL = os.environ["WEAVIATE_URL"]
client = weaviate.Client(
    additional_headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]},
    url=WEAVIATE_URL,
)

# Drop any existing "Paragraph" class so the ingest starts from a clean schema.
client.schema.delete_class("Paragraph")
client.schema.get()

# The paragraph text is the property that gets vectorized.
content_property = {
    "dataType": ["text"],
    "description": "The content of the paragraph",
    "moduleConfig": {
        "text2vec-openai": {
            "skip": False,
            "vectorizePropertyName": False,
        }
    },
    "name": "content",
}
# The source link is stored but skipped by the vectorizer.
source_property = {
    "dataType": ["text"],
    "description": "The link",
    "moduleConfig": {
        "text2vec-openai": {
            "skip": True,
            "vectorizePropertyName": False,
        }
    },
    "name": "source",
}
paragraph_class = {
    "class": "Paragraph",
    "description": "A written paragraph",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        "text2vec-openai": {
            "model": "ada",
            "modelVersion": "002",
            "type": "text",
        }
    },
    "properties": [content_property, source_property],
}
schema = {"classes": [paragraph_class]}

client.schema.create(schema)
|
||
# Upload every chunk as a "Paragraph" object; the source path is stored as a
# string.
with client.batch as batch:
    for chunk in documents:
        paragraph = {
            "content": chunk.page_content,
            "source": str(chunk.metadata["source"]),
        }
        batch.add_data_object(paragraph, "Paragraph")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/usr/bin/env bash
# Bash script to ingest data.
# This involves scraping the data from the web and then cleaning up and putting in Weaviate.

# Stop at the first failing command or use of an unset variable.
# (The previous "!set -eu" was not the `set` builtin, so strict mode was never on.)
# NOTE(review): wget can exit non-zero when a crawled link fails; with strict
# mode on, that now aborts the ingest. Confirm this is the desired behaviour.
set -eu

# Mirror the documentation, keeping only .html files.
wget -r -A.html https://langchain.readthedocs.io/en/latest/

# Load the pages and the rephrasing examples into Weaviate.
python3 ingest.py
python3 ingest_examples.py
Oops, something went wrong.