Skip to content

Commit

Permalink
index: fix (langchain-ai#259)
Browse files Browse the repository at this point in the history
  • Loading branch information
baskaryan authored Feb 7, 2024
1 parent 2766bb6 commit 305b08e
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 31 deletions.
10 changes: 7 additions & 3 deletions _index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
from typing import Callable, Iterable, Literal, Optional, Sequence, Union, cast

from langchain.document_loaders.base import BaseLoader
from langchain.indexes._api import (IndexingResult, _batch,
_deduplicate_in_order,
_get_source_id_assigner, _HashedDocument)
from langchain.indexes._api import (
IndexingResult,
_batch,
_deduplicate_in_order,
_get_source_id_assigner,
_HashedDocument,
)
from langchain.indexes.base import RecordManager
from langchain.schema.document import Document
from langchain.schema.vectorstore import VectorStore
Expand Down
3 changes: 1 addition & 2 deletions _scripts/evaluate_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
from langchain import load as langchain_load
from langchain.chat_models import ChatAnthropic, ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import (ChatPromptTemplate, MessagesPlaceholder,
PromptTemplate)
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.retriever import BaseRetriever
from langchain.schema.runnable import Runnable, RunnableMap
Expand Down
5 changes: 3 additions & 2 deletions _scripts/evaluate_chains_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import weaviate
from langchain import load as langchain_load
from langchain.agents import AgentExecutor, Tool
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import \
AgentTokenBufferMemory
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
AgentTokenBufferMemory,
)
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
Expand Down
3 changes: 1 addition & 2 deletions _scripts/evaluate_chains_improved_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from langchain.chat_models import ChatAnthropic, ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import (ChatPromptTemplate, MessagesPlaceholder,
PromptTemplate)
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.retriever import BaseRetriever
from langchain.schema.runnable import Runnable, RunnableMap
Expand Down
37 changes: 18 additions & 19 deletions chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,32 @@
import weaviate
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from langchain_community.chat_models.anthropic import ChatAnthropic
from langchain_community.chat_models.fireworks import ChatFireworks
from langchain_community.embeddings.voyageai import VoyageEmbeddings
from langchain_community.vectorstores.weaviate import Weaviate
from langchain_community.chat_models import ChatAnthropic, ChatFireworks
from langchain_community.vectorstores import Weaviate
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.language_models.base import BaseLanguageModel
from langchain_core.language_models import BaseLanguageModel
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (ChatPromptTemplate, MessagesPlaceholder,
PromptTemplate)
from langchain_core.prompts import (
ChatPromptTemplate,
MessagesPlaceholder,
PromptTemplate,
)
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.retrievers import BaseRetriever
from langchain_core.runnables import (ConfigurableField, Runnable,
RunnableBranch, RunnableLambda,
RunnableMap)
from langchain_core.runnables import (
ConfigurableField,
Runnable,
RunnableBranch,
RunnableLambda,
RunnableMap,
)
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langsmith import Client
from pydantic import BaseModel

from constants import WEAVIATE_DOCS_INDEX_NAME
from ingest import get_embeddings_model

RESPONSE_TEMPLATE = """\
You are an expert programmer and problem-solver, tasked with answering any question \
Expand Down Expand Up @@ -92,12 +97,6 @@ class ChatRequest(BaseModel):
chat_history: Optional[List[Dict[str, str]]]


def get_embeddings_model() -> Embeddings:
    """Pick the embeddings backend based on environment variables.

    Returns:
        ``VoyageEmbeddings`` configured with the model named by
        ``VOYAGE_AI_MODEL`` when both ``VOYAGE_API_KEY`` and
        ``VOYAGE_AI_MODEL`` are set to non-empty values; otherwise
        ``OpenAIEmbeddings`` with ``chunk_size=200``.
    """
    voyage_api_key = os.environ.get("VOYAGE_API_KEY")
    voyage_model = os.environ.get("VOYAGE_AI_MODEL")

    # Fall back to OpenAI unless Voyage is fully configured (key AND model).
    if not (voyage_api_key and voyage_model):
        return OpenAIEmbeddings(chunk_size=200)

    return VoyageEmbeddings(model=voyage_model)


def get_retriever() -> BaseRetriever:
weaviate_client = weaviate.Client(
url=WEAVIATE_URL,
Expand Down
13 changes: 10 additions & 3 deletions ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
from langchain.document_loaders import RecursiveUrlLoader, SitemapLoader
from langchain.indexes import SQLRecordManager
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.utils.html import (PREFIXES_TO_IGNORE_REGEX,
SUFFIXES_TO_IGNORE_REGEX)
from langchain.utils.html import PREFIXES_TO_IGNORE_REGEX, SUFFIXES_TO_IGNORE_REGEX
from langchain.vectorstores.weaviate import Weaviate
from langchain_community.embeddings import VoyageEmbeddings
from langchain_core.embeddings import Embeddings
from langchain_openai import OpenAIEmbeddings

from _index import index
from chain import get_embeddings_model
from constants import WEAVIATE_DOCS_INDEX_NAME

logging.basicConfig(level=logging.INFO)
Expand All @@ -25,6 +26,12 @@
RECORD_MANAGER_DB_URL = os.environ["RECORD_MANAGER_DB_URL"]


def get_embeddings_model() -> Embeddings:
    """Pick the embeddings backend based on environment variables.

    Returns:
        ``VoyageEmbeddings`` configured with the model named by
        ``VOYAGE_AI_MODEL`` when both ``VOYAGE_API_KEY`` and
        ``VOYAGE_AI_MODEL`` are set to non-empty values; otherwise
        ``OpenAIEmbeddings`` with ``chunk_size=200``.
    """
    voyage_api_key = os.environ.get("VOYAGE_API_KEY")
    voyage_model = os.environ.get("VOYAGE_AI_MODEL")

    # Fall back to OpenAI unless Voyage is fully configured (key AND model).
    if not (voyage_api_key and voyage_model):
        return OpenAIEmbeddings(chunk_size=200)

    return VoyageEmbeddings(model=voyage_model)


def metadata_extractor(meta: dict, soup: BeautifulSoup) -> dict:
title = soup.find("title")
description = soup.find("meta", attrs={"name": "description"})
Expand Down

0 comments on commit 305b08e

Please sign in to comment.