forked from PromtEngineer/localGPT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
constants.py
42 lines (33 loc) · 1.41 KB
/
constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
# from dotenv import load_dotenv
from chromadb.config import Settings
# https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
# load_dotenv()
ROOT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
# Define the folder for storing database
SOURCE_DIRECTORY = f"{ROOT_DIRECTORY}/SOURCE_DOCUMENTS"
PERSIST_DIRECTORY = f"{ROOT_DIRECTORY}/DB"
# Can be changed to a specific number
INGEST_THREADS = os.cpu_count() or 8
# Define the Chroma settings
CHROMA_SETTINGS = Settings(
chroma_db_impl="duckdb+parquet", persist_directory=PERSIST_DIRECTORY, anonymized_telemetry=False
)
# https://python.langchain.com/en/latest/_modules/langchain/document_loaders/excel.html#UnstructuredExcelLoader
DOCUMENT_MAP = {
".txt": TextLoader,
".md": TextLoader,
".py": TextLoader,
".pdf": PDFMinerLoader,
".csv": CSVLoader,
".xls": UnstructuredExcelLoader,
".xlxs": UnstructuredExcelLoader,
".docx": Docx2txtLoader,
".doc": Docx2txtLoader,
}
# Default Instructor Model
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"
# You can also choose a smaller model, don't forget to change HuggingFaceInstructEmbeddings
# to HuggingFaceEmbeddings in both ingest.py and run_localGPT.py
# EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"