Skip to content

Commit

Permalink
Add support for configurable embedding model (mem0ai#1627)
Browse files Browse the repository at this point in the history
Co-authored-by: Dev Khant <[email protected]>
  • Loading branch information
kmitul and Dev-Khant authored Aug 12, 2024
1 parent 4aae2b5 commit 464a188
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 23 deletions.
13 changes: 10 additions & 3 deletions mem0/configs/embeddings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ def __init__(
self,
model: Optional[str] = None,
embedding_dims: Optional[int] = None,

# Ollama specific
base_url: Optional[str] = None
base_url: Optional[str] = None,
# Huggingface specific
model_kwargs: Optional[dict] = None
):
"""
Initializes a configuration class instance for the Embeddings.
Expand All @@ -23,10 +24,16 @@ def __init__(
:type embedding_dims: Optional[int], optional
:param base_url: Base URL for the Ollama API, defaults to None
:type base_url: Optional[str], optional
:param model_kwargs: Keyword arguments for the Hugging Face embedding model; when None, an empty dict is used (set inside __init__)
:type model_kwargs: Optional[dict], optional
"""

self.model = model
self.embedding_dims = embedding_dims

# Ollama specific
self.base_url = base_url
self.base_url = base_url

# Huggingface specific
self.model_kwargs = model_kwargs or {}
36 changes: 36 additions & 0 deletions mem0/embeddings/azure_openai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import Optional

from openai import AzureOpenAI

from mem0.configs.embeddings.base import BaseEmbedderConfig
from mem0.embeddings.base import EmbeddingBase

class AzureOpenAIEmbedding(EmbeddingBase):
    """Embedder that produces vectors through the Azure OpenAI client."""

    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
        """
        Set up the embedder and fill in any unset configuration values.

        :param config: Embedder configuration; the base class substitutes a
            default ``BaseEmbedderConfig`` when this is None.
        :type config: Optional[BaseEmbedderConfig], optional
        """
        super().__init__(config)

        settings = self.config
        # Only fill in values the caller left unset; explicit values win.
        if settings.model is None:
            settings.model = "text-embedding-3-small"
        if settings.embedding_dims is None:
            settings.embedding_dims = 1536

        # The client is built with no explicit arguments.
        # NOTE(review): presumably endpoint/credentials come from the
        # environment -- confirm against the openai client documentation.
        self.client = AzureOpenAI()

    def embed(self, text):
        """
        Get the embedding for the given text using Azure OpenAI.

        Args:
            text (str): The text to embed.

        Returns:
            list: The embedding vector.
        """
        # Newlines are replaced with spaces before the text is sent.
        flattened = text.replace("\n", " ")
        response = self.client.embeddings.create(
            input=[flattened],
            model=self.config.model,
        )
        first_item = response.data[0]
        return first_item.embedding
13 changes: 6 additions & 7 deletions mem0/embeddings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,18 @@

from mem0.configs.embeddings.base import BaseEmbedderConfig


class EmbeddingBase(ABC):
def __init__(self, config: Optional[BaseEmbedderConfig] = None):
"""Initialize a base LLM class
"""Initialize a base embedding class
:param config: Embedder configuration option class, defaults to None
:type config: Optional[BaseEmbedderConfig], optional
"""
:param config: Embedding configuration option class, defaults to None
:type config: Optional[BaseEmbedderConfig], optional
"""
def __init__(self, config: Optional[BaseEmbedderConfig] = None):
if config is None:
self.config = BaseEmbedderConfig()
else:
self.config = config

@abstractmethod
def embed(self, text):
"""
Expand Down
5 changes: 3 additions & 2 deletions mem0/embeddings/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ class EmbedderConfig(BaseModel):
default="openai",
)
config: Optional[dict] = Field(
description="Configuration for the specific embedding model", default=None
description="Configuration for the specific embedding model",
default={}
)

@field_validator("config")
def validate_config(cls, v, values):
provider = values.data.get("provider")
if provider in ["openai", "ollama"]:
if provider in ["openai", "ollama", "huggingface", "azure_openai"]:
return v
else:
raise ValueError(f"Unsupported embedding provider: {provider}")
Expand Down
22 changes: 19 additions & 3 deletions mem0/embeddings/huggingface.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
from mem0.embeddings.base import EmbeddingBase
from typing import Optional

from sentence_transformers import SentenceTransformer

from mem0.configs.embeddings.base import BaseEmbedderConfig
from mem0.embeddings.base import EmbeddingBase


class HuggingFaceEmbedding(EmbeddingBase):
def __init__(self, model_name="multi-qa-MiniLM-L6-cos-v1"):
self.model = SentenceTransformer(model_name)
def __init__(self, config: Optional[BaseEmbedderConfig] = None):
super().__init__(config)

if self.config.model is None:
self.config.model = "multi-qa-MiniLM-L6-cos-v1"

self.model = SentenceTransformer(
self.config.model,
**self.config.model_kwargs
)

if self.config.embedding_dims is None:
self.config.embedding_dims = self.model.get_sentence_embedding_dimension()


def embed(self, text):
"""
Get the embedding for the given text using Hugging Face.
Expand Down
7 changes: 5 additions & 2 deletions mem0/embeddings/openai.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import Optional

from openai import OpenAI

from mem0.configs.embeddings.base import BaseEmbedderConfig
from mem0.embeddings.base import EmbeddingBase


class OpenAIEmbedding(EmbeddingBase):
def __init__(self, config: Optional[BaseEmbedderConfig] = None):
super().__init__(config)
Expand All @@ -28,7 +28,10 @@ def embed(self, text):
"""
text = text.replace("\n", " ")
return (
self.client.embeddings.create(input=[text], model=self.config.model)
self.client.embeddings.create(
input=[text],
model=self.config.model
)
.data[0]
.embedding
)
2 changes: 1 addition & 1 deletion mem0/memory/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
class Memory(MemoryBase):
def __init__(self, config: MemoryConfig = MemoryConfig()):
self.config = config
self.embedding_model = EmbedderFactory.create(self.config.embedder.provider)
self.embedding_model = EmbedderFactory.create(self.config.embedder.provider, self.config.embedder.config)
self.vector_store = VectorStoreFactory.create(self.config.vector_store.provider, self.config.vector_store.config)
self.llm = LlmFactory.create(self.config.llm.provider, self.config.llm.config)
self.db = SQLiteManager(self.config.history_db_path)
Expand Down
13 changes: 8 additions & 5 deletions mem0/utils/factory.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import importlib

from mem0.configs.llms.base import BaseLlmConfig

from mem0.configs.embeddings.base import BaseEmbedderConfig

def load_class(class_type):
module_path, class_name = class_type.rsplit(".", 1)
Expand Down Expand Up @@ -33,15 +33,18 @@ def create(cls, provider_name, config):
class EmbedderFactory:
provider_to_class = {
"openai": "mem0.embeddings.openai.OpenAIEmbedding",
"ollama": "mem0.embeddings.ollama.OllamaEmbedding"
"ollama": "mem0.embeddings.ollama.OllamaEmbedding",
"huggingface": "mem0.embeddings.huggingface.HuggingFaceEmbedding",
"azure_openai": "mem0.embeddings.azure_openai.AzureOpenAIEmbedding",
}

@classmethod
def create(cls, provider_name):
def create(cls, provider_name, config):
class_type = cls.provider_to_class.get(provider_name)
if class_type:
embedder_instance = load_class(class_type)()
return embedder_instance
embedder_instance = load_class(class_type)
base_config = BaseEmbedderConfig(**config)
return embedder_instance(base_config)
else:
raise ValueError(f"Unsupported Embedder provider: {provider_name}")

Expand Down

0 comments on commit 464a188

Please sign in to comment.