Skip to content

Commit

Permalink
community[patch]: activeloop ai tql deprecation (langchain-ai#14634)
Browse files Browse the repository at this point in the history
Co-authored-by: AdkSarsen <[email protected]>
  • Loading branch information
hwchase17 and AdkSarsen authored Jan 29, 2024
1 parent c95facc commit 8457c31
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 42 deletions.
43 changes: 37 additions & 6 deletions libs/community/langchain_community/vectorstores/deeplake.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class DeepLake(VectorStore):
"""

_LANGCHAIN_DEFAULT_DEEPLAKE_PATH = "./deeplake/"
_valid_search_kwargs = ["lambda_mult"]

def __init__(
self,
Expand Down Expand Up @@ -219,11 +220,7 @@ def add_texts(
Returns:
List[str]: List of IDs of the added texts.
"""
if kwargs:
unsupported_items = "`, `".join(set(kwargs.keys()))
raise TypeError(
f"`{unsupported_items}` is/are not a valid argument to add_text method"
)
self._validate_kwargs(kwargs, "add_texts")

kwargs = {}
if ids:
Expand Down Expand Up @@ -371,6 +368,9 @@ def _search(
Raises:
ValueError: if both `embedding` and `embedding_function` are not specified.
"""
if kwargs.get("tql_query"):
logger.warning("`tql_query` is deprecated. Please use `tql` instead.")
kwargs["tql"] = kwargs.pop("tql_query")

if kwargs.get("tql"):
return self._search_tql(
Expand All @@ -384,6 +384,8 @@ def _search(
filter=filter,
)

self._validate_kwargs(kwargs, "search")

if embedding_function:
if isinstance(embedding_function, Embeddings):
_embedding_function = embedding_function.embed_query
Expand Down Expand Up @@ -417,7 +419,6 @@ def _search(
return_tensors=["embedding", "metadata", "text", self._id_tensor_name],
deep_memory=deep_memory,
)

scores = result["score"]
embeddings = result["embedding"]
metadatas = result["metadata"]
Expand Down Expand Up @@ -445,6 +446,9 @@ def _search(
]

if return_score:
if not isinstance(scores, list):
scores = [scores]

return [(doc, score) for doc, score in zip(docs, scores)]

return docs
Expand Down Expand Up @@ -899,3 +903,30 @@ def ds(self) -> Any:
"better to use `db.vectorstore.dataset` instead."
)
return self.vectorstore.dataset

@classmethod
def _validate_kwargs(cls, kwargs, method_name):
    """Reject keyword arguments that ``method_name`` does not accept.

    Args:
        kwargs: Mapping of the extra keyword arguments the caller received.
        method_name: Name of the calling method; selects the accepted keys
            via ``_get_valid_args`` and is quoted in the error message.

    Raises:
        TypeError: If ``kwargs`` contains at least one key that is not in
            the list returned by ``_get_valid_args(method_name)``.
    """
    # No extra arguments were supplied, so there is nothing to check.
    if not kwargs:
        return

    rejected = cls._get_unsupported_items(
        kwargs, cls._get_valid_args(method_name)
    )
    # Every supplied key is on the allow-list.
    if not rejected:
        return

    raise TypeError(
        f"`{rejected}` are not a valid "
        f"argument to {method_name} method"
    )

@classmethod
def _get_valid_args(cls, method_name):
    """Return the extra keyword names that ``method_name`` accepts.

    Only ``"search"`` has an allow-list (``cls._valid_search_kwargs``);
    any other method name yields an empty list.
    """
    return cls._valid_search_kwargs if method_name == "search" else []

@staticmethod
def _get_unsupported_items(kwargs, valid_items):
    """Format the keys of ``kwargs`` that are not in ``valid_items``.

    Args:
        kwargs: Mapping of keyword arguments to inspect; only keys are used.
        valid_items: Collection of accepted key names.

    Returns:
        ``None`` when every key is accepted. Otherwise the rejected names
        joined with "`, `", ready to be wrapped in backticks by the caller.
    """
    # Dict keys are already unique, so no set() is needed. Sorting keeps the
    # message stable from run to run instead of following hash order.
    rejected = sorted(key for key in kwargs if key not in valid_items)
    if not rejected:
        return None
    return "`, `".join(rejected)
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ def deeplake_datastore() -> DeepLake:
embedding_function=FakeEmbeddings(),
overwrite=True,
)
return docsearch
yield docsearch

docsearch.delete_dataset()


@pytest.fixture(params=["L1", "L2", "max", "cos"])
Expand Down Expand Up @@ -50,27 +52,14 @@ def test_deeplake_with_metadatas() -> None:
assert output == [Document(page_content="foo", metadata={"page": "0"})]


def test_deeplakewith_persistence() -> None:
def test_deeplake_with_persistence(deeplake_datastore) -> None:
"""Test end to end construction and search, with persistence."""
import deeplake

dataset_path = "./tests/persist_dir"
if deeplake.exists(dataset_path):
deeplake.delete(dataset_path)

texts = ["foo", "bar", "baz"]
docsearch = DeepLake.from_texts(
dataset_path=dataset_path,
texts=texts,
embedding=FakeEmbeddings(),
)

output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
output = deeplake_datastore.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"page": "0"})]

# Get a new VectorStore from the persisted directory
docsearch = DeepLake(
dataset_path=dataset_path,
dataset_path=deeplake_datastore.vectorstore.dataset_handler.path,
embedding_function=FakeEmbeddings(),
)
output = docsearch.similarity_search("foo", k=1)
Expand All @@ -83,22 +72,12 @@ def test_deeplakewith_persistence() -> None:
# Or on program exit


def test_deeplake_overwrite_flag() -> None:
def test_deeplake_overwrite_flag(deeplake_datastore) -> None:
"""Test overwrite behavior"""
import deeplake
dataset_path = deeplake_datastore.vectorstore.dataset_handler.path

dataset_path = "./tests/persist_dir"
if deeplake.exists(dataset_path):
deeplake.delete(dataset_path)

texts = ["foo", "bar", "baz"]
docsearch = DeepLake.from_texts(
dataset_path=dataset_path,
texts=texts,
embedding=FakeEmbeddings(),
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
output = deeplake_datastore.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"page": "0"})]

# Get a new VectorStore from the persisted directory, with no overwrite (implicit)
docsearch = DeepLake(
Expand All @@ -107,7 +86,7 @@ def test_deeplake_overwrite_flag() -> None:
)
output = docsearch.similarity_search("foo", k=1)
# assert page still present
assert output == [Document(page_content="foo")]
assert output == [Document(page_content="foo", metadata={"page": "0"})]

# Get a new VectorStore from the persisted directory, with no overwrite (explicit)
docsearch = DeepLake(
Expand All @@ -117,7 +96,7 @@ def test_deeplake_overwrite_flag() -> None:
)
output = docsearch.similarity_search("foo", k=1)
# assert page still present
assert output == [Document(page_content="foo")]
assert output == [Document(page_content="foo", metadata={"page": "0"})]

# Get a new VectorStore from the persisted directory, with overwrite
docsearch = DeepLake(
Expand All @@ -129,8 +108,9 @@ def test_deeplake_overwrite_flag() -> None:
output = docsearch.similarity_search("foo", k=1)


def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -> None:
def test_similarity_search(deeplake_datastore) -> None:
"""Test similarity search."""
distance_metric = "cos"
output = deeplake_datastore.similarity_search(
"foo", k=1, distance_metric=distance_metric
)
Expand All @@ -145,7 +125,6 @@ def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -
query="foo", tql_query=tql_query, k=1, distance_metric=distance_metric
)
assert len(output) == 1
deeplake_datastore.delete_dataset()


def test_similarity_search_by_vector(
Expand All @@ -164,6 +143,7 @@ def test_similarity_search_with_score(
deeplake_datastore: DeepLake, distance_metric: str
) -> None:
"""Test similarity search with score."""
deeplake_datastore.vectorstore.summary()
output, score = deeplake_datastore.similarity_search_with_score(
"foo", k=1, distance_metric=distance_metric
)[0]
Expand Down Expand Up @@ -281,3 +261,11 @@ def test_ids_backwards_compatibility() -> None:
)
output = db.similarity_search("foo", k=1)
assert len(output) == 1


def test_similarity_search_should_error_out_when_not_supported_kwargs_are_provided(
    deeplake_datastore: DeepLake,
) -> None:
    """Test that similarity_search raises TypeError on an unsupported kwarg."""
    with pytest.raises(TypeError):
        deeplake_datastore.similarity_search("foo", k=1, not_supported_kwarg=True)

0 comments on commit 8457c31

Please sign in to comment.