Skip to content

Commit

Permalink
Fix: Don't cache snapshot intervals (TobikoData#3041)
Browse files Browse the repository at this point in the history
  • Loading branch information
izeigerman authored Aug 23, 2024
1 parent 22413af commit d106f7d
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 0 deletions.
2 changes: 2 additions & 0 deletions sqlmesh/core/snapshot/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ def get_or_load(
if snapshot.is_model:
self._optimized_query_cache.with_optimized_query(snapshot.model)
self._update_node_hash_cache(snapshot)
snapshot.intervals = []
snapshot.dev_intervals = []
snapshots[s_id] = snapshot
cache_hits.add(s_id)

Expand Down
8 changes: 8 additions & 0 deletions sqlmesh/core/snapshot/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,14 @@ def _ensure_categorized(self) -> None:
if not self.version:
raise SQLMeshError(f"Snapshot {self.snapshot_id} has not been versioned yet.")

def __getstate__(self) -> t.Dict[t.Any, t.Any]:
    """Return the pickling state with both interval lists emptied.

    The attribute mapping taken from the parent state is shallow-copied
    before it is modified, so the overrides below are applied to the copy
    rather than to the mapping the parent class returned.
    """
    pickled = super().__getstate__()
    # Work on a copy of the attribute mapping, then blank out the intervals
    # so that they are not stored in the serialized form.
    attributes = dict(pickled["__dict__"])
    attributes.update(intervals=[], dev_intervals=[])
    pickled["__dict__"] = attributes
    return pickled


class SnapshotTableCleanupTask(PydanticModel):
snapshot: SnapshotTableInfo
Expand Down
25 changes: 25 additions & 0 deletions tests/core/test_snapshot.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pickle
import json
import typing as t
from copy import deepcopy
Expand Down Expand Up @@ -1931,6 +1932,8 @@ def test_snapshot_cache(make_snapshot, tmp_path):
query=parse_one("SELECT 1"),
)
)
snapshot.add_interval("2024-01-01", "2024-01-02")
snapshot.add_interval("2024-01-01", "2024-01-02", is_dev=True)

loader_called_times = 0

Expand All @@ -1957,10 +1960,32 @@ def _loader(snapshot_ids: t.Set[SnapshotId]) -> t.Collection[Snapshot]:
assert cached_snapshot.model._query_renderer._optimized_cache is not None
assert cached_snapshot.model._data_hash is not None
assert cached_snapshot.model._metadata_hash is not None
assert not cached_snapshot.intervals
assert not cached_snapshot.dev_intervals

cache.clear()
assert cache.get_or_load([snapshot.snapshot_id], _loader) == (
{snapshot.snapshot_id: snapshot},
set(),
)
assert loader_called_times == 2


def test_snapshot_pickle_intervals(make_snapshot):
    """Check that a pickle round trip drops intervals from the copy only."""
    model = SqlModel(name="test_model_name", query=parse_one("SELECT 1"))
    original = make_snapshot(model)
    original.add_interval("2023-01-01", "2023-01-02")
    original.add_interval("2023-01-01", "2023-01-02", is_dev=True)

    def assert_original_has_intervals() -> None:
        assert len(original.intervals) > 0
        assert len(original.dev_intervals) > 0

    assert_original_has_intervals()

    restored = pickle.loads(pickle.dumps(original))
    assert not restored.intervals
    assert not restored.dev_intervals

    # Serializing must not have emptied the intervals on the source object.
    assert_original_has_intervals()

0 comments on commit d106f7d

Please sign in to comment.