gateways/repodata: remove load/save/read_mod_and_etag (conda#12981)
Co-authored-by: Ken Odegard <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
3 people authored Aug 22, 2023
1 parent 9c7af40 commit 4fa4e4a
Showing 9 changed files with 123 additions and 236 deletions.
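
The running theme of the diff: callers stop reaching into SubdirData's private helpers and talk to RepodataCache directly. A minimal before/after sketch of the migration, assuming an existing SubdirData instance named sd (the variable name is illustrative, not part of the commit):

    # before this commit (removed or deprecated below)
    state = sd._load_state()
    sd._save_state(state)
    from conda.core.subdir_data import get_cache_control_max_age

    # after this commit
    state = sd.repo_cache.load_state()
    state.save()  # RepodataState persists itself next to repodata.json
    from conda.gateways.repodata import get_cache_control_max_age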
2 changes: 1 addition & 1 deletion conda/common/toposort.py
@@ -81,7 +81,7 @@ def _safe_toposort(data):
             log.debug(err.args[0])
 
             if not data:
-                return
+                return  # pragma: nocover
 
             yield pop_key(data)

67 changes: 15 additions & 52 deletions conda/core/subdir_data.py
@@ -5,14 +5,10 @@
 
 import json
 import pickle
-import re
 from collections import UserList, defaultdict
-from contextlib import closing
-from errno import ENODEV
 from functools import partial
-from itertools import chain, islice
+from itertools import chain
 from logging import getLogger
-from mmap import ACCESS_READ, mmap
 from os.path import exists, join, splitext
 from pathlib import Path
 from time import time
@@ -34,13 +30,15 @@
     RepoInterface,
     cache_fn_url,
     create_cache_dir,
+    get_repo_interface,
 )
-from conda.gateways.repodata import get_repo_interface
+from conda.gateways.repodata import (
+    get_cache_control_max_age as _get_cache_control_max_age,
+)
 
 from ..auxlib.ish import dals
 from ..base.constants import CONDA_PACKAGE_EXTENSION_V1, REPODATA_FN
 from ..base.context import context
-from ..common.compat import ensure_unicode
 from ..common.io import DummyExecutor, ThreadLimitedThreadPoolExecutor, dashlist
 from ..common.iterators import groupby_to_dict as groupby
 from ..common.path import url_to_path
@@ -60,6 +58,15 @@
 REPODATA_HEADER_RE = b'"(_etag|_mod|_cache_control)":[ ]?"(.*?[^\\\\])"[,}\\s]'  # NOQA
 
 
+@deprecated(
+    "24.3",
+    "24.9",
+    addendum="Use `conda.gateways.repodata.get_cache_control_max_age` instead.",
+)
+def get_cache_control_max_age(cache_control_value: str) -> int:
+    return _get_cache_control_max_age(cache_control_value)
+
+
 class SubdirDataType(type):
     def __call__(cls, channel, repodata_fn=REPODATA_FN):
         assert channel.subdir
@@ -297,20 +304,6 @@ def _iter_records_by_name(self, name):
         for i in self._names_index[name]:
             yield self._package_records[i]
 
-    def _load_state(self):
-        """
-        Cache headers and additional data needed to keep track of the cache are
-        stored separately, instead of the previous "added to repodata.json"
-        arrangement.
-        """
-        return self.repo_cache.load_state()
-
-    def _save_state(self, state: RepodataState):
-        assert Path(state.cache_path_json) == Path(self.cache_path_json)
-        assert Path(state.cache_path_state) == Path(self.cache_path_state)
-        assert state.repodata_fn == self.repodata_fn
-        return state.save()
-
     def _load(self):
         """
         Try to load repodata. If e.g. we are downloading
@@ -533,36 +526,6 @@ def _process_raw_repodata(self, repodata: dict, state: RepodataState | None = None):
         return _internal_state
 
 
-@deprecated("23.1", "23.9", addendum="Cache headers are now stored in a separate file.")
-def read_mod_and_etag(path):
-    # this function should no longer be used by conda but is kept for API
-    # stability. Was used to read inlined cache information from json; now
-    # stored in separate file.
-    with open(path, "rb") as f:
-        try:
-            with closing(mmap(f.fileno(), 0, access=ACCESS_READ)) as m:
-                match_objects = islice(re.finditer(REPODATA_HEADER_RE, m), 3)
-                result = dict(
-                    map(ensure_unicode, mo.groups()) for mo in match_objects  # type: ignore
-                )
-                return result
-        except (BufferError, ValueError):  # pragma: no cover
-            # BufferError: cannot close exported pointers exist
-            # https://github.com/conda/conda/issues/4592
-            # ValueError: cannot mmap an empty file
-            return {}
-        except OSError as e:  # pragma: no cover
-            # OSError: [Errno 19] No such device
-            if e.errno == ENODEV:
-                return {}
-            raise
-
-
-def get_cache_control_max_age(cache_control_value: str):
-    max_age = re.search(r"max-age=(\d+)", cache_control_value)
-    return int(max_age.groups()[0]) if max_age else 0
-
-
 def make_feature_record(feature_name):
     # necessary for the SAT solver to do the right thing with features
     pkg_name = "%s@" % feature_name
@@ -596,7 +559,7 @@ def fetch_repodata_remote_request(url, etag, mod_stamp, repodata_fn=REPODATA_FN):
     subdir = SubdirData(Channel(url), repodata_fn=repodata_fn)
 
     try:
-        cache_state = subdir._load_state()
+        cache_state = subdir.repo_cache.load_state()
         cache_state.etag = etag
         cache_state.mod = mod_stamp
         raw_repodata_str = subdir._repo.repodata(cache_state)  # type: ignore
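
The @deprecated shim added above keeps conda.core.subdir_data.get_cache_control_max_age importable until 24.9 while forwarding to conda.gateways.repodata. A usage sketch, assuming the shim warns when called, as conda.deprecations decorators normally do; the header string and the expected value of 30 follow from the max-age=(\d+) pattern in the removed implementation:

    import warnings

    from conda.core.subdir_data import get_cache_control_max_age

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        age = get_cache_control_max_age("public, max-age=30")
    assert age == 30  # behavior unchanged
    assert caught     # but the old import path now warns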
61 changes: 3 additions & 58 deletions conda/gateways/repodata/__init__.py
@@ -26,7 +26,6 @@
 from ...base.context import context
 from ...common.url import join_url, maybe_unquote
 from ...core.package_cache_data import PackageCacheData
-from ...deprecations import deprecated
 from ...exceptions import (
     CondaDependencyError,
     CondaHTTPError,
@@ -375,50 +374,6 @@ def __init__(
         # XXX may not be that useful/used compared to the full URL
         self.repodata_fn = repodata_fn
 
-    @deprecated("23.3", "23.9", addendum="use RepodataCache")
-    def load(self):
-        """
-        Cache headers and additional data needed to keep track of the cache are
-        stored separately, instead of the previous "added to repodata.json"
-        arrangement.
-        """
-        try:
-            state_path = self.cache_path_state
-            log.debug("Load %s cache from %s", self.repodata_fn, state_path)
-            state = json.loads(state_path.read_text())
-            # json and state files should match
-            json_stat = self.cache_path_json.stat()
-            if not (
-                state.get("mtime_ns") == json_stat.st_mtime_ns
-                and state.get("size") == json_stat.st_size
-            ):
-                # clear mod, etag, cache_control to encourage re-download
-                state.update(
-                    {
-                        ETAG_KEY: "",
-                        LAST_MODIFIED_KEY: "",
-                        CACHE_CONTROL_KEY: "",
-                        "size": 0,
-                    }
-                )
-            self.update(state)  # allow all fields
-        except (json.JSONDecodeError, OSError):
-            log.debug("Could not load state", exc_info=True)
-            self.clear()
-        return self
-
-    @deprecated("23.3", "23.9", addendum="use RepodataCache")
-    def save(self):
-        """Must be called after writing cache_path_json, since mtime is in another file."""
-        serialized = dict(self)
-        json_stat = self.cache_path_json.stat()
-        serialized.update(
-            {"mtime_ns": json_stat.st_mtime_ns, "size": json_stat.st_size}
-        )
-        return pathlib.Path(self.cache_path_state).write_text(
-            json.dumps(serialized, indent=True)
-        )
-
     @property
     def mod(self) -> str:
         """
@@ -624,7 +579,7 @@ def load_state(self):
         """
         try:
             self.load(state_only=True)
-        except FileNotFoundError:
+        except FileNotFoundError:  # or JSONDecodeError?
             self.state.clear()
         return self.state

@@ -776,22 +731,14 @@ def url_w_repodata_fn(self):
 
     @property
     def cache_path_json(self):
-        return Path(
-            str(self.cache_path_base)
-            + ("1" if context.use_only_tar_bz2 else "")
-            + ".json"
-        )
+        return self.repo_cache.cache_path_json
 
     @property
     def cache_path_state(self):
         """
         Out-of-band etag and other state needed by the RepoInterface.
         """
-        return Path(
-            str(self.cache_path_base)
-            + ("1" if context.use_only_tar_bz2 else "")
-            + CACHE_STATE_SUFFIX
-        )
+        return self.repo_cache.cache_path_state
 
     @property
     def repo_cache(self) -> RepodataCache:
@@ -891,8 +838,6 @@ def fetch_latest(self) -> tuple[dict | str, RepodataState]:
                 self.url_w_repodata_fn,
             )
             cache.refresh()
-            # touch(self.cache_path_json) # not anymore, or the a separate file is invalid
-            # self._save_state(mod_etag_headers)
             _internal_state = self.read_cache()
             return _internal_state
         else:
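
With cache_path_json and cache_path_state now delegating to repo_cache, path construction (including the use_only_tar_bz2 "1" suffix and CACHE_STATE_SUFFIX) lives in one place, RepodataCache. A short sketch of inspecting a cache through the surviving public surface; the channel URL is only an example, and load_state() itself reads local files without downloading anything:

    from conda.core.subdir_data import Channel, SubdirData

    sd = SubdirData(Channel("https://repo.anaconda.com/pkgs/main/noarch"))
    cache = sd.repo_cache          # RepodataCache
    state = cache.load_state()     # cleared when the state file is absent
    print(cache.cache_path_json)   # the cached repodata.json payload
    print(cache.cache_path_state)  # out-of-band etag/mod/cache-control
    print(state.etag, state.mod)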
80 changes: 21 additions & 59 deletions tests/core/test_subdir_data.py
@@ -58,19 +58,15 @@ def test_get_index_no_platform_with_offline_cache(platform=OVERRIDE_PLATFORM):
         {"CONDA_REPODATA_TIMEOUT_SECS": "0", "CONDA_PLATFORM": platform},
         stack_callback=conda_tests_ctxt_mgmt_def_pol,
     ):
-        with patch.object(
-            conda.core.subdir_data, "read_mod_and_etag"
-        ) as read_mod_and_etag:
-            read_mod_and_etag.return_value = {}
-            channel_urls = ("https://repo.anaconda.com/pkgs/pro",)
-
-            this_platform = context.subdir
-            index = get_index(channel_urls=channel_urls, prepend=False)
-            for dist, record in index.items():
-                assert platform_in_record(this_platform, record), (
-                    this_platform,
-                    record.url,
-                )
+        channel_urls = ("https://repo.anaconda.com/pkgs/pro",)
+
+        this_platform = context.subdir
+        index = get_index(channel_urls=channel_urls, prepend=False)
+        for dist, record in index.items():
+            assert platform_in_record(this_platform, record), (
+                this_platform,
+                record.url,
+            )
 
     # When unknown=True (which is implicitly engaged when context.offline is
     # True), there may be additional items in the cache that are included in
@@ -83,32 +79,22 @@
     with env_var(
         "CONDA_OFFLINE", "yes", stack_callback=conda_tests_ctxt_mgmt_def_pol
     ):
-        # note `fetch_repodata_remote_request` will no longer be called
-        # by conda code, and is only there for backwards compatibility.
-        with patch.object(
-            conda.core.subdir_data, "fetch_repodata_remote_request"
-        ) as remote_request:
-            index2 = get_index(
-                channel_urls=channel_urls, prepend=False, unknown=unknown
-            )
-            assert all(index2.get(k) == rec for k, rec in index.items())
-            assert unknown is not False or len(index) == len(index2)
-            assert remote_request.call_count == 0
+        index2 = get_index(
+            channel_urls=channel_urls, prepend=False, unknown=unknown
+        )
+        assert all(index2.get(k) == rec for k, rec in index.items())
+        assert unknown is not False or len(index) == len(index2)
 
     for unknown in (False, True):
         with env_vars(
             {"CONDA_REPODATA_TIMEOUT_SECS": "0", "CONDA_PLATFORM": "linux-64"},
             stack_callback=conda_tests_ctxt_mgmt_def_pol,
         ):
-            with patch.object(
-                conda.core.subdir_data, "fetch_repodata_remote_request"
-            ) as remote_request:
-                remote_request.side_effect = Response304ContentUnchanged()
-                index3 = get_index(
-                    channel_urls=channel_urls, prepend=False, unknown=unknown
-                )
-                assert all(index3.get(k) == rec for k, rec in index.items())
-                assert unknown or len(index) == len(index3)
+            index3 = get_index(
+                channel_urls=channel_urls, prepend=False, unknown=unknown
+            )
+            assert all(index3.get(k) == rec for k, rec in index.items())
+            assert unknown or len(index) == len(index3)
 
     # only works if CONDA_PLATFORM exists in tests/data/conda_format_repo
     # (test will not pass on newer platforms with default CONDA_PLATFORM =
@@ -173,27 +159,6 @@ def test_fetch_repodata_remote_request_invalid_arch():
     assert result is None
 
 
-def test_fetch_repodata_remote_request_invalid_noarch():
-    url = "file:///fake/fake/fake/noarch"
-    etag = None
-    mod_stamp = "Mon, 28 Jan 2019 01:01:01 GMT"
-    with pytest.raises(UnavailableInvalidChannel):
-        fetch_repodata_remote_request(url, etag, mod_stamp)
-
-
-def test_no_ssl(mocker):
-    def CondaSession_get(*args, **kwargs):
-        raise SSLError("Got an SSL error")
-
-    mocker.patch.object(CondaSession, "get", CondaSession_get)
-
-    url = "https://www.fake.fake/fake/fake/noarch"
-    etag = None
-    mod_stamp = "Mon, 28 Jan 2019 01:01:01 GMT"
-    with pytest.raises(CondaSSLError):
-        fetch_repodata_remote_request(url, etag, mod_stamp)
-
-
 def test_subdir_data_prefers_conda_to_tar_bz2(platform=OVERRIDE_PLATFORM):
     # force this to False, because otherwise tests fail when run with old conda-build
     with env_vars(
@@ -248,9 +213,6 @@ def __exit__(self, *exc):
     sd.reload()
     assert all(r.name == "zlib" for r in sd._iter_records_by_name("zlib"))  # type: ignore
 
-    # newly deprecated, run them anyway
-    sd._save_state(sd._load_state())
-
 
 def test_repodata_version_error(platform=OVERRIDE_PLATFORM):
     channel = Channel(url_path(join(CHANNEL_DIR, platform)))
@@ -351,10 +313,10 @@ def repo_fetch(self):
     )
 
     SubdirData.clear_cached_local_channel_data(exclude_file=False)
-    sd = BadCacheSubdirData(channel=local_channel)
+    sd: SubdirData = BadCacheSubdirData(channel=local_channel)
 
     with pytest.raises(CondaError):
-        state = sd._load_state()
+        state = sd.repo_cache.load_state()
         # tortured way to get to old ValueError handler
         bad_cache.write_text("NOT JSON")
         sd._read_local_repodata(state)
8 changes: 4 additions & 4 deletions tests/data/build-index2-json.py
@@ -3,17 +3,17 @@
 import json
 from os.path import abspath, dirname, join
 
-from conda.core.subdir_data import fetch_repodata_remote_request
+from conda.core.subdir_data import Channel, SubdirData
 
 DATA_DIR = abspath(join(dirname(__file__), "repodata"))
 
 
 def save_data_source(url, name):
-    raw_repodata_str = fetch_repodata_remote_request(url, None, None)
-    json.loads(raw_repodata_str)
+    sd = SubdirData(Channel(url))
+    repodata, _state = sd.repo_fetch.fetch_latest_parsed()
     with open(join(DATA_DIR, name + ".json"), "w") as fh:
         json.dump(
-            json.loads(raw_repodata_str),
+            repodata,
             fh,
             indent=2,
             sort_keys=True,
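
build-index2-json.py now exercises the same code path conda itself uses: RepodataFetch.fetch_latest (typed tuple[dict | str, RepodataState] in the hunk above), with fetch_latest_parsed as the convenience that always yields the parsed dict. A hedged sketch of the round trip the script performs, with a placeholder channel URL:

    from conda.core.subdir_data import Channel, SubdirData

    sd = SubdirData(Channel("https://repo.anaconda.com/pkgs/main/noarch"))
    repodata, state = sd.repo_fetch.fetch_latest_parsed()
    print(len(repodata.get("packages", {})))  # parsed dict, no json.loads needed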
13 changes: 13 additions & 0 deletions tests/gateways/test_jlap.py
@@ -416,6 +416,19 @@ def test_jlap_sought(
     assert len(patched["info"]) == 1  # patches not found in bad jlap file
 
 
+def test_jlap_coverage():
+    """
+    Force raise RepodataOnDisk() at end of JlapRepoInterface.repodata() function.
+    """
+
+    class JlapCoverMe(interface.JlapRepoInterface):
+        def repodata_parsed(self, state):
+            return
+
+    with pytest.raises(RepodataOnDisk):
+        JlapCoverMe("", "", cache=None).repodata({})  # type: ignore
+
+
 def test_jlap_errors(
     package_server: socket, tmp_path: Path, package_repository_base: Path, mocker
 ):