gateways/repodata: remove load/save/read_mod_and_etag (conda#12981)
Co-authored-by: Ken Odegard <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
3 people authored Aug 22, 2023
1 parent 9c7af40 commit 4fa4e4a
Showing 9 changed files with 123 additions and 236 deletions.
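
The running theme of the diff: callers stop reaching into SubdirData's private helpers and talk to RepodataCache directly. A minimal before/after sketch of the migration, assuming an existing SubdirData instance named sd (the variable name is illustrative, not part of the commit):

    # before this commit (removed or deprecated below)
    state = sd._load_state()
    sd._save_state(state)
    from conda.core.subdir_data import get_cache_control_max_age

    # after this commit
    state = sd.repo_cache.load_state()
    state.save()  # RepodataState persists itself next to repodata.json
    from conda.gateways.repodata import get_cache_control_max_age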
2 changes: 1 addition & 1 deletion conda/common/toposort.py
@@ -81,7 +81,7 @@ def _safe_toposort(data):
             log.debug(err.args[0])
 
             if not data:
-                return
+                return  # pragma: nocover
 
             yield pop_key(data)

67 changes: 15 additions & 52 deletions conda/core/subdir_data.py
@@ -5,14 +5,10 @@
 
 import json
 import pickle
-import re
 from collections import UserList, defaultdict
-from contextlib import closing
-from errno import ENODEV
 from functools import partial
-from itertools import chain, islice
+from itertools import chain
 from logging import getLogger
-from mmap import ACCESS_READ, mmap
 from os.path import exists, join, splitext
 from pathlib import Path
 from time import time
@@ -34,13 +30,15 @@
     RepoInterface,
     cache_fn_url,
     create_cache_dir,
+    get_repo_interface,
 )
-from conda.gateways.repodata import get_repo_interface
+from conda.gateways.repodata import (
+    get_cache_control_max_age as _get_cache_control_max_age,
+)
 
 from ..auxlib.ish import dals
 from ..base.constants import CONDA_PACKAGE_EXTENSION_V1, REPODATA_FN
 from ..base.context import context
-from ..common.compat import ensure_unicode
 from ..common.io import DummyExecutor, ThreadLimitedThreadPoolExecutor, dashlist
 from ..common.iterators import groupby_to_dict as groupby
 from ..common.path import url_to_path
@@ -60,6 +58,15 @@
 REPODATA_HEADER_RE = b'"(_etag|_mod|_cache_control)":[ ]?"(.*?[^\\\\])"[,}\\s]'  # NOQA
 
 
+@deprecated(
+    "24.3",
+    "24.9",
+    addendum="Use `conda.gateways.repodata.get_cache_control_max_age` instead.",
+)
+def get_cache_control_max_age(cache_control_value: str) -> int:
+    return _get_cache_control_max_age(cache_control_value)
+
+
 class SubdirDataType(type):
     def __call__(cls, channel, repodata_fn=REPODATA_FN):
         assert channel.subdir
@@ -297,20 +304,6 @@ def _iter_records_by_name(self, name):
         for i in self._names_index[name]:
             yield self._package_records[i]
 
-    def _load_state(self):
-        """
-        Cache headers and additional data needed to keep track of the cache are
-        stored separately, instead of the previous "added to repodata.json"
-        arrangement.
-        """
-        return self.repo_cache.load_state()
-
-    def _save_state(self, state: RepodataState):
-        assert Path(state.cache_path_json) == Path(self.cache_path_json)
-        assert Path(state.cache_path_state) == Path(self.cache_path_state)
-        assert state.repodata_fn == self.repodata_fn
-        return state.save()
-
     def _load(self):
         """
         Try to load repodata. If e.g. we are downloading
@@ -533,36 +526,6 @@ def _process_raw_repodata(self, repodata: dict, state: RepodataState | None = None):
         return _internal_state
 
 
-@deprecated("23.1", "23.9", addendum="Cache headers are now stored in a separate file.")
-def read_mod_and_etag(path):
-    # this function should no longer be used by conda but is kept for API
-    # stability. Was used to read inlined cache information from json; now
-    # stored in separate file.
-    with open(path, "rb") as f:
-        try:
-            with closing(mmap(f.fileno(), 0, access=ACCESS_READ)) as m:
-                match_objects = islice(re.finditer(REPODATA_HEADER_RE, m), 3)
-                result = dict(
-                    map(ensure_unicode, mo.groups()) for mo in match_objects  # type: ignore
-                )
-                return result
-        except (BufferError, ValueError):  # pragma: no cover
-            # BufferError: cannot close exported pointers exist
-            # https://github.com/conda/conda/issues/4592
-            # ValueError: cannot mmap an empty file
-            return {}
-        except OSError as e:  # pragma: no cover
-            # OSError: [Errno 19] No such device
-            if e.errno == ENODEV:
-                return {}
-            raise
-
-
-def get_cache_control_max_age(cache_control_value: str):
-    max_age = re.search(r"max-age=(\d+)", cache_control_value)
-    return int(max_age.groups()[0]) if max_age else 0
-
-
 def make_feature_record(feature_name):
     # necessary for the SAT solver to do the right thing with features
     pkg_name = "%s@" % feature_name
@@ -596,7 +559,7 @@ def fetch_repodata_remote_request(url, etag, mod_stamp, repodata_fn=REPODATA_FN):
     subdir = SubdirData(Channel(url), repodata_fn=repodata_fn)
 
     try:
-        cache_state = subdir._load_state()
+        cache_state = subdir.repo_cache.load_state()
         cache_state.etag = etag
         cache_state.mod = mod_stamp
         raw_repodata_str = subdir._repo.repodata(cache_state)  # type: ignore
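
The @deprecated shim added above keeps conda.core.subdir_data.get_cache_control_max_age importable until 24.9 while forwarding to conda.gateways.repodata. A usage sketch, assuming the shim warns when called, as conda.deprecations decorators normally do; the header string and the expected value of 30 follow from the max-age=(\d+) pattern in the removed implementation:

    import warnings

    from conda.core.subdir_data import get_cache_control_max_age

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        age = get_cache_control_max_age("public, max-age=30")
    assert age == 30  # behavior unchanged
    assert caught     # but the old import path now warns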
61 changes: 3 additions & 58 deletions conda/gateways/repodata/__init__.py
@@ -26,7 +26,6 @@
 from ...base.context import context
 from ...common.url import join_url, maybe_unquote
 from ...core.package_cache_data import PackageCacheData
-from ...deprecations import deprecated
 from ...exceptions import (
     CondaDependencyError,
     CondaHTTPError,
@@ -375,50 +374,6 @@ def __init__(
         # XXX may not be that useful/used compared to the full URL
         self.repodata_fn = repodata_fn
 
-    @deprecated("23.3", "23.9", addendum="use RepodataCache")
-    def load(self):
-        """
-        Cache headers and additional data needed to keep track of the cache are
-        stored separately, instead of the previous "added to repodata.json"
-        arrangement.
-        """
-        try:
-            state_path = self.cache_path_state
-            log.debug("Load %s cache from %s", self.repodata_fn, state_path)
-            state = json.loads(state_path.read_text())
-            # json and state files should match
-            json_stat = self.cache_path_json.stat()
-            if not (
-                state.get("mtime_ns") == json_stat.st_mtime_ns
-                and state.get("size") == json_stat.st_size
-            ):
-                # clear mod, etag, cache_control to encourage re-download
-                state.update(
-                    {
-                        ETAG_KEY: "",
-                        LAST_MODIFIED_KEY: "",
-                        CACHE_CONTROL_KEY: "",
-                        "size": 0,
-                    }
-                )
-            self.update(state)  # allow all fields
-        except (json.JSONDecodeError, OSError):
-            log.debug("Could not load state", exc_info=True)
-            self.clear()
-        return self
-
-    @deprecated("23.3", "23.9", addendum="use RepodataCache")
-    def save(self):
-        """Must be called after writing cache_path_json, since mtime is in another file."""
-        serialized = dict(self)
-        json_stat = self.cache_path_json.stat()
-        serialized.update(
-            {"mtime_ns": json_stat.st_mtime_ns, "size": json_stat.st_size}
-        )
-        return pathlib.Path(self.cache_path_state).write_text(
-            json.dumps(serialized, indent=True)
-        )
-
     @property
     def mod(self) -> str:
         """
@@ -624,7 +579,7 @@ def load_state(self):
         """
         try:
             self.load(state_only=True)
-        except FileNotFoundError:
+        except FileNotFoundError:  # or JSONDecodeError?
             self.state.clear()
         return self.state

@@ -776,22 +731,14 @@ def url_w_repodata_fn(self):
 
     @property
     def cache_path_json(self):
-        return Path(
-            str(self.cache_path_base)
-            + ("1" if context.use_only_tar_bz2 else "")
-            + ".json"
-        )
+        return self.repo_cache.cache_path_json
 
     @property
     def cache_path_state(self):
         """
         Out-of-band etag and other state needed by the RepoInterface.
         """
-        return Path(
-            str(self.cache_path_base)
-            + ("1" if context.use_only_tar_bz2 else "")
-            + CACHE_STATE_SUFFIX
-        )
+        return self.repo_cache.cache_path_state
 
     @property
     def repo_cache(self) -> RepodataCache:
@@ -891,8 +838,6 @@ def fetch_latest(self) -> tuple[dict | str, RepodataState]:
                 self.url_w_repodata_fn,
             )
             cache.refresh()
-            # touch(self.cache_path_json) # not anymore, or the a separate file is invalid
-            # self._save_state(mod_etag_headers)
             _internal_state = self.read_cache()
             return _internal_state
         else:
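
With cache_path_json and cache_path_state now delegating to repo_cache, path construction (including the use_only_tar_bz2 "1" suffix and CACHE_STATE_SUFFIX) lives in one place, RepodataCache. A short sketch of inspecting a cache through the surviving public surface; the channel URL is only an example, and load_state() itself reads local files without downloading anything:

    from conda.core.subdir_data import Channel, SubdirData

    sd = SubdirData(Channel("https://repo.anaconda.com/pkgs/main/noarch"))
    cache = sd.repo_cache          # RepodataCache
    state = cache.load_state()     # cleared when the state file is absent
    print(cache.cache_path_json)   # the cached repodata.json payload
    print(cache.cache_path_state)  # out-of-band etag/mod/cache-control
    print(state.etag, state.mod)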
80 changes: 21 additions & 59 deletions tests/core/test_subdir_data.py
@@ -58,19 +58,15 @@ def test_get_index_no_platform_with_offline_cache(platform=OVERRIDE_PLATFORM):
         {"CONDA_REPODATA_TIMEOUT_SECS": "0", "CONDA_PLATFORM": platform},
         stack_callback=conda_tests_ctxt_mgmt_def_pol,
     ):
-        with patch.object(
-            conda.core.subdir_data, "read_mod_and_etag"
-        ) as read_mod_and_etag:
-            read_mod_and_etag.return_value = {}
-            channel_urls = ("https://repo.anaconda.com/pkgs/pro",)
-
-            this_platform = context.subdir
-            index = get_index(channel_urls=channel_urls, prepend=False)
-            for dist, record in index.items():
-                assert platform_in_record(this_platform, record), (
-                    this_platform,
-                    record.url,
-                )
+        channel_urls = ("https://repo.anaconda.com/pkgs/pro",)
+
+        this_platform = context.subdir
+        index = get_index(channel_urls=channel_urls, prepend=False)
+        for dist, record in index.items():
+            assert platform_in_record(this_platform, record), (
+                this_platform,
+                record.url,
+            )
 
     # When unknown=True (which is implicitly engaged when context.offline is
     # True), there may be additional items in the cache that are included in
@@ -83,32 +79,22 @@
     with env_var(
         "CONDA_OFFLINE", "yes", stack_callback=conda_tests_ctxt_mgmt_def_pol
     ):
-        # note `fetch_repodata_remote_request` will no longer be called
-        # by conda code, and is only there for backwards compatibility.
-        with patch.object(
-            conda.core.subdir_data, "fetch_repodata_remote_request"
-        ) as remote_request:
-            index2 = get_index(
-                channel_urls=channel_urls, prepend=False, unknown=unknown
-            )
-            assert all(index2.get(k) == rec for k, rec in index.items())
-            assert unknown is not False or len(index) == len(index2)
-            assert remote_request.call_count == 0
+        index2 = get_index(
+            channel_urls=channel_urls, prepend=False, unknown=unknown
+        )
+        assert all(index2.get(k) == rec for k, rec in index.items())
+        assert unknown is not False or len(index) == len(index2)
 
     for unknown in (False, True):
         with env_vars(
             {"CONDA_REPODATA_TIMEOUT_SECS": "0", "CONDA_PLATFORM": "linux-64"},
             stack_callback=conda_tests_ctxt_mgmt_def_pol,
         ):
-            with patch.object(
-                conda.core.subdir_data, "fetch_repodata_remote_request"
-            ) as remote_request:
-                remote_request.side_effect = Response304ContentUnchanged()
-                index3 = get_index(
-                    channel_urls=channel_urls, prepend=False, unknown=unknown
-                )
-                assert all(index3.get(k) == rec for k, rec in index.items())
-                assert unknown or len(index) == len(index3)
+            index3 = get_index(
+                channel_urls=channel_urls, prepend=False, unknown=unknown
+            )
+            assert all(index3.get(k) == rec for k, rec in index.items())
+            assert unknown or len(index) == len(index3)
 
     # only works if CONDA_PLATFORM exists in tests/data/conda_format_repo
     # (test will not pass on newer platforms with default CONDA_PLATFORM =
@@ -173,27 +159,6 @@ def test_fetch_repodata_remote_request_invalid_arch():
     assert result is None
 
 
-def test_fetch_repodata_remote_request_invalid_noarch():
-    url = "file:///fake/fake/fake/noarch"
-    etag = None
-    mod_stamp = "Mon, 28 Jan 2019 01:01:01 GMT"
-    with pytest.raises(UnavailableInvalidChannel):
-        fetch_repodata_remote_request(url, etag, mod_stamp)
-
-
-def test_no_ssl(mocker):
-    def CondaSession_get(*args, **kwargs):
-        raise SSLError("Got an SSL error")
-
-    mocker.patch.object(CondaSession, "get", CondaSession_get)
-
-    url = "https://www.fake.fake/fake/fake/noarch"
-    etag = None
-    mod_stamp = "Mon, 28 Jan 2019 01:01:01 GMT"
-    with pytest.raises(CondaSSLError):
-        fetch_repodata_remote_request(url, etag, mod_stamp)
-
-
 def test_subdir_data_prefers_conda_to_tar_bz2(platform=OVERRIDE_PLATFORM):
     # force this to False, because otherwise tests fail when run with old conda-build
     with env_vars(
@@ -248,9 +213,6 @@ def __exit__(self, *exc):
     sd.reload()
     assert all(r.name == "zlib" for r in sd._iter_records_by_name("zlib"))  # type: ignore
 
-    # newly deprecated, run them anyway
-    sd._save_state(sd._load_state())
-
 
 def test_repodata_version_error(platform=OVERRIDE_PLATFORM):
     channel = Channel(url_path(join(CHANNEL_DIR, platform)))
@@ -351,10 +313,10 @@ def repo_fetch(self):
     )
 
     SubdirData.clear_cached_local_channel_data(exclude_file=False)
-    sd = BadCacheSubdirData(channel=local_channel)
+    sd: SubdirData = BadCacheSubdirData(channel=local_channel)
 
     with pytest.raises(CondaError):
-        state = sd._load_state()
+        state = sd.repo_cache.load_state()
         # tortured way to get to old ValueError handler
         bad_cache.write_text("NOT JSON")
         sd._read_local_repodata(state)
8 changes: 4 additions & 4 deletions tests/data/build-index2-json.py
@@ -3,17 +3,17 @@
 import json
 from os.path import abspath, dirname, join
 
-from conda.core.subdir_data import fetch_repodata_remote_request
+from conda.core.subdir_data import Channel, SubdirData
 
 DATA_DIR = abspath(join(dirname(__file__), "repodata"))
 
 
 def save_data_source(url, name):
-    raw_repodata_str = fetch_repodata_remote_request(url, None, None)
-    json.loads(raw_repodata_str)
+    sd = SubdirData(Channel(url))
+    repodata, _state = sd.repo_fetch.fetch_latest_parsed()
     with open(join(DATA_DIR, name + ".json"), "w") as fh:
         json.dump(
-            json.loads(raw_repodata_str),
+            repodata,
             fh,
             indent=2,
             sort_keys=True,
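
build-index2-json.py now exercises the same code path conda itself uses: RepodataFetch.fetch_latest (typed tuple[dict | str, RepodataState] in the hunk above), with fetch_latest_parsed as the convenience that always yields the parsed dict. A hedged sketch of the round trip the script performs, with a placeholder channel URL:

    from conda.core.subdir_data import Channel, SubdirData

    sd = SubdirData(Channel("https://repo.anaconda.com/pkgs/main/noarch"))
    repodata, state = sd.repo_fetch.fetch_latest_parsed()
    print(len(repodata.get("packages", {})))  # parsed dict, no json.loads needed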
13 changes: 13 additions & 0 deletions tests/gateways/test_jlap.py
@@ -416,6 +416,19 @@ def test_jlap_sought(
     assert len(patched["info"]) == 1  # patches not found in bad jlap file
 
 
+def test_jlap_coverage():
+    """
+    Force raise RepodataOnDisk() at end of JlapRepoInterface.repodata() function.
+    """
+
+    class JlapCoverMe(interface.JlapRepoInterface):
+        def repodata_parsed(self, state):
+            return
+
+    with pytest.raises(RepodataOnDisk):
+        JlapCoverMe("", "", cache=None).repodata({})  # type: ignore
+
+
 def test_jlap_errors(
     package_server: socket, tmp_path: Path, package_repository_base: Path, mocker
 ):