From 5205b2c6c392f7a212bf855e572dc59d3e21ca6f Mon Sep 17 00:00:00 2001 From: Benjy Weinberger Date: Fri, 18 Mar 2022 00:02:37 -0700 Subject: [PATCH] [internal] Simplify config parsing. (#14846) There were too many unnecessary abstractions bogging down the code, and making it harder to adapt and modify. This change streamlines things by getting rid of the Single/Chained config classes. Instead the _ConfigValues class, which already did most of the work, takes a new path field, so it can encapsulate the source file from which its values were parsed. Then, the Config ABC becomes a regular concrete dataclass that exists to directly handle multiple config files. This allows us to remove many superfluous methods, exceptions and logic, that were only there to prop up the abstractions. [ci skip-rust] [ci skip-build-wheels] --- .../internal_plugins/releases/register.py | 4 +- src/python/pants/option/config.py | 264 +++++------------- src/python/pants/option/config_test.py | 84 +----- src/python/pants/option/errors.py | 10 - src/python/pants/option/options.py | 32 +-- .../pants/option/options_integration_test.py | 2 +- src/python/pants/option/parser.py | 14 +- 7 files changed, 85 insertions(+), 325 deletions(-) diff --git a/pants-plugins/internal_plugins/releases/register.py b/pants-plugins/internal_plugins/releases/register.py index 59500d0f442..54be6892195 100644 --- a/pants-plugins/internal_plugins/releases/register.py +++ b/pants-plugins/internal_plugins/releases/register.py @@ -20,7 +20,7 @@ from pants.engine.target import Target from pants.engine.unions import UnionRule from pants.option.alias import CliAlias -from pants.option.config import _ChainedConfig +from pants.option.config import Config from pants.option.option_types import DictOption from pants.option.options_bootstrapper import OptionsBootstrapper from pants.option.subsystem import Subsystem @@ -181,7 +181,7 @@ async def check_default_tools( SessionValues( { OptionsBootstrapper: OptionsBootstrapper( - tuple(), ("./pants",), args, _ChainedConfig(tuple()), CliAlias() + tuple(), ("./pants",), args, Config(tuple()), CliAlias() ) } ), diff --git a/src/python/pants/option/config.py b/src/python/pants/option/config.py index 1df72e06d57..b1184120679 100644 --- a/src/python/pants/option/config.py +++ b/src/python/pants/option/config.py @@ -4,27 +4,22 @@ from __future__ import annotations import getpass -import itertools +import logging import os import re -from abc import ABC, abstractmethod from dataclasses import dataclass from functools import partial -from hashlib import sha1 -from typing import Any, ClassVar, Dict, Iterable, List, Mapping, Sequence, Union, cast +from typing import Any, Dict, Iterable, List, Mapping, Union, cast import toml from typing_extensions import Protocol from pants.base.build_environment import get_buildroot -from pants.option.errors import ( - ConfigError, - InterpolationMissingOptionError, - NoOptionError, - NoSectionError, -) +from pants.option.errors import ConfigError, ConfigValidationError, InterpolationMissingOptionError from pants.option.ranked_value import Value -from pants.util.ordered_set import OrderedSet + +logger = logging.getLogger(__name__) + # A dict with optional override seed values for buildroot, pants_workdir, and pants_distdir. SeedValues = Dict[str, Value] @@ -46,7 +41,11 @@ def content(self) -> bytes: raise NotImplementedError() -class Config(ABC): +DEFAULT_SECTION = "DEFAULT" + + +@dataclass(frozen=True, eq=False) +class Config: """Encapsulates config file loading and access, including encapsulation of support for multiple config files. @@ -54,7 +53,7 @@ class Config(ABC): replaced with the value of var_name. """ - DEFAULT_SECTION: ClassVar[str] = "DEFAULT" + values: tuple[_ConfigValues, ...] @classmethod def load( @@ -69,40 +68,40 @@ def load( DEFAULT section, and be available for use in substitutions. The caller may override some of these seed values. """ - single_file_configs = [] + config_values = [] for file_content in file_contents: - content_digest = sha1(file_content.content).hexdigest() normalized_seed_values = cls._determine_seed_values(seed_values=seed_values) - try: - config_values = cls._parse_toml( - file_content.content.decode(), normalized_seed_values - ) + config_values.append(cls._parse_toml(file_content, normalized_seed_values)) except Exception as e: raise ConfigError( f"Config file {file_content.path} could not be parsed as TOML:\n {e}" ) - - single_file_configs.append( - _SingleFileConfig( - config_path=file_content.path, - content_digest=content_digest, - values=config_values, - ), - ) - return _ChainedConfig(tuple(reversed(single_file_configs))) + return cls(tuple(config_values)) @classmethod def _parse_toml( - cls, config_content: str, normalized_seed_values: dict[str, str] + cls, config_source: ConfigSource, normalized_seed_values: dict[str, str] ) -> _ConfigValues: """Attempt to parse as TOML, raising an exception on failure.""" - toml_values = cast(Dict[str, Any], toml.loads(config_content)) - toml_values["DEFAULT"] = { + toml_values = cast(Dict[str, Any], toml.loads(config_source.content.decode())) + toml_values[DEFAULT_SECTION] = { **normalized_seed_values, - **toml_values.get("DEFAULT", {}), + **toml_values.get(DEFAULT_SECTION, {}), } - return _ConfigValues(toml_values) + return _ConfigValues(config_source.path, toml_values) + + def verify(self, section_to_valid_options: dict[str, set[str]]): + error_log = [] + for config_values in self.values: + error_log.extend(config_values.get_verification_errors(section_to_valid_options)) + if error_log: + for error in error_log: + logger.error(error) + raise ConfigValidationError( + "Invalid config entries detected. See log for details on which entries to update " + "or remove.\n(Specify --no-verify-config to disable this check.)" + ) @staticmethod def _determine_seed_values(*, seed_values: SeedValues | None = None) -> dict[str, str]: @@ -135,46 +134,23 @@ def get(self, section, option) -> str | None: If the specified section does not exist or is missing a definition for the option, the value is looked up in the DEFAULT section. If there is still no definition found, returns None. """ - if not self.has_option(section, option): - return None - return self.get_value(section, option) - - @abstractmethod - def configs(self) -> Sequence[_SingleFileConfig]: - """Returns the underlying single-file configs represented by this object.""" + for vals in reversed(self.values): + val = vals.get_value(section, option) + if val is not None: + return val + return None - @abstractmethod def sources(self) -> list[str]: """Returns the sources of this config as a list of filenames.""" + return [vals.path for vals in self.values] - @abstractmethod - def sections(self) -> list[str]: - """Returns the sections in this config (not including DEFAULT).""" - - @abstractmethod - def has_section(self, section: str) -> bool: - """Returns whether this config has the section.""" - - @abstractmethod - def has_option(self, section: str, option: str) -> bool: - """Returns whether this config specified a value for the option.""" - - @abstractmethod - def get_value(self, section: str, option: str) -> str: - """Returns the value of the option in this config as a string. - - Raises NoSectionError if the section doesn't exist. Raises NoOptionError if the option - doesn't exist in the section. - """ - - @abstractmethod - def get_source_for_option(self, section: str, option: str) -> str | None: - """Returns the path to the source file the given option was defined in. - - :param section: the scope of the option. - :param option: the name of the option. - :returns: the path to the config file, or None if the option was not defined by a config file. - """ + def get_sources_for_option(self, section: str, option: str) -> list[str]: + """Returns the path(s) to the source file(s) the given option was defined in.""" + paths = [] + for vals in reversed(self.values): + if vals.get_value(section, option) is not None: + paths.append(os.path.relpath(vals.path)) + return paths _TomlPrimitive = Union[bool, int, float, str] @@ -185,7 +161,8 @@ def get_source_for_option(self, section: str, option: str) -> str | None: class _ConfigValues: """The parsed contents of a TOML config file.""" - values: dict[str, Any] + path: str + section_to_values: dict[str, dict[str, Any]] @staticmethod def _is_an_option(option_value: _TomlValue | dict) -> bool: @@ -281,22 +258,10 @@ def _stringify_val_without_interpolation(self, raw_value: _TomlValue) -> str: interpolate=False, ) - @property - def sections(self) -> list[str]: - return [scope for scope in self.values if scope != "DEFAULT"] - - def has_section(self, section: str) -> bool: - return section in self.values - - def has_option(self, section: str, option: str) -> bool: - if not self.has_section(section): - return False - return option in self.values[section] or option in self.defaults - - def get_value(self, section: str, option: str) -> str: - section_values = self.values.get(section) + def get_value(self, section: str, option: str) -> str | None: + section_values = self.section_to_values.get(section) if section_values is None: - raise NoSectionError(section) + return None stringify = partial( self._stringify_val, @@ -308,7 +273,7 @@ def get_value(self, section: str, option: str) -> str: if option not in section_values: if option in self.defaults: return stringify(raw_value=self.defaults[option]) - raise NoOptionError(option, section) + return None option_value = section_values[option] if not isinstance(option_value, dict): @@ -330,124 +295,29 @@ def get_value(self, section: str, option: str) -> str: return add_val return remove_val - def options(self, section: str) -> list[str]: - section_values = self.values.get(section) - if section_values is None: - raise NoSectionError(section) - return [ - *section_values.keys(), - *( - default_option - for default_option in self.defaults - if default_option not in section_values - ), - ] - @property def defaults(self) -> dict[str, str]: return { option: self._stringify_val_without_interpolation(option_val) - for option, option_val in self.values["DEFAULT"].items() + for option, option_val in self.section_to_values[DEFAULT_SECTION].items() } - -@dataclass(frozen=True, eq=False) -class _SingleFileConfig(Config): - """Config read from a single file.""" - - config_path: str - content_digest: str - values: _ConfigValues - - def configs(self) -> list[_SingleFileConfig]: - return [self] - - def sources(self) -> list[str]: - return [self.config_path] - - def sections(self) -> list[str]: - return self.values.sections - - def has_section(self, section: str) -> bool: - return self.values.has_section(section) - - def has_option(self, section: str, option: str) -> bool: - return self.values.has_option(section, option) - - def get_value(self, section: str, option: str) -> str: - return self.values.get_value(section, option) - - def get_source_for_option(self, section: str, option: str) -> str | None: - if self.has_option(section, option): - return self.sources()[0] - return None - - def __repr__(self) -> str: - return f"SingleFileConfig({self.config_path})" - - def __eq__(self, other: Any) -> bool: - if not isinstance(other, _SingleFileConfig): - return NotImplemented - return self.config_path == other.config_path and self.content_digest == other.content_digest - - def __hash__(self) -> int: - return hash(self.content_digest) - - -@dataclass(frozen=True) -class _ChainedConfig(Config): - """Config read from multiple sources.""" - - # Config instances to chain. Later instances take precedence over earlier ones. - chained_configs: tuple[_SingleFileConfig, ...] - - @property - def _configs(self) -> tuple[_SingleFileConfig, ...]: - return self.chained_configs - - def configs(self) -> tuple[_SingleFileConfig, ...]: - return self.chained_configs - - def sources(self) -> list[str]: - # NB: Present the sources in the order we were given them. - return list(itertools.chain.from_iterable(cfg.sources() for cfg in reversed(self._configs))) - - def sections(self) -> list[str]: - ret: OrderedSet[str] = OrderedSet() - for cfg in self._configs: - ret.update(cfg.sections()) - return list(ret) - - def has_section(self, section: str) -> bool: - for cfg in self._configs: - if cfg.has_section(section): - return True - return False - - def has_option(self, section: str, option: str) -> bool: - for cfg in self._configs: - if cfg.has_option(section, option): - return True - return False - - def get_value(self, section: str, option: str) -> str: - for cfg in self._configs: + def get_verification_errors(self, section_to_valid_options: dict[str, set[str]]) -> list[str]: + error_log = [] + for section, vals in self.section_to_values.items(): + if section == DEFAULT_SECTION: + continue try: - return cfg.get_value(section, option) - except (NoSectionError, NoOptionError): - pass - if not self.has_section(section): - raise NoSectionError(section) - raise NoOptionError(option, section) - - def get_source_for_option(self, section: str, option: str) -> str | None: - for cfg in self._configs: - if cfg.has_option(section, option): - return cfg.get_source_for_option(section, option) - return None - - def __repr__(self) -> str: - return f"ChainedConfig({self.sources()})" + valid_options_in_section = section_to_valid_options[section] + except KeyError: + error_log.append(f"Invalid section [{section}] in {self.path}") + else: + for option in sorted(set(vals.keys()) - valid_options_in_section): + if option not in valid_options_in_section: + error_log.append( + f"Invalid option '{option}' under [{section}] in {self.path}" + ) + return error_log @dataclass(frozen=True) diff --git a/src/python/pants/option/config_test.py b/src/python/pants/option/config_test.py index cc160e22e22..dbd23b98f6e 100644 --- a/src/python/pants/option/config_test.py +++ b/src/python/pants/option/config_test.py @@ -6,12 +6,8 @@ from textwrap import dedent from typing import Dict -import pytest - from pants.engine.fs import FileContent from pants.option.config import Config, TomlSerializer -from pants.option.errors import NoSectionError -from pants.util.ordered_set import OrderedSet @dataclass(frozen=True) @@ -128,79 +124,10 @@ def setUp(self) -> None: "a": {**FILE_2.expected_options["a"], **FILE_1.expected_options["a"]}, } - def test_sections(self) -> None: - expected_sections = OrderedSet( - [*FILE_2.expected_options.keys(), *FILE_1.expected_options.keys()] - ) - assert self.config.sections() == list(expected_sections) - for section in expected_sections: - assert self.config.has_section(section) is True - - def test_has_option(self) -> None: - # Check has all DEFAULT values - for default_option in (*self.default_seed_values.keys(), *FILE_1.default_values.keys()): - assert self.config.has_option(section="DEFAULT", option=default_option) is True - # Check every explicitly defined section has its options + the seed defaults - for section, options in self.expected_combined_values.items(): - for option in (*options, *self.default_seed_values): - assert self.config.has_option(section=section, option=option) is True - # Check every section for file1 also has file1's DEFAULT values - for section in FILE_1.expected_options: - for option in FILE_1.default_values: - assert self.config.has_option(section=section, option=option) is True - # Check that file1's DEFAULT values don't apply to sections only defined in file2 - sections_only_in_file2 = set(FILE_2.expected_options.keys()) - set( - FILE_1.expected_options.keys() - ) - for section in sections_only_in_file2: - for option in FILE_1.default_values: - assert self.config.has_option(section=section, option=option) is False - # Check that non-existent options are False - nonexistent_options = { - "DEFAULT": "fake", - "a": "fake", - "b": "fast", - } - for section, option in nonexistent_options.items(): - assert self.config.has_option(section=section, option=option) is False - # Check that sections aren't misclassified as options - nested_sections = { - "b": "nested", - "b.nested": "nested-again", - "p": "child", - } - for parent_section, child_section in nested_sections.items(): - assert self.config.has_option(section=parent_section, option=child_section) is False - - def test_list_all_options(self) -> None: - # This is used in `options_bootstrapper.py` to validate that every option is recognized. - file1_config = self.config.configs()[1] - file2_config = self.config.configs()[0] - for section, options in FILE_1.expected_options.items(): - expected = list(options.keys()) - expected.extend( - default_option - for default_option in ( - *self.default_seed_values.keys(), - *FILE_1.default_values.keys(), - ) - if default_option not in expected - ) - assert file1_config.values.options(section=section) == expected - for section, options in FILE_2.expected_options.items(): - assert file2_config.values.options(section=section) == [ - *options.keys(), - *self.default_seed_values.keys(), - ] - # Check non-existent section - for config in file1_config, file2_config: - with pytest.raises(NoSectionError): - config.values.options("fake") - def test_default_values(self) -> None: # This is used in `options_bootstrapper.py` to ignore default values when validating options. - file1_config = self.config.configs()[1] - file2_config = self.config.configs()[0] + file1_config_values = self.config.values[0] + file2_config_values = self.config.values[1] # NB: string interpolation should only happen when calling _ConfigValues.get_value(). The # values for _ConfigValues.defaults are not yet interpolated. default_file1_values_unexpanded = { @@ -208,11 +135,11 @@ def test_default_values(self) -> None: "path": "/a/b/%(answer)s", "embed": "%(path)s::foo", } - assert file1_config.values.defaults == { + assert file1_config_values.defaults == { **self.default_seed_values, **default_file1_values_unexpanded, } - assert file2_config.values.defaults == self.default_seed_values + assert file2_config_values.defaults == self.default_seed_values def test_get(self) -> None: # Check the DEFAULT section @@ -231,10 +158,7 @@ def test_get(self) -> None: def test_empty(self) -> None: config = Config.load([]) - assert config.sections() == [] assert config.sources() == [] - assert config.has_section("DEFAULT") is False - assert config.has_option(section="DEFAULT", option="name") is False def test_toml_serializer() -> None: diff --git a/src/python/pants/option/errors.py b/src/python/pants/option/errors.py index f71d5ada4f0..63c2bcc7b9b 100644 --- a/src/python/pants/option/errors.py +++ b/src/python/pants/option/errors.py @@ -136,16 +136,6 @@ class ConfigValidationError(ConfigError): """A config file is invalid.""" -class NoSectionError(ConfigError): - def __init__(self, section: str): - super().__init__(f"No section: {section}") - - -class NoOptionError(ConfigError): - def __init__(self, option: str, section: str): - super().__init__(f"No option {option} in section {section}") - - class InterpolationMissingOptionError(ConfigError): def __init__(self, option, section, rawval, reference): super().__init__( diff --git a/src/python/pants/option/options.py b/src/python/pants/option/options.py index f925f84faa1..e0b054663f8 100644 --- a/src/python/pants/option/options.py +++ b/src/python/pants/option/options.py @@ -223,33 +223,11 @@ def scope_to_flags(self) -> dict[str, list[str]]: def verify_configs(self, global_config: Config) -> None: """Verify all loaded configs have correct scopes and options.""" - error_log = [] - for config in global_config.configs(): - for section in config.sections(): - scope = GLOBAL_SCOPE if section == GLOBAL_SCOPE_CONFIG_SECTION else section - try: - valid_options_under_scope = set(self.for_scope(scope, check_deprecations=False)) - # Only catch ConfigValidationError. Other exceptions will be raised directly. - except ConfigValidationError: - error_log.append(f"Invalid scope [{section}] in {config.config_path}") - else: - # All the options specified under [`section`] in `config` excluding bootstrap defaults. - all_options_under_scope = set(config.values.options(section)) - set( - config.values.defaults - ) - for option in sorted(all_options_under_scope): - if option not in valid_options_under_scope: - error_log.append( - f"Invalid option '{option}' under [{section}] in {config.config_path}" - ) - - if error_log: - for error in error_log: - logger.error(error) - raise ConfigValidationError( - "Invalid config entries detected. See log for details on which entries to update " - "or remove.\n(Specify --no-verify-config to disable this check.)" - ) + section_to_valid_options = {} + for scope in self.known_scope_to_info: + section = GLOBAL_SCOPE_CONFIG_SECTION if scope == GLOBAL_SCOPE else scope + section_to_valid_options[section] = set(self.for_scope(scope, check_deprecations=False)) + global_config.verify(section_to_valid_options) def is_known_scope(self, scope: str) -> bool: """Whether the given scope is known by this instance. diff --git a/src/python/pants/option/options_integration_test.py b/src/python/pants/option/options_integration_test.py index cda284196df..f9777bf0221 100644 --- a/src/python/pants/option/options_integration_test.py +++ b/src/python/pants/option/options_integration_test.py @@ -23,7 +23,7 @@ def test_invalid_options() -> None: } config_errors = [ "ERROR] Invalid option 'invalid_global' under [GLOBAL]", - "ERROR] Invalid scope [invalid_scope]", + "ERROR] Invalid section [invalid_scope]", "ERROR] Invalid option 'bad_option' under [pytest]", ] diff --git a/src/python/pants/option/parser.py b/src/python/pants/option/parser.py index de9110a834e..f00403183b0 100644 --- a/src/python/pants/option/parser.py +++ b/src/python/pants/option/parser.py @@ -6,7 +6,6 @@ import copy import inspect import json -import os import re import typing from collections import defaultdict @@ -19,7 +18,7 @@ from pants.base.build_environment import get_buildroot from pants.base.deprecated import validate_deprecation_semver, warn_or_error -from pants.option.config import Config +from pants.option.config import DEFAULT_SECTION, Config from pants.option.custom_types import ( DictValueComponent, ListValueComponent, @@ -585,14 +584,13 @@ def expand(val_or_str): # Get value from config files, and capture details about its derivation. config_details = None config_section = GLOBAL_SCOPE_CONFIG_SECTION if self._scope == GLOBAL_SCOPE else self._scope - config_default_val_or_str = expand(self._config.get(Config.DEFAULT_SECTION, dest)) + config_default_val_or_str = expand(self._config.get(DEFAULT_SECTION, dest)) config_val_or_str = expand(self._config.get(config_section, dest)) - config_source_file = self._config.get_source_for_option( + config_source_files = self._config.get_sources_for_option( config_section, dest - ) or self._config.get_source_for_option(Config.DEFAULT_SECTION, dest) - if config_source_file is not None: - config_source_file = os.path.relpath(config_source_file) - config_details = f"from {config_source_file}" + ) or self._config.get_sources_for_option(DEFAULT_SECTION, dest) + if config_source_files: + config_details = f"from {', '.join(config_source_files)}" # Get value from environment, and capture details about its derivation. env_vars = self.get_env_var_names(self._scope, dest)