Skip to content

Commit

Permalink
[internal] Simplify config parsing. (pantsbuild#14846)
Browse files Browse the repository at this point in the history
There were too many unnecessary abstractions bogging down
the code, and making it harder to adapt and modify.

This change streamlines things by getting rid of the
Single/Chained config classes. Instead the _ConfigValues
class, which already did most of the work, takes a new
path field, so it can encapsulate the source file from which
its values were parsed. Then, the Config ABC becomes a
regular concrete dataclass that exists to directly handle
multiple config files.

This allows us to remove many superfluous methods, exceptions
and logic, that were only there to prop up the abstractions.

[ci skip-rust]

[ci skip-build-wheels]
  • Loading branch information
benjyw authored Mar 18, 2022
1 parent 453ba43 commit 5205b2c
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 325 deletions.
4 changes: 2 additions & 2 deletions pants-plugins/internal_plugins/releases/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from pants.engine.target import Target
from pants.engine.unions import UnionRule
from pants.option.alias import CliAlias
from pants.option.config import _ChainedConfig
from pants.option.config import Config
from pants.option.option_types import DictOption
from pants.option.options_bootstrapper import OptionsBootstrapper
from pants.option.subsystem import Subsystem
Expand Down Expand Up @@ -181,7 +181,7 @@ async def check_default_tools(
SessionValues(
{
OptionsBootstrapper: OptionsBootstrapper(
tuple(), ("./pants",), args, _ChainedConfig(tuple()), CliAlias()
tuple(), ("./pants",), args, Config(tuple()), CliAlias()
)
}
),
Expand Down
264 changes: 67 additions & 197 deletions src/python/pants/option/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,22 @@
from __future__ import annotations

import getpass
import itertools
import logging
import os
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from functools import partial
from hashlib import sha1
from typing import Any, ClassVar, Dict, Iterable, List, Mapping, Sequence, Union, cast
from typing import Any, Dict, Iterable, List, Mapping, Union, cast

import toml
from typing_extensions import Protocol

from pants.base.build_environment import get_buildroot
from pants.option.errors import (
ConfigError,
InterpolationMissingOptionError,
NoOptionError,
NoSectionError,
)
from pants.option.errors import ConfigError, ConfigValidationError, InterpolationMissingOptionError
from pants.option.ranked_value import Value
from pants.util.ordered_set import OrderedSet

logger = logging.getLogger(__name__)


# A dict with optional override seed values for buildroot, pants_workdir, and pants_distdir.
SeedValues = Dict[str, Value]
Expand All @@ -46,15 +41,19 @@ def content(self) -> bytes:
raise NotImplementedError()


class Config(ABC):
DEFAULT_SECTION = "DEFAULT"


@dataclass(frozen=True, eq=False)
class Config:
"""Encapsulates config file loading and access, including encapsulation of support for multiple
config files.
Supports variable substitution using old-style Python format strings. E.g., %(var_name)s will be
replaced with the value of var_name.
"""

DEFAULT_SECTION: ClassVar[str] = "DEFAULT"
values: tuple[_ConfigValues, ...]

@classmethod
def load(
Expand All @@ -69,40 +68,40 @@ def load(
DEFAULT section, and be available for use in substitutions. The caller may override some of
these seed values.
"""
single_file_configs = []
config_values = []
for file_content in file_contents:
content_digest = sha1(file_content.content).hexdigest()
normalized_seed_values = cls._determine_seed_values(seed_values=seed_values)

try:
config_values = cls._parse_toml(
file_content.content.decode(), normalized_seed_values
)
config_values.append(cls._parse_toml(file_content, normalized_seed_values))
except Exception as e:
raise ConfigError(
f"Config file {file_content.path} could not be parsed as TOML:\n {e}"
)

single_file_configs.append(
_SingleFileConfig(
config_path=file_content.path,
content_digest=content_digest,
values=config_values,
),
)
return _ChainedConfig(tuple(reversed(single_file_configs)))
return cls(tuple(config_values))

@classmethod
def _parse_toml(
cls, config_content: str, normalized_seed_values: dict[str, str]
cls, config_source: ConfigSource, normalized_seed_values: dict[str, str]
) -> _ConfigValues:
"""Attempt to parse as TOML, raising an exception on failure."""
toml_values = cast(Dict[str, Any], toml.loads(config_content))
toml_values["DEFAULT"] = {
toml_values = cast(Dict[str, Any], toml.loads(config_source.content.decode()))
toml_values[DEFAULT_SECTION] = {
**normalized_seed_values,
**toml_values.get("DEFAULT", {}),
**toml_values.get(DEFAULT_SECTION, {}),
}
return _ConfigValues(toml_values)
return _ConfigValues(config_source.path, toml_values)

def verify(self, section_to_valid_options: dict[str, set[str]]):
error_log = []
for config_values in self.values:
error_log.extend(config_values.get_verification_errors(section_to_valid_options))
if error_log:
for error in error_log:
logger.error(error)
raise ConfigValidationError(
"Invalid config entries detected. See log for details on which entries to update "
"or remove.\n(Specify --no-verify-config to disable this check.)"
)

@staticmethod
def _determine_seed_values(*, seed_values: SeedValues | None = None) -> dict[str, str]:
Expand Down Expand Up @@ -135,46 +134,23 @@ def get(self, section, option) -> str | None:
If the specified section does not exist or is missing a definition for the option, the value
is looked up in the DEFAULT section. If there is still no definition found, returns None.
"""
if not self.has_option(section, option):
return None
return self.get_value(section, option)

@abstractmethod
def configs(self) -> Sequence[_SingleFileConfig]:
"""Returns the underlying single-file configs represented by this object."""
for vals in reversed(self.values):
val = vals.get_value(section, option)
if val is not None:
return val
return None

@abstractmethod
def sources(self) -> list[str]:
"""Returns the sources of this config as a list of filenames."""
return [vals.path for vals in self.values]

@abstractmethod
def sections(self) -> list[str]:
"""Returns the sections in this config (not including DEFAULT)."""

@abstractmethod
def has_section(self, section: str) -> bool:
"""Returns whether this config has the section."""

@abstractmethod
def has_option(self, section: str, option: str) -> bool:
"""Returns whether this config specified a value for the option."""

@abstractmethod
def get_value(self, section: str, option: str) -> str:
"""Returns the value of the option in this config as a string.
Raises NoSectionError if the section doesn't exist. Raises NoOptionError if the option
doesn't exist in the section.
"""

@abstractmethod
def get_source_for_option(self, section: str, option: str) -> str | None:
"""Returns the path to the source file the given option was defined in.
:param section: the scope of the option.
:param option: the name of the option.
:returns: the path to the config file, or None if the option was not defined by a config file.
"""
def get_sources_for_option(self, section: str, option: str) -> list[str]:
"""Returns the path(s) to the source file(s) the given option was defined in."""
paths = []
for vals in reversed(self.values):
if vals.get_value(section, option) is not None:
paths.append(os.path.relpath(vals.path))
return paths


_TomlPrimitive = Union[bool, int, float, str]
Expand All @@ -185,7 +161,8 @@ def get_source_for_option(self, section: str, option: str) -> str | None:
class _ConfigValues:
"""The parsed contents of a TOML config file."""

values: dict[str, Any]
path: str
section_to_values: dict[str, dict[str, Any]]

@staticmethod
def _is_an_option(option_value: _TomlValue | dict) -> bool:
Expand Down Expand Up @@ -281,22 +258,10 @@ def _stringify_val_without_interpolation(self, raw_value: _TomlValue) -> str:
interpolate=False,
)

@property
def sections(self) -> list[str]:
return [scope for scope in self.values if scope != "DEFAULT"]

def has_section(self, section: str) -> bool:
return section in self.values

def has_option(self, section: str, option: str) -> bool:
if not self.has_section(section):
return False
return option in self.values[section] or option in self.defaults

def get_value(self, section: str, option: str) -> str:
section_values = self.values.get(section)
def get_value(self, section: str, option: str) -> str | None:
section_values = self.section_to_values.get(section)
if section_values is None:
raise NoSectionError(section)
return None

stringify = partial(
self._stringify_val,
Expand All @@ -308,7 +273,7 @@ def get_value(self, section: str, option: str) -> str:
if option not in section_values:
if option in self.defaults:
return stringify(raw_value=self.defaults[option])
raise NoOptionError(option, section)
return None

option_value = section_values[option]
if not isinstance(option_value, dict):
Expand All @@ -330,124 +295,29 @@ def get_value(self, section: str, option: str) -> str:
return add_val
return remove_val

def options(self, section: str) -> list[str]:
section_values = self.values.get(section)
if section_values is None:
raise NoSectionError(section)
return [
*section_values.keys(),
*(
default_option
for default_option in self.defaults
if default_option not in section_values
),
]

@property
def defaults(self) -> dict[str, str]:
return {
option: self._stringify_val_without_interpolation(option_val)
for option, option_val in self.values["DEFAULT"].items()
for option, option_val in self.section_to_values[DEFAULT_SECTION].items()
}


@dataclass(frozen=True, eq=False)
class _SingleFileConfig(Config):
"""Config read from a single file."""

config_path: str
content_digest: str
values: _ConfigValues

def configs(self) -> list[_SingleFileConfig]:
return [self]

def sources(self) -> list[str]:
return [self.config_path]

def sections(self) -> list[str]:
return self.values.sections

def has_section(self, section: str) -> bool:
return self.values.has_section(section)

def has_option(self, section: str, option: str) -> bool:
return self.values.has_option(section, option)

def get_value(self, section: str, option: str) -> str:
return self.values.get_value(section, option)

def get_source_for_option(self, section: str, option: str) -> str | None:
if self.has_option(section, option):
return self.sources()[0]
return None

def __repr__(self) -> str:
return f"SingleFileConfig({self.config_path})"

def __eq__(self, other: Any) -> bool:
if not isinstance(other, _SingleFileConfig):
return NotImplemented
return self.config_path == other.config_path and self.content_digest == other.content_digest

def __hash__(self) -> int:
return hash(self.content_digest)


@dataclass(frozen=True)
class _ChainedConfig(Config):
"""Config read from multiple sources."""

# Config instances to chain. Later instances take precedence over earlier ones.
chained_configs: tuple[_SingleFileConfig, ...]

@property
def _configs(self) -> tuple[_SingleFileConfig, ...]:
return self.chained_configs

def configs(self) -> tuple[_SingleFileConfig, ...]:
return self.chained_configs

def sources(self) -> list[str]:
# NB: Present the sources in the order we were given them.
return list(itertools.chain.from_iterable(cfg.sources() for cfg in reversed(self._configs)))

def sections(self) -> list[str]:
ret: OrderedSet[str] = OrderedSet()
for cfg in self._configs:
ret.update(cfg.sections())
return list(ret)

def has_section(self, section: str) -> bool:
for cfg in self._configs:
if cfg.has_section(section):
return True
return False

def has_option(self, section: str, option: str) -> bool:
for cfg in self._configs:
if cfg.has_option(section, option):
return True
return False

def get_value(self, section: str, option: str) -> str:
for cfg in self._configs:
def get_verification_errors(self, section_to_valid_options: dict[str, set[str]]) -> list[str]:
error_log = []
for section, vals in self.section_to_values.items():
if section == DEFAULT_SECTION:
continue
try:
return cfg.get_value(section, option)
except (NoSectionError, NoOptionError):
pass
if not self.has_section(section):
raise NoSectionError(section)
raise NoOptionError(option, section)

def get_source_for_option(self, section: str, option: str) -> str | None:
for cfg in self._configs:
if cfg.has_option(section, option):
return cfg.get_source_for_option(section, option)
return None

def __repr__(self) -> str:
return f"ChainedConfig({self.sources()})"
valid_options_in_section = section_to_valid_options[section]
except KeyError:
error_log.append(f"Invalid section [{section}] in {self.path}")
else:
for option in sorted(set(vals.keys()) - valid_options_in_section):
if option not in valid_options_in_section:
error_log.append(
f"Invalid option '{option}' under [{section}] in {self.path}"
)
return error_log


@dataclass(frozen=True)
Expand Down
Loading

0 comments on commit 5205b2c

Please sign in to comment.