Switch to using the native options parser values (pantsbuild#21385)

But still compare against the legacy parser so we can warn about discrepancies.
benjyw · Sep 20, 2024 · c50c8fc · c50c8fc
1 parent 1de98db
commit c50c8fc
Show file tree

Hide file tree

Showing 10 changed files with 209 additions and 118 deletions.
diff --git a/docs/notes/2.23.x.md b/docs/notes/2.23.x.md
@@ -21,6 +21,16 @@ We offer [formal sponsorship tiers for companies](https://www.pantsbuild.org/spo
 - The deprecations for the `--changed-dependees` option and the `dependees` goal have expired. Use the equivalent [`--changed-dependents` option](https://www.pantsbuild.org/2.23/reference/subsystems/changed#dependents) or [`dependents` goal](https://www.pantsbuild.org/2.23/reference/goals/dependents) instead.
 
 
+### New Options System
+
+This release switches the Pants [options system](https://www.pantsbuild.org/2.22/docs/using-pants/key-concepts/options) to use the new "native" implementation written in Rust first introduced in the 2.22.x series.
+
+To ensure that this transition is not disruptive, this release will run the new system alongside the "legacy" options system, and compare their results, issuing a warning if they differ.
+
+If you encounter such discrepancies, and you can't resolve them easily, please [reach out to us on Slack or file an issue](https://www.pantsbuild.org/community/getting-help).
+
+The "legacy" system will be removed in the 2.24.x series.
+
 ### Test goal
 
 A new option `--experimental-report-test-result-info` is added to the `[test]` config section. Enabling this option will

diff --git a/src/python/pants/bin/local_pants_runner.py b/src/python/pants/bin/local_pants_runner.py
@@ -105,14 +105,16 @@ def create(
         # Option values are usually computed lazily on demand, but command line options are
         # eagerly computed for validation.
         with options_initializer.handle_unknown_flags(options_bootstrapper, env, raise_=True):
+            # Verify CLI flags.
+            if not build_config.allow_unknown_options:
+                options.verify_args()
+
             for scope, values in options.scope_to_flags.items():
                 if values:
                     # Only compute values if there were any command line options presented.
                     options.for_scope(scope)
 
-        # Verify CLI flags and configs.
-        if not build_config.allow_unknown_options:
-            options.verify_args()
+        # Verify configs.
         if global_bootstrap_options.verify_config:
             options.verify_configs(options_bootstrapper.config)
 

diff --git a/src/python/pants/help/flag_error_help_printer.py b/src/python/pants/help/flag_error_help_printer.py
@@ -41,7 +41,9 @@ def handle_unknown_flags(self, err: UnknownFlagsError):
                 possibilities.update(oshi.collect_scoped_flags())
 
         for flag in err.flags:
-            print(f"Unknown flag {self.maybe_red(flag)} on {err.arg_scope or 'global'} scope")
+            print(
+                f"Unknown flag {self.maybe_red(flag)} in {(err.arg_scope + ' goal') if err.arg_scope else 'global'} context"
+            )
             did_you_mean = difflib.get_close_matches(flag, possibilities)
             if err.arg_scope != GLOBAL_SCOPE and flag in global_flags:
                 # It's a common error to use a global flag in a goal scope, so we special-case it.

diff --git a/src/python/pants/help/help_integration_test.py b/src/python/pants/help/help_integration_test.py
@@ -159,14 +159,14 @@ def test_unknown_goal() -> None:
 def test_unknown_global_flags() -> None:
     pants_run = run_pants(["--pants-workdirx", "goals"])
     pants_run.assert_failure()
-    assert "Unknown flag --pants-workdirx on global scope" in pants_run.stdout
+    assert "Unknown flag --pants-workdirx in global context" in pants_run.stdout
     assert "Did you mean --pants-workdir" in pants_run.stdout
 
 
 def test_unknown_scoped_flags() -> None:
     pants_run = run_pants(["test", "--forcex"])
     pants_run.assert_failure()
-    assert "Unknown flag --forcex on test scope" in pants_run.stdout
+    assert "Unknown flag --forcex in test goal context" in pants_run.stdout
     assert "Did you mean --force" in pants_run.stdout
 
 
@@ -175,7 +175,7 @@ def test_global_flag_in_scoped_position() -> None:
         ["test", "--pants-distdir=dist/"],
     )
     pants_run.assert_failure()
-    assert "Unknown flag --pants-distdir on test scope" in pants_run.stdout
+    assert "Unknown flag --pants-distdir in test goal context" in pants_run.stdout
     assert "Did you mean to use the global --pants-distdir?" in pants_run.stdout
 
 

diff --git a/src/python/pants/option/native_options.py b/src/python/pants/option/native_options.py
@@ -7,15 +7,18 @@
 import logging
 import shlex
 from enum import Enum
+from pathlib import Path
 from typing import Any, Mapping, Optional, Sequence, Tuple
 
+from pants.base.build_environment import get_buildroot
 from pants.engine.internals import native_engine
 from pants.engine.internals.native_engine import PyConfigSource
 from pants.option.config import ConfigSource
-from pants.option.custom_types import _flatten_shlexed_list, shell_str
-from pants.option.errors import OptionsError
+from pants.option.custom_types import _flatten_shlexed_list, dir_option, file_option, shell_str
+from pants.option.errors import BooleanOptionNameWithNo, OptionsError, ParseError
 from pants.option.ranked_value import Rank
-from pants.util.strutil import get_strict_env
+from pants.option.scope import GLOBAL_SCOPE
+from pants.util.strutil import get_strict_env, softwrap
 
 logger = logging.getLogger()
 
@@ -65,12 +68,35 @@ def __init__(
         }
 
     def get(
-        self, *, scope, flags, default, option_type, member_type=None, passthrough=False
+        self,
+        *,
+        scope,
+        dest,
+        flags,
+        default,
+        option_type,
+        member_type=None,
+        choices=None,
+        passthrough=False,
     ) -> Tuple[Any, Rank]:
+        def scope_str() -> str:
+            return "global scope" if scope == GLOBAL_SCOPE else f"scope '{scope}'"
+
         def is_enum(typ):
             # TODO: When we switch to Python 3.11, use: return isinstance(typ, EnumType)
             return inspect.isclass(typ) and issubclass(typ, Enum)
 
+        def apply_callable(callable_type, val_str):
+            try:
+                return callable_type(val_str)
+            except (TypeError, ValueError) as e:
+                if is_enum(callable_type):
+                    choices_str = ", ".join(f"{choice.value}" for choice in callable_type)
+                    raise ParseError(f"Invalid choice '{val_str}'. Choose from: {choices_str}")
+                raise ParseError(
+                    f"Error applying type '{callable_type.__name__}' to option value '{val_str}': {e}"
+                )
+
         # '--foo.bar-baz' -> ['foo', 'bar', 'baz']
         name_parts = flags[-1][2:].replace(".", "-").split("-")
         switch = flags[0][1:] if len(flags) > 1 else None  # '-d' -> 'd'
@@ -79,7 +105,10 @@ def is_enum(typ):
         rust_option_type = option_type
         rust_member_type = member_type
 
-        if option_type is dict:
+        if option_type is bool:
+            if name_parts[0] == "no":
+                raise BooleanOptionNameWithNo(scope, dest)
+        elif option_type is dict:
             # The Python code allows registering default=None for dicts/lists, and forces it to
             # an empty dict/list at registration. Since here we only have access to what the user
             # provided, we do the same.
@@ -129,15 +158,65 @@ def is_enum(typ):
                 if member_type == shell_str:
                     val = _flatten_shlexed_list(val)
                 elif callable(member_type):
-                    val = [member_type(x) for x in val]
+                    val = [apply_callable(member_type, x) for x in val]
                 if passthrough:
                     val += self._native_parser.get_passthrough_args() or []
-            elif is_enum(option_type):
-                val = option_type(val)
             elif callable(option_type):
-                val = option_type(val)
+                val = apply_callable(option_type, val)
+
+            # Validate the value.
+
+            def check_scalar_value(val, choices):
+                if choices is None and is_enum(option_type):
+                    choices = list(option_type)
+                if choices is not None and val not in choices:
+                    raise ParseError(
+                        softwrap(
+                            f"""
+                            `{val}` is not an allowed value for option {dest} in {scope_str()}.
+                            Must be one of: {choices}
+                            """
+                        )
+                    )
+                elif option_type == file_option:
+                    check_file_exists(val, dest, scope_str())
+                elif option_type == dir_option:
+                    check_dir_exists(val, dest, scope_str())
+
+            if isinstance(val, list):
+                for component in val:
+                    check_scalar_value(component, choices)
+                if is_enum(member_type) and len(val) != len(set(val)):
+                    raise ParseError(f"Duplicate enum values specified in list: {val}")
+            elif isinstance(val, dict):
+                for component in val.values():
+                    check_scalar_value(component, choices)
+            else:
+                check_scalar_value(val, choices)
 
         return (val, rank)
 
     def get_unconsumed_flags(self) -> dict[str, tuple[str, ...]]:
         return {k: tuple(v) for k, v in self._native_parser.get_unconsumed_flags().items()}
+
+
+def check_file_exists(val: str, dest: str, scope: str) -> None:
+    error_prefix = f"File value `{val}` for option `{dest}` in `{scope}`"
+    try:
+        path = Path(val)
+        path_with_buildroot = Path(get_buildroot(), val)
+    except TypeError:
+        raise ParseError(f"{error_prefix} cannot be parsed as a file path.")
+    if not path.is_file() and not path_with_buildroot.is_file():
+        raise ParseError(f"{error_prefix} does not exist.")
+
+
+def check_dir_exists(val: str, dest: str, scope: str) -> None:
+    error_prefix = f"Directory value `{val}` for option `{dest}` in `{scope}`"
+    try:
+        path = Path(val)
+        path_with_buildroot = Path(get_buildroot(), val)
+    except TypeError:
+        raise ParseError(f"{error_prefix} cannot be parsed as a directory path.")
+    if not path.is_dir() and not path_with_buildroot.is_dir():
+        raise ParseError(f"{error_prefix} does not exist.")
diff --git a/src/python/pants/option/options.py b/src/python/pants/option/options.py
@@ -12,7 +12,7 @@
 from pants.base.deprecated import warn_or_error
 from pants.option.arg_splitter import ArgSplitter
 from pants.option.config import Config
-from pants.option.errors import ConfigValidationError
+from pants.option.errors import ConfigValidationError, UnknownFlagsError
 from pants.option.native_options import NativeOptionParser
 from pants.option.option_util import is_list_option
 from pants.option.option_value_container import OptionValueContainer, OptionValueContainerBuilder
@@ -277,17 +277,24 @@ def verify_args(self):
         # Consume all known args, and see if any are left.
         # This will have the side-effect of precomputing (and memoizing) options for all scopes.
         for scope in self.known_scope_to_info:
-            # Currently the legacy parser raises UnknownFlagsError in for_scope().
-            # When we switch to the native parser we will manually check get_unconsumed_flags().
             self.for_scope(scope)
-        # TODO: Uncomment this when we switch to the native parser.
-        # unconsumed_flags = self._native_parser.get_unconsumed_flags()
-        # if unconsumed_flags:
-        #     # We may have unconsumed flags in multiple positional contexts, but our
-        #     # error handling expects just one, so pick the first one. After the user
-        #     # fixes that error we will show the next scope.
-        #     scope, flags = next(iter(unconsumed_flags))
-        #     raise UnknownFlagsError(flags, scope)
+        # We implement some global help flags, such as `-h`, `--help`, '-v', `--version`,
+        # as scope aliases (so `--help` is an alias for `help` and so on).
+        # There aren't consumed by the native parser, since they aren't registered as options,
+        # so we must account for them.
+        scope_aliases_that_look_like_flags = set()
+        for si in self.known_scope_to_info.values():
+            scope_aliases_that_look_like_flags.update(
+                sa for sa in si.scope_aliases if sa.startswith("-")
+            )
+
+        for scope, flags in self._native_parser.get_unconsumed_flags().items():
+            flags = tuple(flag for flag in flags if flag not in scope_aliases_that_look_like_flags)
+            if flags:
+                # We may have unconsumed flags in multiple positional contexts, but our
+                # error handling expects just one, so pick the first one. After the user
+                # fixes that error we will show the next scope.
+                raise UnknownFlagsError(flags, scope)
 
     def is_known_scope(self, scope: str) -> bool:
         """Whether the given scope is known by this instance.
@@ -405,35 +412,38 @@ def for_scope(
         :raises pants.option.errors.ConfigValidationError: if the scope is unknown.
         """
 
-        values_builder = OptionValueContainerBuilder()
-        flags_in_scope = self._scope_to_flags.get(scope, [])
-        parse_args_request = self._make_parse_args_request(flags_in_scope, values_builder)
-        legacy_values = self.get_parser(scope).parse_args(
-            parse_args_request, log_warnings=log_parser_warnings
-        )
+        native_values = self.get_parser(scope).parse_args_native(self._native_parser)
         native_mismatch_msgs = []
 
         if self._native_options_validation == NativeOptionsValidation.ignore:
-            native_values = None
+            legacy_values = None
         else:
             try:
-                native_values = self.get_parser(scope).parse_args_native(self._native_parser)
+                values_builder = OptionValueContainerBuilder()
+                flags_in_scope = self._scope_to_flags.get(scope, [])
+                parse_args_request = self._make_parse_args_request(flags_in_scope, values_builder)
+                legacy_values = self.get_parser(scope).parse_args(
+                    parse_args_request, log_warnings=log_parser_warnings
+                )
+            except UnknownFlagsError:
+                # Let the native parser handle unknown flags.
+                legacy_values = None
             except Exception as e:
                 native_mismatch_msgs.append(
-                    f"Failed to parse options with native parser due to error:\n    {e}"
+                    f"Failed to parse options with legacy parser due to error:\n    {e}"
                 )
-                native_values = None
+                legacy_values = None
 
         # Check for any deprecation conditions, which are evaluated using `self._flag_matchers`.
         if check_deprecations:
-            values_builder = legacy_values.to_builder()
-            self._check_and_apply_deprecations(scope, values_builder)
-            legacy_values = values_builder.build()
+            native_values_builder = native_values.to_builder()
+            self._check_and_apply_deprecations(scope, native_values_builder)
+            native_values = native_values_builder.build()
 
-            if native_values:
-                native_values_builder = native_values.to_builder()
-                self._check_and_apply_deprecations(scope, native_values_builder)
-                native_values = native_values_builder.build()
+            if legacy_values:
+                values_builder = legacy_values.to_builder()
+                self._check_and_apply_deprecations(scope, values_builder)
+                legacy_values = values_builder.build()
 
         def listify_tuples(x):
             # Sequence values from the legacy parser can be tuple or list, but those coming from
@@ -445,7 +455,7 @@ def listify_tuples(x):
             else:
                 return x
 
-        if native_values:
+        if legacy_values:
 
             def legacy_val_info(k):
                 if k in legacy_values:
@@ -491,7 +501,7 @@ def log(log_func):
                         If you can't resolve this discrepancy, please reach out to the Pants
                         development team: {doc_url('/community/getting-help')}.
 
-                        The native parser will become the default in 2.23.x, and the legacy parser
+                        The native parser is the default in 2.23.x, and the legacy parser
                         will be removed in 2.24.x. So it is imperative that we find out about any
                         discrepancies during this transition period.
 
@@ -515,8 +525,7 @@ def log(log_func):
                 raise Exception(
                     "Option value mismatches detected, see logs above for details. Aborting."
                 )
-        # TODO: In a future release, switch to the native_values as authoritative.
-        return legacy_values
+        return native_values
 
     def get_fingerprintable_for_scope(
         self,

diff --git a/src/python/pants/option/options_integration_test.py b/src/python/pants/option/options_integration_test.py
@@ -30,13 +30,13 @@ def test_invalid_options() -> None:
     # We error on invalid CLI options before validating the config file.
     result = run_pants(["--pytest-invalid=ALL", "help"], config=config)
     result.assert_failure()
-    assert "Unknown flags --invalid on scope pytest" in result.stderr
+    assert "Unknown flag --pytest-invalid in global context" in result.stdout
     for error in config_errors:
         assert error not in result.stderr
 
     result = run_pants(["help"], config=config)
     result.assert_failure()
-    assert "Unknown flags" not in result.stderr
+    assert "Unknown flags" not in result.stdout
     for error in config_errors:
         assert error in result.stderr