Skip to content

Commit

Permalink
Expose GitWorktree as an @rule (pantsbuild#15030)
Browse files Browse the repository at this point in the history
First part of the changes suggested in pantsbuild#13757 (comment). This should leave `calculate_specs()` working as-is for now, while hopefully allowing something like `await Get(GitResult, GitRequest)` to obtain `Git` from other rules.
  • Loading branch information
jyggen authored Apr 12, 2022
1 parent cc4e180 commit d0ff0ac
Show file tree
Hide file tree
Showing 10 changed files with 489 additions and 140 deletions.
4 changes: 2 additions & 2 deletions pants.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ build_ignore.add = [
]

ignore_warnings.add = [
# TODO: `toolchain.pants.plugin==0.17.0` must still use the old options system
"DEPRECATED: pants.option.subsystem.register_options"
# TODO: `toolchain.pants.plugin==0.18.0` must still use the old `get_git` implementation.
"DEPRECATED: pants.base.build_environment.get_git"
]

files_not_found_behavior = "error"
Expand Down
165 changes: 160 additions & 5 deletions src/python/pants/base/build_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@

import logging
import os
import subprocess
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from pathlib import Path, PurePath
from typing import Iterable

from pants.base.build_root import BuildRoot
from pants.base.deprecated import deprecated
from pants.engine.internals import native_engine
from pants.vcs.git import Git, GitException
from pants.util.contextutil import pushd
from pants.util.meta import frozen_after_init
from pants.version import VERSION

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -50,17 +55,167 @@ def is_in_container() -> bool:
)


class _GitIinitialized(Enum):
class GitException(Exception):
    """Raised when a `git` command cannot be executed or exits with a non-zero status."""


# TODO: can be removed once `get_git()` is gone.
@frozen_after_init
@dataclass(unsafe_hash=True)
class Git:
    """Runs `git` subprocesses against a single working tree.

    Instance methods build their command lines with explicit `--git-dir` and `--work-tree`
    flags (see `_create_git_cmdline`); only `mount()` relies on the current working directory.
    """

    worktree: PurePath
    _gitdir: PurePath
    _gitcmd: str

    def __init__(
        self,
        worktree: os.PathLike[str] | None = None,
        *,
        gitdir: os.PathLike[str] | None = None,
        binary: str = "git",
    ) -> None:
        """Creates a git object that assumes the git repository is in the cwd by default.

        :param worktree: The path to the git repository working tree directory (typically '.').
        :param gitdir: The path to the repository's git metadata directory (typically '.git').
        :param binary: The path to the git binary to use, 'git' by default.
        """
        self.worktree = Path(worktree or os.getcwd()).resolve()
        self._gitdir = Path(gitdir).resolve() if gitdir else (self.worktree / ".git")
        self._gitcmd = binary

    @classmethod
    def mount(cls, subdir: str | PurePath | None = None, *, binary: str | PurePath = "git") -> Git:
        """Detect the git working tree above cwd (or above `subdir`) and return it.

        :param subdir: The path to start searching for a git repo; the cwd when omitted.
        :param binary: The path to the git binary to use, 'git' by default.
        :returns: a Git object that is configured to operate on the found git repo.
        :raises: :class:`GitException` if no git repo could be found.
        """
        cmd = [str(binary), "rev-parse", "--show-toplevel"]
        if subdir:
            with pushd(str(subdir)):
                process, out, err = cls._invoke(cmd)
        else:
            process, out, err = cls._invoke(cmd)
        cls._check_result(cmd, process.returncode, err.decode())
        # Propagate the binary that located the repo so later commands use the same executable.
        return cls(worktree=PurePath(cls._cleanse(out)), binary=str(binary))

    @staticmethod
    def _invoke(cmd: list[str]) -> tuple[subprocess.Popen, bytes, bytes]:
        """Run `cmd` and return the finished process together with its raw stdout and stderr.

        :raises: :class:`GitException` if the process could not be started.
        """
        try:
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as e:
            # Binary DNE or is not executable
            cmd_str = " ".join(cmd)
            raise GitException(f"Failed to execute command {cmd_str}: {e!r}") from e
        out, err = process.communicate()
        return process, out, err

    @classmethod
    def _cleanse(cls, output: bytes) -> str:
        """Decode subprocess output and strip surrounding whitespace."""
        return output.decode().strip()

    @classmethod
    def _check_result(cls, cmd: Iterable[str], result: int, failure_msg: str | None = None) -> None:
        """Raise :class:`GitException` if `result` is a non-zero exit code.

        `failure_msg` (typically the command's stderr) is used as the message when non-empty;
        otherwise a generic message naming the command and exit code is used.
        """
        if result != 0:
            cmd_str = " ".join(cmd)
            raise GitException(failure_msg or f"{cmd_str} failed with exit code {result}")

    @property
    def current_rev_identifier(self) -> str:
        """The symbolic name used for the current revision."""
        return "HEAD"

    @property
    def commit_id(self) -> str:
        """The output of `git rev-parse HEAD`."""
        return self._check_output(["rev-parse", "HEAD"])

    @property
    def branch_name(self) -> str | None:
        """The abbreviated ref name of HEAD, or None when git reports the literal `HEAD`."""
        branch = self._check_output(["rev-parse", "--abbrev-ref", "HEAD"])
        return None if branch == "HEAD" else branch

    def _fix_git_relative_path(self, worktree_path: str, relative_to: PurePath | str) -> str:
        """Re-express a worktree-relative path as a path relative to `relative_to`."""
        return str((self.worktree / worktree_path).relative_to(relative_to))

    def changed_files(
        self,
        from_commit: str | None = None,
        include_untracked: bool = False,
        relative_to: PurePath | str | None = None,
    ) -> set[str]:
        """Return the set of changed files, expressed relative to `relative_to`.

        :param from_commit: If given, also include files changed between the merge-base of
            `from_commit` and HEAD.
        :param include_untracked: If true, also include untracked (but not ignored) files.
        :param relative_to: Directory to limit the query to and to relativize results against;
            defaults to the worktree.
        """
        relative_to = PurePath(relative_to) if relative_to is not None else self.worktree
        rel_suffix = ["--", str(relative_to)]
        uncommitted_changes = self._check_output(["diff", "--name-only", "HEAD"] + rel_suffix)

        # git emits one path per line: split on lines (not on arbitrary whitespace) so that
        # paths containing spaces stay intact.
        files = set(uncommitted_changes.splitlines())
        if from_commit:
            # Grab the diff from the merge-base to HEAD using ... syntax. This ensures we have just
            # the changes that have occurred on the current branch.
            committed_cmd = ["diff", "--name-only", from_commit + "...HEAD"] + rel_suffix
            committed_changes = self._check_output(committed_cmd)
            files.update(committed_changes.splitlines())
        if include_untracked:
            untracked_cmd = [
                "ls-files",
                "--other",
                "--exclude-standard",
                "--full-name",
            ] + rel_suffix
            untracked = self._check_output(untracked_cmd)
            files.update(untracked.splitlines())
        # git will report changed files relative to the worktree: re-relativize to relative_to
        return {self._fix_git_relative_path(f, relative_to) for f in files}

    def changes_in(self, diffspec: str, relative_to: PurePath | str | None = None) -> set[str]:
        """Return the files touched by `diffspec`, expressed relative to `relative_to`.

        :param diffspec: A revision or range understood by `git diff-tree`.
        :param relative_to: Directory to relativize results against; defaults to the worktree.
        """
        relative_to = PurePath(relative_to) if relative_to is not None else self.worktree
        cmd = ["diff-tree", "--no-commit-id", "--name-only", "-r", diffspec]
        # One path per line; splitting on lines keeps paths with embedded spaces intact.
        files = self._check_output(cmd).splitlines()
        return {self._fix_git_relative_path(f, relative_to) for f in files}

    # N.B.: Only used by tests.
    def commit(self, message: str) -> None:
        """Commit all tracked changes with the given message."""
        self._check_call(["commit", "--all", "--message", message])

    # N.B.: Only used by tests.
    def add(self, *paths: PurePath) -> None:
        """Stage the given paths."""
        self._check_call(["add", *(str(path) for path in paths)])

    def _check_call(self, args: Iterable[str]) -> None:
        """Run a git subcommand without capturing output; raise on a non-zero exit code."""
        cmd = self._create_git_cmdline(args)
        self._log_call(cmd)
        result = subprocess.call(cmd)
        self._check_result(cmd, result)

    def _check_output(self, args: Iterable[str]) -> str:
        """Run a git subcommand and return its decoded, stripped stdout; raise on failure."""
        cmd = self._create_git_cmdline(args)
        self._log_call(cmd)

        process, out, err = self._invoke(cmd)

        self._check_result(cmd, process.returncode, err.decode())
        return self._cleanse(out)

    def _create_git_cmdline(self, args: Iterable[str]) -> list[str]:
        """Prefix `args` with the git binary and explicit git-dir/work-tree flags."""
        return [self._gitcmd, f"--git-dir={self._gitdir}", f"--work-tree={self.worktree}", *args]

    def _log_call(self, cmd: Iterable[str]) -> None:
        """Log the command line at debug level."""
        logger.debug("Executing: %s", " ".join(cmd))


class _GitInitialized(Enum):
    """Sentinel type for the module-level git cache."""

    # The only member; its value is 0.
    NO = 0


_Git: _GitIinitialized | Git | None = _GitIinitialized.NO
_Git: _GitInitialized | Git | None = _GitInitialized.NO


@deprecated("2.14.0.dev0", "Use QueryRule(MaybeGitWorktree, [GitWorktreeRequest]) instead.")
def get_git() -> Git | None:
"""Returns Git, if available."""
global _Git
if _Git is _GitIinitialized.NO:
if _Git is _GitInitialized.NO:
# We know about Git, so attempt an auto-configure
try:
git = Git.mount()
Expand Down
2 changes: 2 additions & 0 deletions src/python/pants/core/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from pants.engine.internals.parametrize import Parametrize
from pants.goal import anonymous_telemetry, stats_aggregator
from pants.source import source_root
from pants.vcs import git


def rules():
Expand All @@ -68,6 +69,7 @@ def rules():
*config_files.rules(),
*distdir.rules(),
*external_tool.rules(),
*git.rules(),
*source_files.rules(),
*source_root.rules(),
*stats_aggregator.rules(),
Expand Down
93 changes: 93 additions & 0 deletions src/python/pants/core/util_rules/system_binaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

import dataclasses
import hashlib
import logging
import os
import subprocess
from dataclasses import dataclass
from enum import Enum
from textwrap import dedent
Expand All @@ -25,6 +27,8 @@
from pants.util.ordered_set import OrderedSet
from pants.util.strutil import create_path_env_var, pluralize

logger = logging.getLogger(__name__)

# -------------------------------------------------------------------------------------------
# `BinaryPath` types
# -------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -320,6 +324,44 @@ class DiffBinary(BinaryPath):
pass


class GitBinaryException(Exception):
    """Raised when the `git` binary cannot be executed or exits with a non-zero status."""


class GitBinary(BinaryPath):
    """A `BinaryPath` for `git` that can also run git commands directly as subprocesses."""

    def _invoke_unsandboxed(self, cmd: list[str]) -> str:
        """Invoke the given git command, _without_ the sandboxing provided by the `Process` API.

        This API is for internal use only: users should prefer to consume methods of the
        `GitWorktree` class.

        Returns the command's stdout, decoded and stripped. Raises `GitBinaryException` if the
        process cannot be started or exits with a non-zero status.
        """
        # Rebind `cmd` to the full argv, with this binary's path in front.
        cmd = [self.path, *cmd]
        self._log_call(cmd)

        try:
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError as e:
            # The binary does not exist or is not executable.
            cmd_str = " ".join(cmd)
            raise GitBinaryException(f"Failed to execute command {cmd_str}: {e!r}")

        stdout, stderr = proc.communicate()
        self._check_result(cmd, proc.returncode, stderr.decode())
        return stdout.decode().strip()

    def _check_result(
        self, cmd: Iterable[str], result: int, failure_msg: str | None = None
    ) -> None:
        """Raise `GitBinaryException` unless `result` is a zero exit code.

        A non-empty `failure_msg` is used as the exception message; otherwise a generic message
        naming the command and exit code is used.
        """
        if result == 0:
            return
        cmd_str = " ".join(cmd)
        raise GitBinaryException(failure_msg or f"{cmd_str} failed with exit code {result}")

    def _log_call(self, cmd: Iterable[str]) -> None:
        """Log the command line at debug level."""
        joined = " ".join(cmd)
        logger.debug("Executing: " + joined)


# -------------------------------------------------------------------------------------------
# Binaries Rules
# -------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -644,6 +686,16 @@ async def find_diff() -> DiffBinary:
return DiffBinary(first_path.path, first_path.fingerprint)


@rule(desc="Finding the `git` binary", level=LogLevel.DEBUG)
async def find_git() -> GitBinary:
    """Locate `git` on the search paths, raising if no candidate is found."""
    git_request = BinaryPathRequest(binary_name="git", search_path=SEARCH_PATHS)
    candidates = await Get(BinaryPaths, BinaryPathRequest, git_request)
    git_path = candidates.first_path_or_raise(
        git_request, rationale="track changes to files in your build environment"
    )
    return GitBinary(git_path.path, git_path.fingerprint)


# -------------------------------------------------------------------------------------------
# Rules for lazy requests
# TODO(#12946): Get rid of this when it becomes possible to use `Get()` with only one arg.
Expand Down Expand Up @@ -678,6 +730,10 @@ class DiffBinaryRequest:
pass


class GitBinaryRequest:
    """Marker request type used to obtain a `GitBinary` via a two-argument `Get()`."""


@rule
async def find_zip_wrapper(_: ZipBinaryRequest, zip_binary: ZipBinary) -> ZipBinary:
return zip_binary
Expand Down Expand Up @@ -713,5 +769,42 @@ async def find_diff_wrapper(_: DiffBinaryRequest, diff_binary: DiffBinary) -> Di
return diff_binary


@rule
async def find_git_wrapper(_: GitBinaryRequest, git_binary: GitBinary) -> GitBinary:
    """Return the injected `GitBinary` unchanged; the request argument is ignored."""
    return git_binary


def rules():
    """Return this module's collected rules followed by the `python_bootstrap` rules."""
    all_rules = [*collect_rules()]
    all_rules.extend(python_bootstrap.rules())
    return all_rules


# -------------------------------------------------------------------------------------------
# Rules for fallible binaries
# -------------------------------------------------------------------------------------------


@dataclass(frozen=True)
class MaybeGitBinary:
    """Holds a `GitBinary`, or None when no binary is available."""

    # Defaults to None, so `MaybeGitBinary()` represents the "not found" case.
    git_binary: GitBinary | None = None


@rule(desc="Finding the `git` binary", level=LogLevel.DEBUG)
async def maybe_find_git() -> MaybeGitBinary:
    """Locate `git` on the search paths, returning an empty result if none is found."""
    git_request = BinaryPathRequest(binary_name="git", search_path=SEARCH_PATHS)
    candidates = await Get(BinaryPaths, BinaryPathRequest, git_request)
    git_path = candidates.first_path
    if git_path:
        return MaybeGitBinary(GitBinary(git_path.path, git_path.fingerprint))

    # No usable candidate: report absence instead of raising.
    return MaybeGitBinary()


class MaybeGitBinaryRequest:
    """Marker request type used to obtain a `MaybeGitBinary` via a two-argument `Get()`."""


@rule
async def maybe_find_git_wrapper(
    _: MaybeGitBinaryRequest, maybe_git_binary: MaybeGitBinary
) -> MaybeGitBinary:
    """Return the injected `MaybeGitBinary` unchanged; the request argument is ignored."""
    return maybe_git_binary
2 changes: 2 additions & 0 deletions src/python/pants/init/engine_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from pants.util.logging import LogLevel
from pants.util.ordered_set import FrozenOrderedSet
from pants.vcs.changed import rules as changed_rules
from pants.vcs.git import rules as git_rules

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -255,6 +256,7 @@ def build_root_singleton() -> BuildRoot:
*fs.rules(),
*environment.rules(),
*desktop.rules(),
*git_rules(),
*graph.rules(),
*options_parsing.rules(),
*process.rules(),
Expand Down
Loading

0 comments on commit d0ff0ac

Please sign in to comment.