Skip to content

Commit

Permalink
[engine] Implement symlink handling
Browse files Browse the repository at this point in the history
In order to properly invalidate for watchman events, we need to record that we walked symlinks in the product graph. This review adds symlink handling to `engine.exp.fs`, and improves the type safety of filesystem operations by requiring that a PathGlobs object matches exactly one of `Files`, `Dirs` or `Links`.

- Replace `fs.Path` with `fs.Stat` subclasses: `fs.{File,Dir,Link}`
- Add explicit support to PathGlobs for matching directories. A PathGlobs object matches either Files or Dirs, but not both.
- Resolve symlinks by recursively requesting `Files`/`Dirs` for a `Path` projected from a `ReadLink`
- Replace `RecursiveSubDirectories` with use of a `PathGlobs` object recursively matching `Dirs`.
- Add ProjectTree.{lstat, readlink, listdir}
- Split `test_fs.py` from `test_path_globs.py`, and prepare to test ScmProjectTree (see [pantsbuild#3189](pantsbuild#3189))
- Include tests in `test_fs.py` to validate the actual filesystem events that occurred.

Testing Done:
https://travis-ci.org/pantsbuild/pants/builds/124108442

Bugs closed: 3121, 3190

Reviewed at https://rbcommons.com/s/twitter/r/3691/
  • Loading branch information
stuhood committed Apr 19, 2016
1 parent 930b048 commit f83e2b0
Show file tree
Hide file tree
Showing 27 changed files with 721 additions and 332 deletions.
7 changes: 5 additions & 2 deletions src/python/pants/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@ python_library(
name = 'build_environment',
sources = ['build_environment.py'],
dependencies = [
'src/python/pants:version',
'src/python/pants/scm:all',
':build_root',
'src/python/pants/scm',
'src/python/pants/scm:git',
'src/python/pants:version',
]
)

python_library(
name = 'project_tree',
sources = ['project_tree.py'],
dependencies = [
'src/python/pants/util:dirutil',
'src/python/pants/util:meta',
'src/python/pants/util:objects',
]
)

Expand Down
44 changes: 37 additions & 7 deletions src/python/pants/base/file_system_project_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,64 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import errno
import os
import stat
from glob import glob1

from pants.base.project_tree import ProjectTree
from pants.base.project_tree import PTSTAT_DIR, PTSTAT_FILE, PTSTAT_LINK, ProjectTree
from pants.util.dirutil import fast_relpath, safe_walk


class FileSystemProjectTree(ProjectTree):
def _join(self, relpath):
if relpath.startswith(os.sep):
raise ValueError('Absolute path "{}" not legal in {}.'.format(relpath, self))
return os.path.join(self.build_root, relpath)

def glob1(self, dir_relpath, glob):
return glob1(os.path.join(self.build_root, dir_relpath), glob)
return glob1(self._join(dir_relpath), glob)

def content(self, file_relpath):
with open(os.path.join(self.build_root, file_relpath), 'rb') as source:
with open(self._join(file_relpath), 'rb') as source:
return source.read()

def isdir(self, relpath):
return os.path.isdir(os.path.join(self.build_root, relpath))
return os.path.isdir(self._join(relpath))

def isfile(self, relpath):
return os.path.isfile(os.path.join(self.build_root, relpath))
return os.path.isfile(self._join(relpath))

def exists(self, relpath):
return os.path.exists(os.path.join(self.build_root, relpath))
return os.path.exists(self._join(relpath))

def lstat(self, relpath):
try:
mode = os.lstat(self._join(relpath)).st_mode
if stat.S_ISLNK(mode):
return PTSTAT_LINK
elif stat.S_ISDIR(mode):
return PTSTAT_DIR
elif stat.S_ISREG(mode):
return PTSTAT_FILE
else:
raise IOError('Unsupported file type in {}: {}'.format(self, relpath))
except (IOError, OSError) as e:
if e.errno == errno.ENOENT:
return None
else:
raise e

def relative_readlink(self, relpath):
return os.readlink(self._join(relpath))

def listdir(self, relpath):
return os.listdir(self._join(relpath))

def walk(self, relpath, topdown=True):
def onerror(error):
raise OSError(getattr(error, 'errno', None), 'Failed to walk below {}'.format(relpath), error)
for root, dirs, files in safe_walk(os.path.join(self.build_root, relpath),
for root, dirs, files in safe_walk(self._join(relpath),
topdown=topdown,
onerror=onerror):
yield fast_relpath(root, self.build_root), dirs, files
Expand Down
38 changes: 38 additions & 0 deletions src/python/pants/base/project_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import os
from abc import abstractmethod

from pants.util.dirutil import fast_relpath
from pants.util.meta import AbstractClass
from pants.util.objects import datatype


logger = logging.getLogger(__name__)
Expand All @@ -32,6 +34,10 @@ def __init__(self, build_root):
def glob1(self, dir_relpath, glob):
"""Returns a list of paths in path that match glob"""

@abstractmethod
def listdir(self, relpath):
"""Return the names of paths in the given directory."""

@abstractmethod
def walk(self, relpath, topdown=True):
"""Walk the file tree rooted at `path`. Works like os.walk but returned root value is relative path."""
Expand All @@ -48,6 +54,38 @@ def isfile(self, relpath):
def exists(self, relpath):
"""Returns True if path exists"""

@abstractmethod
def lstat(self, relpath):
"""Without following symlinks, returns a PTStat object for the path, or None"""

@abstractmethod
def relative_readlink(self, relpath):
"""Execute `readlink` for the given path, which may result in a relative path."""

def readlink(self, relpath):
link_path = self.relative_readlink(relpath)
if os.path.isabs(link_path):
raise IOError('Absolute symlinks not supported in {}: {} -> {}'.format(
self, relpath, link_path))
# In order to enforce that this link does not escape the build_root, we join and
# then remove it.
abs_normpath = os.path.normpath(os.path.join(self.build_root,
os.path.dirname(relpath),
link_path))
return fast_relpath(abs_normpath, self.build_root)

@abstractmethod
def content(self, file_relpath):
"""Returns the content for file at path."""


class PTStat(datatype('PTStat', ['ftype'])):
"""A simple 'Stat' facade that can be implemented uniformly across SCM and posix backends.
:param ftype: Either 'file', 'dir', or 'link'.
"""


PTSTAT_FILE = PTStat('file')
PTSTAT_DIR = PTStat('dir')
PTSTAT_LINK = PTStat('link')
22 changes: 21 additions & 1 deletion src/python/pants/base/scm_project_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
import fnmatch
import logging
import os
from types import NoneType

from pants.base.project_tree import ProjectTree
from pants.base.project_tree import PTSTAT_DIR, PTSTAT_FILE, PTSTAT_LINK, ProjectTree


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -43,6 +44,25 @@ def isfile(self, relpath):
def exists(self, relpath):
return self._reader.exists(self._scm_relpath(relpath))

def lstat(self, relpath):
mode = type(self._reader.lstat(self._scm_relpath(relpath)))
if mode == NoneType:
return None
elif mode == self._reader.Symlink:
return PTSTAT_LINK
elif mode == self._reader.Dir:
return PTSTAT_DIR
elif mode == self._reader.File:
return PTSTAT_FILE
else:
raise IOError('Unsupported file type in {}: {}'.format(self, relpath))

def relative_readlink(self, relpath):
return self._reader.readlink(self._scm_relpath(relpath))

def listdir(self, relpath):
return self._reader.listdir(self._scm_relpath(relpath))

def walk(self, relpath, topdown=True):
for path, dirnames, filenames in self._do_walk(self._scm_relpath(relpath), topdown=topdown):
yield (os.path.relpath(os.path.join(self._scm_worktree, path), self.build_root), dirnames, filenames)
Expand Down
12 changes: 5 additions & 7 deletions src/python/pants/engine/exp/examples/planners.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pants.engine.exp.examples.graph_validator import GraphValidator
from pants.engine.exp.examples.parsers import JsonParser
from pants.engine.exp.examples.sources import Sources
from pants.engine.exp.fs import FileContent, FilesContent, Path, PathGlobs, Paths, create_fs_tasks
from pants.engine.exp.fs import Dirs, File, FileContent, FilesContent, PathGlobs, create_fs_tasks
from pants.engine.exp.graph import create_graph_tasks
from pants.engine.exp.mapper import AddressFamily, AddressMapper
from pants.engine.exp.parser import SymbolTable
Expand Down Expand Up @@ -116,12 +116,10 @@ def select_package_address(jvm_package_name, address_families):

@printing_func
def calculate_package_search_path(jvm_package_name, source_roots):
"""Return Paths for directories where the given JVMPackageName might exist."""
"""Return PathGlobs to match directories where the given JVMPackageName might exist."""
rel_package_dir = jvm_package_name.name.replace('.', os_sep)
if not rel_package_dir.endswith(os_sep):
rel_package_dir += os_sep
specs = [os_path_join(srcroot, rel_package_dir) for srcroot in source_roots.srcroots]
return PathGlobs.create_from_specs('', specs)
return PathGlobs.create_from_specs(Dirs, '', specs)


@printing_func
Expand Down Expand Up @@ -431,7 +429,7 @@ def setup_json_scheduler(build_root, debug=True):
'list': Address,
GenGoal.name(): GenGoal,
'unpickleable': UnpickleableResult,
'ls': Path,
'ls': File,
'cat': FileContent,
}
tasks = [
Expand Down Expand Up @@ -466,7 +464,7 @@ def setup_json_scheduler(build_root, debug=True):
extract_scala_imports),
(Address,
[Select(JVMPackageName),
SelectDependencies(AddressFamily, Paths)],
SelectDependencies(AddressFamily, Dirs)],
select_package_address),
(PathGlobs,
[Select(JVMPackageName),
Expand Down
5 changes: 3 additions & 2 deletions src/python/pants/engine/exp/examples/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from abc import abstractproperty

from pants.engine.exp.addressable import Exactly, addressable
from pants.engine.exp.fs import PathGlobs
from pants.engine.exp.fs import Files, PathGlobs
from pants.engine.exp.objects import Locatable
from pants.engine.exp.struct import Struct

Expand Down Expand Up @@ -59,7 +59,8 @@ def path_globs(self):
This field may be projected to request the content of the files for this Sources object.
"""
return PathGlobs.create(self.spec_path,
return PathGlobs.create(Files,
self.spec_path,
files=self.files,
globs=self.globs,
rglobs=self.rglobs,
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/engine/exp/examples/visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pants.binaries import binary_util
from pants.engine.exp.engine import LocalSerialEngine
from pants.engine.exp.examples.planners import setup_json_scheduler
from pants.engine.exp.fs import PathGlobs
from pants.engine.exp.fs import Files, PathGlobs
from pants.util.contextutil import temporary_file_path


Expand Down Expand Up @@ -80,5 +80,5 @@ def main_filespecs():
build_root, goals, args = pop_build_root_and_goals('[build root path] [filespecs]*', sys.argv[1:])

# Create PathGlobs for each arg relative to the buildroot.
path_globs = [PathGlobs.create('', globs=[arg]) for arg in args]
path_globs = [PathGlobs.create(Files, '', globs=[arg]) for arg in args]
visualize_build_request(build_root, goals, path_globs)
Loading

0 comments on commit f83e2b0

Please sign in to comment.