Skip to content

Commit

Permalink
[engine] Use scandir, and preserve symlink paths in output
Browse files Browse the repository at this point in the history
This patch fixes an issue where symlinked directories and files were resolved to their canonical names during execution. That meant that a glob for `d.ln/2.txt` (where `d.ln` was a symlink to a directory `a`) might result in a match for the path `a/2.txt`. Instead, users expect that the symlink is treated as if it had the same type as the underlying file.

Additionally, this review switches to using `scandir` to compute directory listings, which should result in 1 syscall per directory listing, rather than N+1.

- Convert `Path` into a holder for a symbolic name and the `Stat` that underlies it.
- Generate directory listings via scandir: fewer syscalls, and fewer FilesystemNodes.
- Satisfy a point lookup for a literal Path using the directory listing that contains it (to allow the same underlying syscall to be reused there).
- Preserve file ordering in EagerFilesetWithSpec, and cover with a test (resolves [pantsbuild#3497](pantsbuild#3497).)

Testing Done:
http://jenkins.pantsbuild.org/job/pantsbuild/job/pants/branch/PR-3575/5/

Bugs closed: 3497, 3575

Reviewed at https://rbcommons.com/s/twitter/r/3991/
  • Loading branch information
stuhood committed Jun 13, 2016
1 parent aa25c82 commit 387bb11
Show file tree
Hide file tree
Showing 39 changed files with 562 additions and 478 deletions.
1 change: 1 addition & 0 deletions 3rdparty/python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pytest-cov>=1.8,<1.9
pytest>=2.6,<2.7
pywatchman==1.3.0
requests>=2.5.0,<2.6
scandir==1.2
setproctitle==1.1.9
setuptools==5.4.1
six>=1.9.0,<2
Expand Down
2 changes: 2 additions & 0 deletions src/python/pants/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ python_library(
sources = ['project_tree.py'],
dependencies = [
'3rdparty/python:pathspec',
'3rdparty/python:six',
'src/python/pants/util:dirutil',
'src/python/pants/util:meta',
'src/python/pants/util:objects',
Expand All @@ -27,6 +28,7 @@ python_library(
name = 'file_system_project_tree',
sources = ['file_system_project_tree.py'],
dependencies = [
'3rdparty/python:scandir',
':project_tree',
'src/python/pants/util:dirutil',
]
Expand Down
53 changes: 29 additions & 24 deletions src/python/pants/base/file_system_project_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,21 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import errno
import os
import stat
from glob import glob1

from pants.base.project_tree import PTSTAT_DIR, PTSTAT_FILE, PTSTAT_LINK, ProjectTree
from pants.base.project_tree import Dir, File, Link, ProjectTree
from pants.util.dirutil import fast_relpath, safe_walk


# Use the built-in version of scandir/walk if possible, otherwise
# use the scandir module version
try:
from os import scandir
except ImportError:
from scandir import scandir


class FileSystemProjectTree(ProjectTree):
def _join(self, relpath):
if relpath.startswith(os.sep):
Expand All @@ -23,10 +29,26 @@ def _join(self, relpath):
def _glob1_raw(self, dir_relpath, glob):
return glob1(self._join(dir_relpath), glob)

def _listdir_raw(self, relpath):
# TODO: use scandir which is backported from 3.x
# https://github.com/pantsbuild/pants/issues/3250
return os.listdir(self._join(relpath))
def _scandir_raw(self, relpath):
# Sanity check. TODO: this should probably be added to the ProjectTree interface as
# an optional call, so that we can use it in fs.py rather than applying it by default.
abspath = os.path.normpath(self._join(relpath))
if os.path.realpath(abspath) != abspath:
raise ValueError('scandir for non-canonical path "{}" not supported in {}.'.format(
relpath, self))

for entry in scandir(abspath):
# NB: We don't use `DirEntry.stat`, as the scandir docs indicate that that always requires
# an additional syscall on Unixes.
entry_path = os.path.normpath(os.path.join(relpath, entry.name))
if entry.is_file(follow_symlinks=False):
yield File(entry_path)
elif entry.is_dir(follow_symlinks=False):
yield Dir(entry_path)
elif entry.is_symlink():
yield Link(entry_path)
else:
raise IOError('Unsupported file type in {}: {}'.format(self, entry_path))

def _isdir_raw(self, relpath):
return os.path.isdir(self._join(relpath))
Expand All @@ -44,23 +66,6 @@ def _content_raw(self, file_relpath):
def _relative_readlink_raw(self, relpath):
return os.readlink(self._join(relpath))

def _lstat_raw(self, relpath):
try:
mode = os.lstat(self._join(relpath)).st_mode
if stat.S_ISLNK(mode):
return PTSTAT_LINK
elif stat.S_ISDIR(mode):
return PTSTAT_DIR
elif stat.S_ISREG(mode):
return PTSTAT_FILE
else:
raise IOError('Unsupported file type in {}: {}'.format(self, relpath))
except (IOError, OSError) as e:
if e.errno == errno.ENOENT:
return None
else:
raise e

def _walk_raw(self, relpath, topdown=True):
def onerror(error):
raise OSError(getattr(error, 'errno', None), 'Failed to walk below {}'.format(relpath), error)
Expand Down
85 changes: 47 additions & 38 deletions src/python/pants/base/project_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@

import logging
import os
from abc import abstractmethod
from abc import abstractmethod, abstractproperty

import six
from pathspec.gitignore import GitIgnorePattern
from pathspec.pathspec import PathSpec

Expand Down Expand Up @@ -43,8 +44,8 @@ def _glob1_raw(self, dir_relpath, glob):
"""Returns a list of paths in path that match glob."""

@abstractmethod
def _listdir_raw(self, relpath):
"""Return the names of paths in the given directory."""
def _scandir_raw(self, relpath):
"""Return Stats relative to the root for items in the given directory."""

@abstractmethod
def _isdir_raw(self, relpath):
Expand All @@ -66,10 +67,6 @@ def _content_raw(self, file_relpath):
def _relative_readlink_raw(self, relpath):
"""Execute `readlink` for the given path, which may result in a relative path."""

@abstractmethod
def _lstat_raw(self, relpath):
"""Without following symlinks, returns a PTStat object for the path, or None."""

@abstractmethod
def _walk_raw(self, relpath, topdown=True):
"""Walk the file tree rooted at `path`. Works like os.walk but returned root value is relative path."""
Expand All @@ -80,15 +77,15 @@ def glob1(self, dir_relpath, glob):
return []

matched_files = self._glob1_raw(dir_relpath, glob)
return self.filter_ignored(matched_files, dir_relpath)
prefix = self._relpath_no_dot(dir_relpath)
return self._filter_ignored(matched_files, selector=lambda p: os.path.join(prefix, p))

def listdir(self, relpath):
"""Return the names of paths which are in the given directory and not ignored."""
def scandir(self, relpath):
"""Return paths relative to the root, which are in the given directory and not ignored."""
if self.isignored(relpath, directory=True):
self._raise_access_ignored(relpath)

names = self._listdir_raw(relpath)
return self.filter_ignored(names, relpath)
return self._filter_ignored(self._scandir_raw(relpath), selector=lambda e: e.path)

def isdir(self, relpath):
"""Returns True if path is a directory and is not ignored."""
Expand Down Expand Up @@ -129,16 +126,6 @@ def relative_readlink(self, relpath):
self._raise_access_ignored(relpath)
return self._relative_readlink_raw(relpath)

def lstat(self, relpath):
"""
Without following symlinks, returns a PTStat object for the path, or None
Raises exception if path is ignored.
"""
if self.isignored(self._append_slash_if_dir_path(relpath)):
self._raise_access_ignored(relpath)

return self._lstat_raw(relpath)

def walk(self, relpath, topdown=True):
"""
Walk the file tree rooted at `path`. Works like os.walk but returned root value is relative path.
Expand Down Expand Up @@ -175,17 +162,18 @@ def isignored(self, relpath, directory=False):
match_result = list(self.ignore.match_files([relpath]))
return len(match_result) > 0

def filter_ignored(self, path_list, prefix=''):
"""Takes a list of paths and filters out ignored ones."""
prefix = self._relpath_no_dot(prefix)
prefixed_path_list = [self._append_slash_if_dir_path(os.path.join(prefix, item)) for item in path_list]
ignored_paths = list(self.ignore.match_files(prefixed_path_list))
if len(ignored_paths) == 0:
return path_list
def _filter_ignored(self, entries, selector=None):
"""Given an opaque entry list, filter any ignored entries.
return [fast_relpath(f, prefix).rstrip('/') for f in
[path for path in prefixed_path_list if path not in ignored_paths]
]
:param entries: A list or generator that produces entries to filter.
:param selector: A function that computes a path for an entry relative to the root of the
ProjectTree, or None to use identity.
"""
selector = selector or (lambda x: x)
prefixed_entries = [(self._append_slash_if_dir_path(selector(entry)), entry)
for entry in entries]
ignored_paths = set(self.ignore.match_files(path for path, _ in prefixed_entries))
return [entry for path, entry in prefixed_entries if path not in ignored_paths]

def _relpath_no_dot(self, relpath):
return relpath.lstrip('./') if relpath != '.' else ''
Expand All @@ -206,13 +194,34 @@ def _append_slash_if_dir_path(self, relpath):
return relpath


class PTStat(datatype('PTStat', ['ftype'])):
"""A simple 'Stat' facade that can be implemented uniformly across SCM and posix backends.
class Stat(AbstractClass):
"""An existing filesystem path with a known type, relative to the ProjectTree's buildroot.
:param ftype: Either 'file', 'dir', or 'link'.
Note that in order to preserve these invariants, end-user functions should never directly
instantiate Stat instances.
"""

@abstractproperty
def path(self):
""":returns: The string path for this Stat."""


class File(datatype('File', ['path']), Stat):
"""A file."""

def __new__(cls, path):
return super(File, cls).__new__(cls, six.text_type(path))


class Dir(datatype('Dir', ['path']), Stat):
"""A directory."""

def __new__(cls, path):
return super(Dir, cls).__new__(cls, six.text_type(path))


class Link(datatype('Link', ['path']), Stat):
"""A symbolic link."""

PTSTAT_FILE = PTStat('file')
PTSTAT_DIR = PTStat('dir')
PTSTAT_LINK = PTStat('link')
def __new__(cls, path):
return super(Link, cls).__new__(cls, six.text_type(path))
16 changes: 9 additions & 7 deletions src/python/pants/base/scm_project_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import os
from types import NoneType

from pants.base.project_tree import PTSTAT_DIR, PTSTAT_FILE, PTSTAT_LINK, ProjectTree
from pants.base.project_tree import Dir, File, Link, ProjectTree
from pants.util.dirutil import fast_relpath
from pants.util.memo import memoized

Expand Down Expand Up @@ -38,8 +38,10 @@ def _glob1_raw(self, dir_relpath, glob):
files = self._reader.listdir(self._scm_relpath(dir_relpath))
return [filename for filename in files if fnmatch.fnmatch(filename, glob)]

def _listdir_raw(self, relpath):
return self._reader.listdir(self._scm_relpath(relpath))
def _scandir_raw(self, relpath):
# TODO: Further optimization possible.
for name in self._reader.listdir(self._scm_relpath(relpath)):
yield self._lstat(os.path.join(relpath, name))

def _isdir_raw(self, relpath):
return self._reader.isdir(self._scm_relpath(relpath))
Expand All @@ -57,16 +59,16 @@ def _content_raw(self, file_relpath):
def _relative_readlink_raw(self, relpath):
return self._reader.readlink(self._scm_relpath(relpath))

def _lstat_raw(self, relpath):
def _lstat(self, relpath):
mode = type(self._reader.lstat(self._scm_relpath(relpath)))
if mode == NoneType:
return None
elif mode == self._reader.Symlink:
return PTSTAT_LINK
return Link(relpath)
elif mode == self._reader.Dir:
return PTSTAT_DIR
return Dir(relpath)
elif mode == self._reader.File:
return PTSTAT_FILE
return File(relpath)
else:
raise IOError('Unsupported file type in {}: {}'.format(self, relpath))

Expand Down
2 changes: 2 additions & 0 deletions src/python/pants/engine/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ python_library(
':mapper',
':objects',
':selectors',
'src/python/pants/base:project_tree',
'src/python/pants/build_graph',
]
)
Expand All @@ -95,6 +96,7 @@ python_library(
':addressable',
':fs',
':struct',
'src/python/pants/base:project_tree',
'src/python/pants/build_graph',
'src/python/pants/util:objects',
]
Expand Down
Loading

0 comments on commit 387bb11

Please sign in to comment.