Skip to content

Commit

Permalink
first implement v1 of robot impl (pantsbuild#8793)
Browse files Browse the repository at this point in the history
### Problem

See pex-tool/pex#789 for a description of the issue, and https://docs.google.com/document/d/1B_g0Ofs8aQsJtrePPR1PCtSAKgBG1o59AhS_NwfFnbI/edit for a google doc with pros and cons of different approaches.

@jsirois was extremely helpful throughout the development of this feature, and pex-tool/pex#819 and pex-tool/pex#821 in pex `2.0.3` will help to optimize several other aspects of this process when we can unrevert pantsbuild#8787.

**Note:** `src/python/pants/backend/python/subsystems/pex_build_util.py` was removed in this PR, along with all floating references to it.

### Solution

With `--binary-py-generate-ipex`, a `.ipex` file will be created when `./pants binary` is run against a `python_binary()` target. This `.ipex` archive will create a `.pex` file and run it when first executed. The `.ipex` archive contains:
- in `IPEX-INFO`: the source files to inject into the resulting `.pex`, and pypi indices to resolve requirements from.
- in `BOOTSTRAP-PEX-INFO`: the `PEX-INFO` of the pex file that *would* have been generated if `--generate-ipex` was False.
- in `ipex.py`: A bootstrap script which will generate a `.pex` file when the `.ipex` file is first executed.

### Result

For a `.ipex` file which hydrates the `tensorflow==1.14.0` dependency when it is first run, this translates to a >100x decrease in file size:
```bash
X> ls dist
total 145M
-rwxr-xr-x 1 dmcclanahan staff 267k Dec 10 21:11 dehydrated.ipex*
-rwxr-xr-x 1 dmcclanahan staff 134M Dec 10 21:11 dehydrated.pex*
```
  • Loading branch information
cosmicexplorer authored Mar 12, 2020
1 parent 86c22f7 commit 3384a5c
Show file tree
Hide file tree
Showing 25 changed files with 708 additions and 197 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
import pkgutil

from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.subsystems.pex_build_util import (
has_python_requirements,
has_python_sources,
)
from pants.backend.python.targets.python_binary import PythonBinary
from pants.backend.python.targets.python_library import PythonLibrary
from pants.backend.python.targets.python_target import PythonTarget
from pants.backend.python.tasks.resolve_requirements_task_base import ResolveRequirementsTaskBase
from pants.base.exceptions import TaskError
from pants.base.generator import Generator, TemplateData
from pants.base.workunit import WorkUnit, WorkUnitLabel
from pants.python.pex_build_util import PexBuilderWrapper
from pants.python.pex_build_util import (
PexBuilderWrapper,
has_python_requirements,
has_python_sources,
)
from pants.task.lint_task_mixin import LintTaskMixin
from pants.util.dirutil import safe_concurrent_creation, safe_mkdir
from pants.util.memo import memoized_property
Expand Down
5 changes: 5 additions & 0 deletions examples/3rdparty/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@ unpacked_whls(
],
within_data_subdir='purelib/tensorflow',
)

# Exposes every file matched by the recursive '**/*' glob as a single `files()` target, so other
# targets can depend on the raw contents of this directory.
files(
name='examples_python_3rdparty',
sources=['**/*'],
)
18 changes: 18 additions & 0 deletions examples/src/python/example/tensorflow_custom_op/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,21 @@ ctypes_compatible_cpp_library(
],
ctypes_native_library=native_artifact(lib_name='tensorflow-zero-out-operator'),
)


# Binary built from show_tf_version.py. It depends on the tensorflow target declared under
# examples/3rdparty/python and is constrained to CPython >= 3.6, < 4.
python_binary(
name='show-tf-version',
source='show_tf_version.py',
dependencies=[
'examples/3rdparty/python:tensorflow',
],
compatibility=['CPython>=3.6,<4'],
)

# Exposes every file matched by the recursive '**/*' glob as a `files()` target, and pulls in the
# examples 3rdparty files target as a dependency.
# NOTE(review): presumably consumed by tests that need these sources on disk -- confirm.
files(
name='show-tf-version-files',
sources=['**/*'],
dependencies=[
'examples/3rdparty/python:examples_python_3rdparty',
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import tensorflow as tf

# Report the version string of the imported tensorflow module on stdout.
print(f"tf version: {tf.__version__}")
2 changes: 1 addition & 1 deletion src/python/pants/backend/project_info/tasks/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from pants.backend.jvm.tasks.ivy_task_mixin import IvyTaskMixin
from pants.backend.project_info.tasks.export_version import DEFAULT_EXPORT_VERSION
from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.subsystems.pex_build_util import has_python_requirements
from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary
from pants.backend.python.targets.python_target import PythonTarget
from pants.backend.python.targets.python_tests import PythonTests
Expand All @@ -32,6 +31,7 @@
from pants.java.distribution.distribution import DistributionLocator
from pants.java.executor import SubprocessExecutor
from pants.java.jar.jar_dependency_utils import M2Coordinate
from pants.python.pex_build_util import has_python_requirements
from pants.task.console_task import ConsoleTask
from pants.util.memo import memoized_property
from pants.util.ordered_set import OrderedSet
Expand Down
7 changes: 6 additions & 1 deletion src/python/pants/backend/python/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,16 @@
from pants.build_graph.build_file_aliases import BuildFileAliases
from pants.build_graph.resources import Resources
from pants.goal.task_registrar import TaskRegistrar as task
from pants.python.pex_build_util import PexBuilderWrapper
from pants.python.python_requirement import PythonRequirement


def global_subsystems():
# NOTE(review): this diff hunk is rendered without +/- markers. The single-line tuple `return`
# directly below appears to be the removed line, and the set-returning statement after it the
# added replacement, which additionally lists PexBuilderWrapper.Factory -- confirm against the
# commit before treating both statements as live code.
return python_native_code.PythonNativeCode, subprocess_environment.SubprocessEnvironment
return {
python_native_code.PythonNativeCode,
subprocess_environment.SubprocessEnvironment,
PexBuilderWrapper.Factory,
}


def build_file_aliases():
Expand Down
1 change: 1 addition & 0 deletions src/python/pants/backend/python/rules/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ python_library(
'3rdparty/python:dataclasses',
'3rdparty/python:setuptools',
'src/python/pants/backend/python/subsystems',
'src/python/pants/backend/python/subsystems/ipex',
'src/python/pants/build_graph',
'src/python/pants/engine/legacy:graph',
'src/python/pants/engine:build_files',
Expand Down
7 changes: 7 additions & 0 deletions src/python/pants/backend/python/subsystems/ipex/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

# NB: This target is written into an .ipex file as the main script, so it must not depend on any
# other python code! .ipex files should always contain the pex and setuptools requirements that
# are needed in order to run the main script!
python_library()
129 changes: 129 additions & 0 deletions src/python/pants/backend/python/subsystems/ipex/ipex_launcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

"""Entrypoint script for a "dehydrated" .ipex file generated with --generate-ipex.
This script will "hydrate" a normal .pex file in the same directory, then execute it.
"""

import json
import os
import sys
import tempfile

from pex import resolver
from pex.common import open_zip
from pex.interpreter import PythonInterpreter
from pex.pex_builder import PEXBuilder
from pex.pex_info import PexInfo
from pkg_resources import Requirement

APP_CODE_PREFIX = "user_files/"


def _strip_app_code_prefix(path):
    """Return `path` with the leading APP_CODE_PREFIX removed.

    :param path: An archive member path taken from IPEX-INFO.
    :raises ValueError: if `path` does not start with APP_CODE_PREFIX.
    """
    if path.startswith(APP_CODE_PREFIX):
        return path[len(APP_CODE_PREFIX) :]

    message = "Path {path} in IPEX-INFO did not begin with '{APP_CODE_PREFIX}'.".format(
        path=path, APP_CODE_PREFIX=APP_CODE_PREFIX
    )
    raise ValueError(message)


def _log(message):
    """Write `message` to stderr, terminated by a newline."""
    line = message + "\n"
    sys.stderr.write(line)


def _sanitize_requirements(requirements):
    """Drop duplicate and inapplicable requirements, preserving first-seen order.

    Projects such as setuptools or pex may be injected multiple times into the resulting ipex when
    it is first executed, so only the first requirement seen for each project name is kept.
    Requirements carrying a marker that evaluates to false are skipped entirely.

    :param requirements: An iterable of requirement strings.
    :returns: A list of normalized requirement strings, one per project name.
    """
    seen_names = set()
    sanitized_requirements = []

    for raw_requirement in requirements:
        parsed = Requirement(raw_requirement)
        marker = parsed.marker
        if marker and not marker.evaluate():
            continue
        if parsed.name in seen_names:
            continue
        seen_names.add(parsed.name)
        sanitized_requirements.append(str(parsed))

    return sanitized_requirements


def modify_pex_info(pex_info, **kwargs):
    """Return a PexInfo built from the JSON dump of `pex_info` with `kwargs` applied on top.

    :param pex_info: The object whose `dump()` output supplies the starting fields.
    :param kwargs: Top-level fields to add or override in the dumped JSON object.
    :returns: The result of `PexInfo.from_json` on the updated fields.
    """
    fields = json.loads(pex_info.dump())
    fields.update(kwargs)
    reserialized = json.dumps(fields)
    return PexInfo.from_json(reserialized)


def _hydrate_pex_file(self, hydrated_pex_file):
    """Build a complete .pex at `hydrated_pex_file` from the dehydrated .ipex archive at `self`.

    The archive's BOOTSTRAP-PEX-INFO seeds the PEXBuilder, the source files listed under "code" in
    IPEX-INFO are extracted and added, and the requirements are resolved intransitively using the
    "resolver_settings" from IPEX-INFO before the pex is written out.

    :param self: Path to the .ipex archive (a path string, not an instance).
    :param hydrated_pex_file: Path at which the hydrated .pex file is written.
    :raises ValueError: if IPEX-INFO cannot be read or parsed, or if any source file it lists
                        cannot be extracted and added to the pex.
    """
    # Imported locally: only needed to clean up the temporary extraction directory.
    import shutil

    # We extract source files into a temporary directory before creating the pex.
    td = tempfile.mkdtemp()
    try:
        # Bound up front so the error path below can always refer to it, even when reading or
        # parsing IPEX-INFO is the step that failed.
        ipex_info = None

        with open_zip(self) as zf:
            # Populate the pex with the pinned requirements and distribution names & hashes.
            bootstrap_info = PexInfo.from_json(zf.read("BOOTSTRAP-PEX-INFO"))
            bootstrap_builder = PEXBuilder(
                pex_info=bootstrap_info, interpreter=PythonInterpreter.get()
            )

            # Populate the pex with the needed code.
            try:
                ipex_info = json.loads(zf.read("IPEX-INFO").decode("utf-8"))
                for path in ipex_info["code"]:
                    unzipped_source = zf.extract(path, td)
                    bootstrap_builder.add_source(
                        unzipped_source, env_filename=_strip_app_code_prefix(path)
                    )
            except Exception as e:
                if ipex_info is None:
                    info = "<not available: IPEX-INFO could not be read or parsed>"
                else:
                    info = json.dumps(ipex_info, indent=4)
                raise ValueError(
                    "Error: {e}. The IPEX-INFO for this .ipex file was:\n{info}".format(
                        e=e, info=info
                    )
                )

        # Perform a fully pinned intransitive resolve to hydrate the install cache.
        resolver_settings = ipex_info["resolver_settings"]

        sanitized_requirements = _sanitize_requirements(bootstrap_info.requirements)
        bootstrap_info = modify_pex_info(bootstrap_info, requirements=sanitized_requirements)
        bootstrap_builder.info = bootstrap_info

        resolved_distributions = resolver.resolve(
            requirements=bootstrap_info.requirements,
            cache=bootstrap_info.pex_root,
            platform="current",
            transitive=False,
            interpreter=bootstrap_builder.interpreter,
            **resolver_settings
        )
        # TODO: this shouldn't be necessary, as we should be able to use the same 'distributions'
        # from BOOTSTRAP-PEX-INFO. When the .ipex is executed, the normal pex bootstrap fails to
        # see these requirements or recognize that they should be pulled from the cache for some
        # reason.
        for resolved_dist in resolved_distributions:
            bootstrap_builder.add_distribution(resolved_dist.distribution)

        bootstrap_builder.build(hydrated_pex_file, bytecode_compile=False)
    finally:
        # mkdtemp() leaves deletion to the caller. By this point the pex has either been built or
        # the hydration has failed, so the extracted sources are no longer needed.
        shutil.rmtree(td, ignore_errors=True)


def main(self):
    """Hydrate the .pex for the .ipex at path `self` if it does not exist yet, then exec it.

    The hydrated file name is derived from `self` by swapping its extension. The hydrated pex is
    run via os.execv with the current interpreter and the arguments after sys.argv[0].

    :param self: Path to the .ipex archive (a path string, not an instance).
    """
    filename_base, ext = os.path.splitext(self)

    # If the ipex (this pex) is already named '.pex', ensure the output filename doesn't collide by
    # inserting an intermediate '.ipex'!
    if ext != ".pex":
        name_template = "{filename_base}.pex"
    else:
        name_template = "{filename_base}.ipex.pex"
    hydrated_pex_file = name_template.format(filename_base=filename_base)

    already_hydrated = os.path.exists(hydrated_pex_file)
    if not already_hydrated:
        _log("Hydrating {} to {}...".format(self, hydrated_pex_file))
        _hydrate_pex_file(self, hydrated_pex_file)

    interpreter = sys.executable
    exec_args = [interpreter, hydrated_pex_file] + sys.argv[1:]
    os.execv(interpreter, exec_args)


if __name__ == "__main__":
    # sys.argv[0] is handed to main() as the path of the archive to hydrate.
    main(sys.argv[0])
62 changes: 0 additions & 62 deletions src/python/pants/backend/python/subsystems/pex_build_util.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

from pants.backend.native.subsystems.native_toolchain import NativeToolchain
from pants.backend.native.targets.native_library import NativeLibrary
from pants.backend.python.subsystems import pex_build_util
from pants.backend.python.targets.python_distribution import PythonDistribution
from pants.base.exceptions import IncompatiblePlatformsError
from pants.engine.rules import rule, subsystem_rule
from pants.python import pex_build_util
from pants.python.python_setup import PythonSetup
from pants.subsystem.subsystem import Subsystem
from pants.util.memo import memoized_property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@

from pants.backend.native.targets.native_library import NativeLibrary
from pants.backend.native.tasks.link_shared_libraries import SharedLibrary
from pants.backend.python.subsystems.pex_build_util import is_local_python_dist
from pants.backend.python.subsystems.python_native_code import PythonNativeCode
from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import TargetDefinitionException, TaskError
from pants.base.workunit import WorkUnitLabel
from pants.build_graph.address import Address
from pants.python.pex_build_util import is_local_python_dist
from pants.python.python_requirement import PythonRequirement
from pants.python.setup_py_runner import SetupPyRunner
from pants.task.task import Task
Expand Down
8 changes: 4 additions & 4 deletions src/python/pants/backend/python/tasks/gather_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
from pex.pex import PEX
from pex.pex_builder import PEXBuilder

from pants.backend.python.subsystems.pex_build_util import (
from pants.base.exceptions import TaskError
from pants.invalidation.cache_manager import VersionedTargetSet
from pants.python.pex_build_util import (
PexBuilderWrapper,
has_python_sources,
has_resources,
is_python_target,
)
from pants.base.exceptions import TaskError
from pants.invalidation.cache_manager import VersionedTargetSet
from pants.python.pex_build_util import PexBuilderWrapper
from pants.task.task import Task
from pants.util.dirutil import safe_concurrent_creation
from pants.util.ordered_set import OrderedSet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import os

from pants.backend.python.subsystems.pex_build_util import is_local_python_dist
from pants.base.build_environment import get_buildroot
from pants.python.pex_build_util import is_local_python_dist
from pants.task.task import Task
from pants.util.dirutil import safe_mkdir
from pants.util.fileutil import atomic_copy
Expand Down
Loading

0 comments on commit 3384a5c

Please sign in to comment.