Skip to content

Commit

Permalink
build: Factor dependency extraction code from dist_test into a python…
Browse files Browse the repository at this point in the history
… library

This will allow us to reuse this dependency extraction logic when
creating minicluster test binary artifacts that ship their dependencies.

There are no functional changes in this patch.

I ran dist-test a few times and everything seems to work fine:

 - http://dist-test.cloudera.org/job?job_id=mpercy.1546559376.69253 (run)
 - http://dist-test.cloudera.org/job?job_id=mpercy.1546559905.73493 (loop)

Change-Id: I0b4cbfceb053c61dbb1f1d16716acc8926987af2
Reviewed-on: http://gerrit.cloudera.org:8080/12153
Tested-by: Mike Percy <[email protected]>
Reviewed-by: Adar Dembo <[email protected]>
  • Loading branch information
mpercy committed Jan 4, 2019
1 parent 83b70f8 commit d4481c0
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 59 deletions.
119 changes: 119 additions & 0 deletions build-support/dep_extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
import re
import subprocess

# Matches the output lines from the 'ldd' tool. For example:
#   libcrypto.so.10 => /path/to/usr/lib64/libcrypto.so.10 (0x00007fb0cb0a5000)
#
# The single capture group extracts the resolved library path (the token
# following '=>').
#
# Note: The following pattern will not match the following two types of
# dependencies and so they will not be included in the output from this module:
#
# 1. The dynamic linker:
#    /lib64/ld-linux-x86-64.so.2 (0x00007f6f7ab79000)
# 2. Linux virtual dynamic shared objects:
#    linux-vdso.so.1 (0x00007ffc06cfb000)
#
LDD_RE = re.compile(r'^\s+.+? => (\S+) \(0x.+\)')

class DependencyExtractor(object):
  """
  This class extracts native library dependencies from a given executable by
  parsing the output of the 'ldd' tool.

  'ldd' results are cached per executable path. The set of returned libraries
  may be narrowed with a caller-supplied predicate (see set_library_filter())
  and optionally augmented with symlink targets (see set_expand_symlinks()).
  """

  # File extensions for which running 'ldd' is known to be pointless.
  _NON_BINARY_EXTENSIONS = (".jar", ".pl", ".py", ".sh", ".txt")

  def __init__(self):
    # Maps executable path -> (stdout, stderr, returncode) of its 'ldd' run.
    self.deps_cache = {}
    # Predicate applied to each library path; True means "include".
    self.lib_allowed_filter = lambda path: True
    # Whether extract_deps() should also return symlink targets.
    self.enable_expand_symlinks = False

  def set_library_filter(self, lib_allowed_filter):
    """
    Specify a filter predicate that should return True iff the specified
    library path should be included in the result from extract_deps().
    By default, all libraries are included in the result.
    """
    self.lib_allowed_filter = lib_allowed_filter

  def set_expand_symlinks(self, expand):
    """
    Specify whether symlinks should be expanded in the output from
    extract_deps(). By default, symlinks are not expanded. See
    expand_symlinks().
    """
    self.enable_expand_symlinks = expand

  def expand_symlinks(self, deps):
    """
    ldd will often point to symlinks. Return a list including any symlink in
    the specified dependency list as well as whatever it's pointing to,
    recursively.
    """
    expanded = []
    for path in deps:
      expanded.append(path)
      while os.path.islink(path):
        # TODO(mpercy): os.readlink() can return an absolute path. Should we
        # more carefully handle the path concatenation here?
        path = os.path.join(os.path.dirname(path), os.readlink(path))
        expanded.append(path)
    return expanded

  def extract_deps(self, exe):
    """
    Runs 'ldd' on the provided 'exe' path, returning a list of any libraries
    it depends on. Libraries rejected by the filter predicate (see
    set_library_filter()) are removed from this list.
    If the provided 'exe' is not a binary executable, returns
    an empty list.
    """
    if exe.endswith(self._NON_BINARY_EXTENSIONS) or os.path.isdir(exe):
      return []

    if exe not in self.deps_cache:
      # universal_newlines=True yields str output on both Python 2 and
      # Python 3; without it, Python 3 returns bytes, which would fail to
      # match the str-based LDD_RE pattern below.
      p = subprocess.Popen(["ldd", exe], stdout=subprocess.PIPE,
                           universal_newlines=True)
      out, err = p.communicate()
      self.deps_cache[exe] = (out, err, p.returncode)

    out, err, rc = self.deps_cache[exe]
    if rc != 0:
      logging.warning("failed to run ldd on %s", exe)
      return []

    deps = []
    for line in out.splitlines():
      match = LDD_RE.match(line)
      if not match:
        continue
      dep = match.group(1)
      # Apply the provided predicate.
      if not self.lib_allowed_filter(dep):
        continue
      deps.append(dep)

    if self.enable_expand_symlinks:
      deps = self.expand_symlinks(deps)
    return deps
80 changes: 21 additions & 59 deletions build-support/dist_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import subprocess
import time

from dep_extract import DependencyExtractor
from kudu_util import init_logging

TEST_TIMEOUT_SECS = int(os.environ.get('TEST_TIMEOUT_SECS', '900'))
Expand Down Expand Up @@ -76,10 +77,6 @@
# 262: GTEST_TOTAL_SHARDS=1
TEST_ENV_RE = re.compile('^\d+: (\S+)=(.+)')

# Matches the output lines of 'ldd'. For example:
# libcrypto.so.10 => /path/to/usr/lib64/libcrypto.so.10 (0x00007fb0cb0a5000)
LDD_RE = re.compile(r'^\s+.+? => (\S+) \(0x.+\)')

DEPS_FOR_ALL = \
["build-support/stacktrace_addr2line.pl",
"build-support/run-test.sh",
Expand Down Expand Up @@ -219,14 +216,21 @@ def get_test_executions(tests_regex, extra_args=None):
return execs


def is_lib_blacklisted(lib):
def is_lib_whitelisted(lib):
# No need to ship things like libc, libstdcxx, etc.
if lib.startswith("/lib") or lib.startswith("/usr"):
return True
return False
return False
return True


def create_dependency_extractor():
  """
  Return a DependencyExtractor configured for dist-test use: candidate
  libraries are filtered through is_lib_whitelisted() and symlinks are
  expanded so that both each symlink and its target are shipped.
  """
  dep_extractor = DependencyExtractor()
  dep_extractor.set_library_filter(is_lib_whitelisted)
  dep_extractor.set_expand_symlinks(True)
  return dep_extractor


def get_base_deps():
def get_base_deps(dep_extractor):
deps = []
for d in DEPS_FOR_ALL:
d = os.path.realpath(rel_to_abs(d))
Expand All @@ -236,7 +240,7 @@ def get_base_deps():
# DEPS_FOR_ALL may include binaries whose dependencies are not dependencies
# of the test executable. We must include those dependencies in the archive
# for the binaries to be usable.
deps.extend(ldd_deps(d))
deps.extend(dep_extractor.extract_deps(d))
return deps


Expand Down Expand Up @@ -265,51 +269,7 @@ def copy_system_library(lib):
shutil.copy2(rel_to_abs(lib), dst)
return dst

LDD_CACHE={}
def ldd_deps(exe):
"""
Runs 'ldd' on the provided 'exe' path, returning a list of
any libraries it depends on. Blacklisted libraries are
removed from this list.
If the provided 'exe' is not a binary executable, returns
an empty list.
"""
if (exe.endswith(".jar") or
exe.endswith(".pl") or
exe.endswith(".py") or
exe.endswith(".sh") or
exe.endswith(".txt") or
os.path.isdir(exe)):
return []
if exe not in LDD_CACHE:
p = subprocess.Popen(["ldd", exe], stdout=subprocess.PIPE)
out, err = p.communicate()
LDD_CACHE[exe] = (out, err, p.returncode)
out, err, rc = LDD_CACHE[exe]
if rc != 0:
logging.warning("failed to run ldd on %s", exe)
return []
ret = []
for l in out.splitlines():
m = LDD_RE.match(l)
if not m:
continue
lib = m.group(1)
if is_lib_blacklisted(lib):
continue
path = m.group(1)
ret.append(m.group(1))

# ldd will often point to symlinks. We need to upload the symlink
# as well as whatever it's pointing to, recursively.
while os.path.islink(path):
path = os.path.join(os.path.dirname(path), os.readlink(path))
ret.append(path)
return ret


def create_archive_input(staging, execution,
def create_archive_input(staging, execution, dep_extractor,
collect_tmpdir=False):
"""
Generates .gen.json and .isolate files corresponding to the
Expand All @@ -325,8 +285,8 @@ def create_archive_input(staging, execution,
argv[1] = rel_test_exe
files = []
files.append(rel_test_exe)
deps = ldd_deps(abs_test_exe)
deps.extend(get_base_deps())
deps = dep_extractor.extract_deps(abs_test_exe)
deps.extend(get_base_deps(dep_extractor))

# Deduplicate dependencies included via DEPS_FOR_ALL.
for d in set(deps):
Expand Down Expand Up @@ -480,8 +440,9 @@ def run_tests(parser, options):
for e in executions:
e.argv.extend(options.extra_args)
staging = StagingDir.new()
dep_extractor = create_dependency_extractor()
for execution in executions:
create_archive_input(staging, execution,
create_archive_input(staging, execution, dep_extractor,
collect_tmpdir=options.collect_tmpdir)
run_isolate(staging)
retry_all = RETRY_ALL_TESTS > 0
Expand Down Expand Up @@ -545,8 +506,9 @@ def loop_test(parser, options):
e = executions[0]
e.env["GTEST_TOTAL_SHARDS"] = 1
e.env["GTEST_SHARD_INDEX"] = 0
dep_extractor = create_dependency_extractor()
for execution in executions:
create_archive_input(staging, execution,
create_archive_input(staging, execution, dep_extractor,
collect_tmpdir=options.collect_tmpdir)
run_isolate(staging)
create_task_json(staging, options.num_instances)
Expand Down Expand Up @@ -616,7 +578,7 @@ def add_java_subparser(subparsers):


def dump_base_deps(parser, options):
print json.dumps(get_base_deps())
print json.dumps(get_base_deps(create_dependency_extractor()))


def add_internal_commands(subparsers):
Expand Down

0 comments on commit d4481c0

Please sign in to comment.