Skip to content

Commit

Permalink
ARROW-5802: [CI][Archery] Dockerify lint utilities
Browse files Browse the repository at this point in the history
Adds support for linting in archery. It knows how to lint with clang-format, cpplint, lint_cpp_cli, clang-tidy, cmake-format, iwyu, flake8, apache-rat, r and rust. By default, it'll ignore iwyu and clang-tidy.

- Fix CMake iwyu target
- Add CMake lint_cpp_cli target
- Remove pandas dependency from archery (was only used for the median)
- Enable lint entry in github actions
- Create a minimal docker image for linting purposes
- Add support for apache-rat in archery (we can remove rat audit files afterward)

Closes apache#5580 from fsaintjacques/ARROW-5802-lint-docker and squashes the following commits:

4139ff6 <François Saint-Jacques> Don't preserve tmpdir on lint
9deaa07 <François Saint-Jacques> Enable cmake-format linter & address comments
d9a3859 <François Saint-Jacques> Preserve iwyu docker-compose image
c1d5e1f <François Saint-Jacques> Address comments
ccb81da <François Saint-Jacques> Fix docker image
a67618c <François Saint-Jacques> Add hadolint linter.
db2913b <François Saint-Jacques> Move java to lang/
d7e777b <François Saint-Jacques> Add rust linter.
b7014f3 <François Saint-Jacques> Add custom bool type to support "ON" and "OFF"
e27e477 <François Saint-Jacques> Add R linter
bc6d419 <François Saint-Jacques> ARROW-5802:  Dockerify lint

Authored-by: François Saint-Jacques <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
  • Loading branch information
fsaintjacques authored and wesm committed Oct 8, 2019
1 parent 3d55122 commit 583fb7e
Show file tree
Hide file tree
Showing 23 changed files with 725 additions and 134 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/linux-docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
label:
- C++ w/ clang-7 & system packages
- Rust
- Lint, Release tests
include:
- label: C++ w/ clang-7 & system packages
image: cpp-system-deps
Expand All @@ -36,6 +37,11 @@ jobs:
image: rust
skip_expression: |
${ARROW_CI_RUST_AFFECTED} != "1"
- label: Lint, Release tests
image: lint
# Don't skip
skip_expression: |
"1" != "1"
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@master
Expand Down
57 changes: 57 additions & 0 deletions ci/docker/Dockerfile.lint
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

FROM hadolint/hadolint:v1.17.2 AS hadolint

FROM ubuntu:18.04

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

RUN apt-get update && \
apt-get install -y -q \
build-essential \
clang-7 \
clang-format-7 \
clang-tidy-7 \
clang-tools-7 \
cmake \
curl \
git \
libclang-7-dev \
ninja-build \
openjdk-11-jdk-headless \
python3 \
python3-dev \
python3-pip \
ruby \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Use python3 by default in scripts
RUN ln -s /usr/bin/python3 /usr/local/bin/python
RUN pip3 install flake8 cmake_format==0.5.2

# Rust linter
RUN curl https://sh.rustup.rs -sSf | \
sh -s -- --default-toolchain stable -y
ENV PATH /root/.cargo/bin:$PATH
RUN rustup component add rustfmt

# Docker linter
COPY --from=hadolint /bin/hadolint /usr/bin/hadolint

CMD ["/arrow/ci/docker/lint.sh"]
17 changes: 11 additions & 6 deletions dev/lint/run_clang_tidy.sh → ci/docker/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@
# specific language governing permissions and limitations
# under the License.

set -ex
set -eu

mkdir -p /build/lint
pushd /build/lint
cmake -GNinja /arrow/cpp
ninja check-clang-tidy
popd
: ${ARROW_HOME:=/arrow}

install_archery() {
pip3 install -e ${ARROW_HOME}/dev/archery
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
}

install_archery
archery lint
16 changes: 6 additions & 10 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,12 @@ if(${CLANG_TIDY_FOUND})
${ARROW_LINT_QUIET})
endif()

if(ARROW_ONLY_LINT)
return()
endif()
if(UNIX)
add_custom_target(iwyu ${BUILD_SUPPORT_DIR}/iwyu/iwyu.sh)
endif(UNIX)

add_custom_target(lint_cpp_cli ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/lint_cpp_cli.py
${CMAKE_CURRENT_SOURCE_DIR}/src)

#
# Set up various options
Expand Down Expand Up @@ -579,13 +582,6 @@ if(${INFER_FOUND})
3)
endif()

#
# "make iwyu" target
#
if(UNIX)
add_custom_target(iwyu ${BUILD_SUPPORT_DIR}/iwyu/iwyu.sh)
endif(UNIX)

#
# Linker and Dependencies
#
Expand Down
28 changes: 14 additions & 14 deletions cpp/build-support/iwyu/iwyu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,13 @@
# specific language governing permissions and limitations
# under the License.
#
set -x
set -uo pipefail

ROOT=$(cd $(dirname $BASH_SOURCE)/../../..; pwd)

IWYU_LOG=$(mktemp -t arrow-cpp-iwyu.XXXXXX)
trap "rm -f $IWYU_LOG" EXIT

echo "Logging IWYU to $IWYU_LOG"

IWYU_MAPPINGS_PATH="$ROOT/cpp/build-support/iwyu/mappings"
IWYU_ARGS="--mapping_file=$IWYU_MAPPINGS_PATH/boost-all.imp \
--mapping_file=$IWYU_MAPPINGS_PATH/boost-all-private.imp \
Expand All @@ -37,15 +35,20 @@ IWYU_ARGS="--mapping_file=$IWYU_MAPPINGS_PATH/boost-all.imp \

set -e

if [ "$1" == "all" ]; then
affected_files() {
pushd $ROOT > /dev/null
local commit=$($ROOT/cpp/build-support/get-upstream-commit.sh)
git diff --name-only $commit | awk '/\.(c|cc|h)$/'
popd > /dev/null
}

if [[ "${1:-}" == "all" ]]; then
python $ROOT/cpp/build-support/iwyu/iwyu_tool.py -p ${IWYU_COMPILATION_DATABASE_PATH:-.} \
-- $IWYU_ARGS | awk -f $ROOT/cpp/build-support/iwyu/iwyu-filter.awk
else
# Build the list of updated files which are of IWYU interest.
file_list_tmp=$(git diff --name-only \
$($ROOT/cpp/build-support/get-upstream-commit.sh) | grep -E '\.(c|cc|h)$')
file_list_tmp=$(affected_files)
if [ -z "$file_list_tmp" ]; then
echo "IWYU verification: no updates on related files, declaring success"
exit 0
fi

Expand All @@ -56,16 +59,13 @@ else
IWYU_FILE_LIST="$IWYU_FILE_LIST $ROOT/$p"
done

python $ROOT/cpp/build-support/iwyu/iwyu_tool.py -p ${IWYU_COMPILATION_DATABASE_PATH:-.} $IWYU_FILE_LIST -- \
$IWYU_ARGS | awk -f $ROOT/cpp/build-support/iwyu/iwyu-filter.awk | \
tee $IWYU_LOG
python $ROOT/cpp/build-support/iwyu/iwyu_tool.py \
-p ${IWYU_COMPILATION_DATABASE_PATH:-.} $IWYU_FILE_LIST -- \
$IWYU_ARGS | awk -f $ROOT/cpp/build-support/iwyu/iwyu-filter.awk > $IWYU_LOG
fi

if [ -s "$IWYU_LOG" ]; then
# The output is not empty: the changelist needs correction.
cat $IWYU_LOG 1>&2
exit 1
fi

# The output is empty: the changelist looks good.
echo "IWYU verification: the changes look good"
exit 0
17 changes: 12 additions & 5 deletions dev/archery/archery/benchmark/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,28 @@
# specific language governing permissions and limitations
# under the License.

import pandas as pa

def median(values):
n = len(values)
if n == 0:
raise ValueError("median requires at least one value")
elif n % 2 == 0:
return (values[(n // 2) - 1] + values[n // 2]) / 2
else:
return values[n // 2]


class Benchmark:
def __init__(self, name, unit, less_is_better, values, stats=None):
self.name = name
self.unit = unit
self.less_is_better = less_is_better
self.values = pa.Series(values)
self.statistics = self.values.describe()
self.values = sorted(values)
self.median = median(self.values)

@property
def value(self):
median = "50%"
return float(self.statistics[median])
return self.median

def __repr__(self):
return f"Benchmark[name={self.name},value={self.value}]"
Expand Down
95 changes: 71 additions & 24 deletions dev/archery/archery/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,51 @@
# under the License.

import click
from contextlib import contextmanager
import json
import logging
import sys
from tempfile import mkdtemp, TemporaryDirectory

from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
from .benchmark.runner import BenchmarkRunner
from .lang.cpp import CppCMakeDefinition, CppConfiguration
from .utils.codec import JsonEncoder
from .utils.lint import linter, LintValidationException
from .utils.logger import logger, ctx as log_ctx
from .utils.source import ArrowSources
from .utils.tmpdir import tmpdir

# Set default logging to INFO in command line.
logging.basicConfig(level=logging.INFO)


class ArrowBool(click.types.BoolParamType):
"""
ArrowBool supports the 'ON' and 'OFF' values on top of the values
supported by BoolParamType. This is convenient to port script which exports
CMake options variables.
"""
name = "boolean"

def convert(self, value, param, ctx):
if isinstance(value, str):
lowered = value.lower()
if lowered == "on":
return True
elif lowered == "off":
return False

return super().convert(value, param, ctx)


BOOL = ArrowBool()


@click.group()
@click.option("--debug", type=bool, is_flag=True, default=False,
@click.option("--debug", type=BOOL, is_flag=True, default=False,
help="Increase logging with debugging output.")
@click.option("--pdb", type=bool, is_flag=True, default=False,
@click.option("--pdb", type=BOOL, is_flag=True, default=False,
help="Invoke pdb on uncaught exception.")
@click.option("-q", "--quiet", type=bool, is_flag=True, default=False,
@click.option("-q", "--quiet", type=BOOL, is_flag=True, default=False,
help="Silence executed commands.")
@click.pass_context
def archery(ctx, debug, pdb, quiet):
Expand Down Expand Up @@ -107,25 +129,25 @@ def _apply_options(cmd, options):
@click.option("--warn-level", default="production", type=warn_level_type,
help="Controls compiler warnings -W(no-)error.")
# components
@click.option("--with-tests", default=True, type=bool,
@click.option("--with-tests", default=True, type=BOOL,
help="Build with tests.")
@click.option("--with-benchmarks", default=False, type=bool,
@click.option("--with-benchmarks", default=False, type=BOOL,
help="Build with benchmarks.")
@click.option("--with-python", default=True, type=bool,
@click.option("--with-python", default=True, type=BOOL,
help="Build with python extension.")
@click.option("--with-parquet", default=False, type=bool,
@click.option("--with-parquet", default=False, type=BOOL,
help="Build with parquet file support.")
@click.option("--with-gandiva", default=False, type=bool,
@click.option("--with-gandiva", default=False, type=BOOL,
help="Build with Gandiva expression compiler support.")
@click.option("--with-plasma", default=False, type=bool,
@click.option("--with-plasma", default=False, type=BOOL,
help="Build with Plasma object store support.")
@click.option("--with-flight", default=False, type=bool,
@click.option("--with-flight", default=False, type=BOOL,
help="Build with Flight rpc support.")
@click.option("--cmake-extras", type=str, multiple=True,
help="Extra flags/options to pass to cmake invocation. "
"Can be stacked")
# misc
@click.option("-f", "--force", type=bool, is_flag=True, default=False,
@click.option("-f", "--force", type=BOOL, is_flag=True, default=False,
help="Delete existing build directory if found.")
@click.option("--targets", type=str, multiple=True,
help="Generator targets to run. Can be stacked.")
Expand Down Expand Up @@ -165,13 +187,38 @@ def build(ctx, src, build_dir, force, targets, **kwargs):
build.run(target)


@contextmanager
def tmpdir(preserve, prefix="arrow-bench-"):
if preserve:
yield mkdtemp(prefix=prefix)
else:
with TemporaryDirectory(prefix=prefix) as tmp:
yield tmp
@archery.command(short_help="Lint Arrow source directory")
@click.option("--src", metavar="<arrow_src>", default=ArrowSources.find(),
callback=validate_arrow_sources,
help="Specify Arrow source directory")
@click.option("--with-clang-format", default=True, type=BOOL,
help="Ensure formatting of C++ files.")
@click.option("--with-cpplint", default=True, type=BOOL,
help="Ensure linting of C++ files with cpplint.")
@click.option("--with-clang-tidy", default=False, type=BOOL,
help="Lint C++ with clang-tidy.")
@click.option("--with-iwyu", default=False, type=BOOL,
help="Lint C++ with Include-What-You-Use (iwyu).")
@click.option("--with-flake8", default=True, type=BOOL,
help="Lint python files with flake8.")
@click.option("--with-cmake-format", default=True, type=BOOL,
help="Lint CMakeFiles.txt files with cmake-format.py.")
@click.option("--with-rat", default=True, type=BOOL,
help="Lint files for license violation via apache-rat.")
@click.option("--with-r", default=True, type=BOOL,
help="Lint r files.")
@click.option("--with-rust", default=True, type=BOOL,
help="Lint rust files.")
@click.option("--with-docker", default=True, type=BOOL,
help="Lint docker images with hadolint.")
@click.option("--fix", type=BOOL, default=False,
help="Toggle fixing the lint errors if the linter supports it.")
@click.pass_context
def lint(ctx, src, **kwargs):
try:
linter(src, **kwargs)
except LintValidationException:
sys.exit(1)


@archery.group()
Expand All @@ -190,7 +237,7 @@ def benchmark_common_options(cmd):
default=ArrowSources.find(),
callback=validate_arrow_sources,
help="Specify Arrow source directory"),
click.option("--preserve", type=bool, default=False, show_default=True,
click.option("--preserve", type=BOOL, default=False, show_default=True,
is_flag=True,
help="Preserve workspace for investigation."),
click.option("--output", metavar="<output>",
Expand Down Expand Up @@ -226,7 +273,7 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
**kwargs):
""" List benchmark suite.
"""
with tmpdir(preserve) as root:
with tmpdir(preserve=preserve) as root:
logger.debug(f"Running benchmark {rev_or_path}")

conf = CppConfiguration(
Expand Down Expand Up @@ -281,7 +328,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
\b
archery benchmark run --output=run.json
"""
with tmpdir(preserve) as root:
with tmpdir(preserve=preserve) as root:
logger.debug(f"Running benchmark {rev_or_path}")

conf = CppConfiguration(
Expand Down Expand Up @@ -379,7 +426,7 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras,
# This should not recompute the benchmark from run.json
archery --quiet benchmark diff WORKSPACE run.json > result.json
"""
with tmpdir(preserve) as root:
with tmpdir(preserve=preserve) as root:
logger.debug(f"Comparing {contender} (contender) with "
f"{baseline} (baseline)")

Expand Down
Loading

0 comments on commit 583fb7e

Please sign in to comment.