Skip to content

Commit

Permalink
IMPALA-3676: Use clang as a static analysis tool
Browse files Browse the repository at this point in the history
This patch adds a script to run clang-tidy over the whole code
base. It is a first step towards running clang-tidy over patches as a
tool to help users spot bugs before code review.

Because of the number of clang-tidy checks, this patch only addresses
some of them. In particular, only checks starting with 'clang' are
considered. Many of them which are flaky or not part of our style are
excluded from the analysis. This patch also exlcudes some checks which
are part of our current style but which would be too laborious to fix
over the entire codebase, like using nullptr rather than NULL.

This patch also fixes a number of small bugs found by clang-tidy.

Finally, this patch adds the class AlignedNew, the purpose of which is
to provide correct alignment on heap-allocated data. The global new
operator only guarantees 16-byte alignment. A class that includes a
member variable that must be aligned on a k-byte boundary for k>16 can
inherit from AlignedNew<k> to ensure correct alignment on the heap,
quieting clang's -Wover-aligned warning. (Static and stack allocation
are required by the standard to respect the alignment of the type and
its member variables, so no extra code is needed for allocation in
those places.)

Change-Id: I4ed168488cb30ddeccd0087f3840541d858f9c06
Reviewed-on: http://gerrit.cloudera.org:8080/4758
Reviewed-by: Jim Apple <[email protected]>
Tested-by: Internal Jenkins
  • Loading branch information
jbapple-cloudera authored and jenkins committed Nov 4, 2016
1 parent 3220904 commit 9f5c6a5
Show file tree
Hide file tree
Showing 114 changed files with 565 additions and 323 deletions.
94 changes: 94 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
---
Checks: "-*,clang*,\
-clang-analyzer-alpha*,\
-clang-analyzer-core.CallAndMessage,\
-clang-analyzer-core.NonNullParamChecker,\
-clang-analyzer-core.NullDereference,\
-clang-analyzer-core.UndefinedBinaryOperatorResult,\
-clang-analyzer-core.uninitialized.ArraySubscript,\
-clang-analyzer-core.uninitialized.Assign,\
-clang-analyzer-core.uninitialized.Branch,\
-clang-analyzer-deadcode.DeadStores,\
-clang-analyzer-unix.Malloc,\
-clang-analyzer-unix.MallocSizeof,\
-clang-diagnostic-c++98*,\
-clang-diagnostic-cast-align,\
-clang-diagnostic-class-varargs,\
-clang-diagnostic-conversion,\
-clang-diagnostic-covered-switch-default,\
-clang-diagnostic-disabled-macro-expansion,\
-clang-diagnostic-documentation-html,\
-clang-diagnostic-documentation-unknown-command,\
-clang-diagnostic-double-promotion,\
-clang-diagnostic-exit-time-destructors,\
-clang-diagnostic-float-conversion,\
-clang-diagnostic-float-equal,\
-clang-diagnostic-global-constructors,\
-clang-diagnostic-gnu-anonymous-struct,\
-clang-diagnostic-header-hygiene,\
-clang-diagnostic-implicit-fallthrough,\
-clang-diagnostic-mismatched-tags,\
-clang-diagnostic-missing-prototypes,\
-clang-diagnostic-missing-variable-declarations,\
-clang-diagnostic-nested-anon-types,\
-clang-diagnostic-old-style-cast,\
-clang-diagnostic-overlength-strings,\
-clang-diagnostic-packed,\
-clang-diagnostic-padded,\
-clang-diagnostic-return-type-c-linkage,\
-clang-diagnostic-shadow,\
-clang-diagnostic-shorten-64-to-32,\
-clang-diagnostic-sign-compare,\
-clang-diagnostic-sign-conversion,\
-clang-diagnostic-switch-enum,\
-clang-diagnostic-undefined-reinterpret-cast,\
-clang-diagnostic-unreachable-code,\
-clang-diagnostic-unreachable-code-return,\
-clang-diagnostic-unused-command-line-argument,\
-clang-diagnostic-unused-local-typedef,\
-clang-diagnostic-unused-parameter,\
-clang-diagnostic-used-but-marked-unused,\
-clang-diagnostic-vla-extension,\
-clang-diagnostic-weak-vtables"

# Ignore warnings in gutil

HeaderFilterRegex: "be/src/\
(benchmarks\
|bufferpool\
|catalog\
|codegen\
|common\
|exec\
|experiments\
|exprs\
|resourcebroker\
|rpc\
|runtime\
|scheduling\
|service\
|statestore\
|testutil\
|thirdparty\
|transport\
|udf\
|udf_samples\
|util)"

AnalyzeTemporaryDtors: true
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ find_package(LlvmBinaries REQUIRED)

# Find LLVM libraries to link against.
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG"
OR "${CMAKE_BUILD_TYPE}" STREQUAL "ADDRESS_SANITIZER")
OR "${CMAKE_BUILD_TYPE}" STREQUAL "ADDRESS_SANITIZER"
OR "${CMAKE_BUILD_TYPE}" STREQUAL "TIDY")
# Use the LLVM libaries with assertions for debug builds.
set(LLVM_ROOT ${LLVM_DEBUG_ROOT})
endif()
Expand Down
20 changes: 19 additions & 1 deletion be/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,30 @@ SET(CXX_FLAGS_RELEASE "${CXX_GCC_FLAGS} -O3 -DNDEBUG -gdwarf-2")
SET(CXX_FLAGS_ADDRESS_SANITIZER
"${CXX_CLANG_FLAGS} -O1 -g -fsanitize=address -fno-omit-frame-pointer -DADDRESS_SANITIZER")

SET(CXX_FLAGS_TIDY "${CXX_CLANG_FLAGS}")
# Catching unused variables requires an optimization level greater than 0
SET(CXX_FLAGS_TIDY "${CXX_FLAGS_TIDY} -O1")
# Ignore assert() and DCHECK() to avoid dead code errors on "DCHECK(false); return
# nullptr" in impossible default switch/case statements.
SET(CXX_FLAGS_TIDY "${CXX_FLAGS_TIDY} -DNDEBUG")
# Turn all warnings back on. Some will be ignored via .clang-tidy's "Checks" value, but
# this allows different "Checks" settings to be used in different clang-tidy runs without
# recompiling.
SET(CXX_FLAGS_TIDY "${CXX_FLAGS_TIDY} -Wall -W -Weverything")
# The Tidy build is so verbose (becasue of -Weverything) that it is unlikely to be viewed
# in a terminal and most likely will be redirecto to less, a log file, or /dev/null. In
# those places color codes just make the output harder to read.
SET(CXX_FLAGS_TIDY "${CXX_FLAGS_TIDY} -fno-color-diagnostics")

# Set compile flags based on the build type.
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
SET(CMAKE_CXX_FLAGS ${CXX_FLAGS_DEBUG})
elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE")
SET(CMAKE_CXX_FLAGS ${CXX_FLAGS_RELEASE})
elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "ADDRESS_SANITIZER")
SET(CMAKE_CXX_FLAGS "${CXX_FLAGS_ADDRESS_SANITIZER}")
elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "TIDY")
SET(CMAKE_CXX_FLAGS "${CXX_FLAGS_TIDY}")
else()
message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}")
endif()
Expand All @@ -115,7 +132,8 @@ find_program(CCACHE ccache)
if (CCACHE AND NOT DEFINED ENV{DISABLE_CCACHE})
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "ADDRESS_SANITIZER")
if ("${CMAKE_BUILD_TYPE}" STREQUAL "ADDRESS_SANITIZER"
OR "${CMAKE_BUILD_TYPE}" STREQUAL "TIDY")
# Need to set CCACHE_CPP so that ccache calls clang with the original source file for
# both preprocessing and compilation. Otherwise, ccache will use clang to preprocess
# the file and then call clang with the preprocessed output if not cached. However,
Expand Down
12 changes: 8 additions & 4 deletions be/src/benchmarks/bloom-filter-benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,8 @@ void RunBenchmarks() {
Benchmark suite("insert");
vector<unique_ptr<insert::TestData> > testdata;
for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
for (int log10fpp = -1; log10fpp >= -3; --log10fpp) {
const double fpp = pow(10, log10fpp);
testdata.emplace_back(new insert::TestData(BloomFilter::MinLogSpace(ndv, fpp)));
snprintf(name, sizeof(name), "ndv %7dk fpp %6.1f%%", ndv/1000, fpp*100);
suite.AddBenchmark(name, insert::Benchmark, testdata.back().get());
Expand All @@ -292,7 +293,8 @@ void RunBenchmarks() {
Benchmark suite("find");
vector<unique_ptr<find::TestData> > testdata;
for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
for (int log10fpp = -1; log10fpp >= -3; --log10fpp) {
const double fpp = pow(10, log10fpp);
testdata.emplace_back(
new find::TestData(BloomFilter::MinLogSpace(ndv, fpp), ndv));
snprintf(name, sizeof(name), "present ndv %7dk fpp %6.1f%%", ndv/1000, fpp*100);
Expand All @@ -309,7 +311,8 @@ void RunBenchmarks() {
Benchmark suite("union");
vector<unique_ptr<either::TestData> > testdata;
for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
for (int log10fpp = -1; log10fpp >= -3; --log10fpp) {
const double fpp = pow(10, log10fpp);
testdata.emplace_back(
new either::TestData(BloomFilter::MinLogSpace(ndv, fpp)));
snprintf(name, sizeof(name), "ndv %7dk fpp %6.1f%%", ndv/1000, fpp*100);
Expand All @@ -330,7 +333,8 @@ int main(int argc, char **argv) {
Benchmark suite("initialize");
vector<unique_ptr<int> > testdata;
for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
for (int log10fpp = -1; log10fpp >= -3; --log10fpp) {
const double fpp = pow(10, log10fpp);
testdata.emplace_back(new int(BloomFilter::MinLogSpace(ndv, fpp)));
snprintf(name, sizeof(name), "ndv %7dk fpp %6.1f%%", ndv / 1000, fpp * 100);
suite.AddBenchmark(name, initialize::Benchmark, testdata.back().get());
Expand Down
3 changes: 2 additions & 1 deletion be/src/benchmarks/in-predicate-benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,9 @@
#include <gutil/strings/substitute.h>

#include "exprs/in-predicate.h"
#include "exprs/in-predicate-ir.cc"

#include "runtime/decimal-value.h"
#include "runtime/string-value.h"
#include "udf/udf-test-harness.h"
#include "util/benchmark.h"
#include "util/cpu-info.h"
Expand Down
15 changes: 0 additions & 15 deletions be/src/benchmarks/parse-timestamp-benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,21 +58,6 @@ using namespace impala;
// ImpalaTimeStamp 37.41 83.35X
// ImpalaTZTimeStamp 37.39 83.3X

#define VALIDATE 0

#if VALIDATE
#define VALIDATE_RESULT(actual, expected, str) \
if (actual != expected) { \
cout << "Parse Error. " \
<< "String: " << str \
<< ". Parsed: " << actual << endl; \
exit(-1); \
}
#else
#define VALIDATE_RESULT(actual, expected, str)
#endif


struct TestData {
vector<StringValue> data;
vector<string> memory;
Expand Down
2 changes: 1 addition & 1 deletion be/src/catalog/catalog-server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ void CatalogServer::UpdateCatalogTopicCallback(
catalog_update_cv_.notify_one();
}

void CatalogServer::GatherCatalogUpdatesThread() {
[[noreturn]] void CatalogServer::GatherCatalogUpdatesThread() {
while (1) {
unique_lock<mutex> unique_lock(catalog_lock_);
// Protect against spurious wakups by checking the value of topic_updates_ready_.
Expand Down
2 changes: 1 addition & 1 deletion be/src/catalog/catalog-server.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ class CatalogServer {
/// to get the latest set of catalog objects that exist, along with some metadata on
/// each object. The results are stored in the shared catalog_objects_ data structure.
/// Also, explicitly releases free memory back to the OS after each complete iteration.
void GatherCatalogUpdatesThread();
[[noreturn]] void GatherCatalogUpdatesThread();

/// This function determines what items have been added/removed from the catalog
/// since the last heartbeat and builds the next topic update to send. To do this, it
Expand Down
1 change: 0 additions & 1 deletion be/src/codegen/codegen-anyval.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ Value* CodegenAnyVal::GetVal(const char* name) {
uint32_t idxs[] = {2, 0};
Value* val = builder_->CreateExtractValue(value_, idxs, name);
return builder_->CreateTrunc(val, codegen_->GetType(type_), name);
break;
}
default:
DCHECK(false) << "Unsupported type: " << type_;
Expand Down
6 changes: 6 additions & 0 deletions be/src/codegen/impala-ir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
// All files here must be added explicitly to the codegen/CMakeLists.txt dependency list

#ifdef IR_COMPILE

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wheader-hygiene"

#include "codegen/codegen-anyval-ir.cc"
#include "exec/aggregation-node-ir.cc"
#include "exec/hash-join-node-ir.cc"
Expand Down Expand Up @@ -54,6 +58,8 @@
#include "udf/udf-ir.cc"
#include "util/hash-util-ir.cc"

#pragma clang diagnostic pop

// Unused function to make sure printf declaration is included in IR module. Used by
// LlvmCodegen::CodegenDebugTrace().
void printf_dummy_fn() {
Expand Down
6 changes: 3 additions & 3 deletions be/src/codegen/llvm-codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ string LlvmCodeGen::cpu_name_;
vector<string> LlvmCodeGen::cpu_attrs_;
unordered_set<string> LlvmCodeGen::gv_ref_ir_fns_;

static void LlvmCodegenHandleError(void* user_data, const std::string& reason,
bool gen_crash_diag) {
[[noreturn]] static void LlvmCodegenHandleError(
void* user_data, const std::string& reason, bool gen_crash_diag) {
LOG(FATAL) << "LLVM hit fatal error: " << reason.c_str();
}

Expand Down Expand Up @@ -1416,7 +1416,7 @@ namespace boost {
/// Handler for exceptions in cross-compiled functions.
/// When boost is configured with BOOST_NO_EXCEPTIONS, it calls this handler instead of
/// throwing the exception.
void throw_exception(std::exception const &e) {
[[noreturn]] void throw_exception(std::exception const& e) {
LOG(FATAL) << "Cannot handle exceptions in codegen'd code " << e.what();
}

Expand Down
3 changes: 0 additions & 3 deletions be/src/common/compiler-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,4 @@
/// decision, e.g. not inlining a small function on a hot path.
#define ALWAYS_INLINE __attribute__((always_inline))

#define CACHE_ALIGNED __attribute__ ((aligned(64)))

#endif

2 changes: 1 addition & 1 deletion be/src/common/init.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static scoped_ptr<impala::Thread> maintenance_thread;
// time slept. If that exceeds PAUSE_WARN_THRESHOLD_MS, a warning is logged.
static scoped_ptr<impala::Thread> pause_monitor;

static void MaintenanceThread() {
[[noreturn]] static void MaintenanceThread() {
while (true) {
sleep(FLAGS_logbufsecs);

Expand Down
5 changes: 0 additions & 5 deletions be/src/common/logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@
#define VLOG(level) while(false) std::cout
#define VLOG_IS_ON(level) (false)
#else
/// GLOG defines this based on the system but doesn't check if it's already
/// been defined. undef it first to avoid warnings.
/// glog MUST be included before gflags. Instead of including them,
/// our files should include this file instead.
#undef _XOPEN_SOURCE
#include <glog/logging.h>
#include <gflags/gflags.h>
#endif
Expand Down
10 changes: 5 additions & 5 deletions be/src/common/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ class Status {
/// Returns the error message associated with a non-successful status.
const ErrorMsg& msg() const {
DCHECK(msg_ != NULL);
return *msg_;
return *msg_; // NOLINT: clang-tidy thinks this might deref a nullptr
}

/// Add a detail string. Calling this method is only defined on a non-OK message
Expand Down Expand Up @@ -257,10 +257,10 @@ class Status {
};

/// some generally useful macros
#define RETURN_IF_ERROR(stmt) \
do { \
Status __status__ = (stmt); \
if (UNLIKELY(!__status__.ok())) return std::move(__status__); \
#define RETURN_IF_ERROR(stmt) \
do { \
Status __status__ = (stmt); \
if (UNLIKELY(!__status__.ok())) return __status__; \
} while (false)

#define ABORT_IF_ERROR(stmt) \
Expand Down
12 changes: 6 additions & 6 deletions be/src/exec/delimited-text-parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,20 @@ using namespace impala;
DelimitedTextParser::DelimitedTextParser(
int num_cols, int num_partition_keys, const bool* is_materialized_col,
char tuple_delim, char field_delim, char collection_item_delim, char escape_char)
: num_tuple_delims_(0),
: is_materialized_col_(is_materialized_col),
num_tuple_delims_(0),
num_delims_(0),
num_cols_(num_cols),
num_partition_keys_(num_partition_keys),
column_idx_(0),
last_row_delim_offset_(-1),
field_delim_(field_delim),
process_escapes_(escape_char != '\0'),
escape_char_(escape_char),
collection_item_delim_(collection_item_delim),
tuple_delim_(tuple_delim),
current_column_has_escape_(false),
last_char_is_escape_(false),
last_row_delim_offset_(-1),
num_cols_(num_cols),
num_partition_keys_(num_partition_keys),
is_materialized_col_(is_materialized_col),
column_idx_(0),
unfinished_tuple_(false){
// Escape character should not be the same as tuple or col delim unless it is the
// empty delimiter.
Expand Down
Loading

0 comments on commit 9f5c6a5

Please sign in to comment.