Skip to content

Commit

Permalink
[analyzer] Make StmtDataCollector customizable
Browse files Browse the repository at this point in the history
Summary:
This moves the data collection macro calls for Stmt nodes
to lib/AST/StmtDataCollectors.inc

Users can subclass ConstStmtVisitor and include StmtDataCollectors.inc
to define visitor methods for each Stmt subclass. This makes it also
possible to customize the visit methods as exemplified in
lib/Analysis/CloneDetection.cpp.

Move helper methods for data collection to a new module,
AST/DataCollection.

Add data collection for DeclRefExpr, MemberExpr and some literals.

Reviewers: arphaman, teemperor!

Subscribers: mgorny, xazax.hun, cfe-commits

Differential Revision: https://reviews.llvm.org/D36664

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@311569 91177308-0d34-0410-b5e6-96231b3b80d8
(cherry picked from commit 6083032)
  • Loading branch information
krobelus authored and haoNoQ committed Oct 3, 2017
1 parent 8f1e472 commit 9fb7790
Show file tree
Hide file tree
Showing 8 changed files with 499 additions and 234 deletions.
65 changes: 65 additions & 0 deletions include/clang/AST/DataCollection.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//===--- DataCollection.h ---------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file declares helper methods for collecting data from AST nodes.
///
/// To collect data from Stmt nodes, subclass ConstStmtVisitor and include
/// StmtDataCollectors.inc after defining the macros that you need. This
/// provides data collection implementations for most Stmt kinds. Note
/// that this code requires some conditions to be met:
///
/// - There must be a method addData(const T &Data) that accepts strings,
/// integral types as well as QualType. All data is forwarded to this
/// method.
/// - The ASTContext of the Stmt must be accessible by the name Context.
///
/// It is also possible to override individual visit methods. Have a look at
/// the DataCollector in lib/Analysis/CloneDetection.cpp for a usage example.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_AST_DATACOLLECTION_H
#define LLVM_CLANG_AST_DATACOLLECTION_H

#include "clang/AST/ASTContext.h"

namespace clang {
namespace data_collection {

/// Returns a string that represents all macro expansions that expanded into the
/// given SourceLocation.
///
/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations
/// A and B are expanded from the same macros in the same order.
///
/// \param Loc The location whose chain of macro expansions is described.
/// \param Context The ASTContext whose SourceManager and language options are
/// used to look up the macro names.
/// \returns The macro names, each followed by a single space, beginning with
/// the immediate macro of \p Loc and continuing through its macro callers. The
/// string is empty when \p Loc is not a macro location.
std::string getMacroStack(SourceLocation Loc, ASTContext &Context);

/// Helper overloads for implementing addData() on top of a consumer type that
/// exposes a method update(StringRef).
///
/// String overload: \p Str is handed to the consumer's update() unchanged.
template <typename ConsumerT>
void addDataToConsumer(ConsumerT &DataConsumer, llvm::StringRef Str) {
  DataConsumer.update(Str);
}

/// QualType overload: the type is rendered with QualType::getAsString() and
/// the resulting text is forwarded through the string overload.
template <typename ConsumerT>
void addDataToConsumer(ConsumerT &DataConsumer, const QualType &QT) {
  const std::string TypeName = QT.getAsString();
  addDataToConsumer(DataConsumer, TypeName);
}

/// Value overload: enabled for integral types, enums and anything implicitly
/// convertible to size_t (the latter admits llvm::hash_code). The object
/// representation of \p Data (sizeof(Data) bytes) is passed to update().
template <typename ConsumerT, typename ValueT>
typename std::enable_if<std::is_integral<ValueT>::value ||
                        std::is_enum<ValueT>::value ||
                        std::is_convertible<ValueT, size_t>::value>::type
addDataToConsumer(ConsumerT &DataConsumer, ValueT Data) {
  // View the by-value copy of Data as a sequence of raw bytes.
  char *const RawBytes = reinterpret_cast<char *>(&Data);
  DataConsumer.update(StringRef(RawBytes, sizeof(Data)));
}

} // end namespace data_collection
} // end namespace clang

#endif // LLVM_CLANG_AST_DATACOLLECTION_H
190 changes: 0 additions & 190 deletions include/clang/Analysis/CloneDetection.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,7 @@
#ifndef LLVM_CLANG_AST_CLONEDETECTION_H
#define LLVM_CLANG_AST_CLONEDETECTION_H

#include "clang/AST/DeclTemplate.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"
#include <vector>

Expand All @@ -31,192 +27,6 @@ class VarDecl;
class ASTContext;
class CompoundStmt;

namespace clone_detection {

/// Returns a string that represents all macro expansions that expanded into the
/// given SourceLocation.
///
/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations
/// A and B are expanded from the same macros in the same order.
///
/// \param Loc The location whose macro expansions are described.
/// \param Context The ASTContext associated with \p Loc.
std::string getMacroStack(SourceLocation Loc, ASTContext &Context);

/// Collects the data of a single Stmt.
///
/// This class defines what a code clone is: If it collects for two statements
/// the same data, then those two statements are considered to be clones of each
/// other.
///
/// All collected data is forwarded to the given data consumer of the type T.
/// The data consumer class needs to provide a member method with the signature:
/// update(StringRef Str)
///
/// Collection happens in the constructor: constructing a StmtDataCollector
/// visits the given statement and feeds the consumer as a side effect.
template <typename T>
class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector<T>> {

/// The ASTContext of the visited statement; used for macro lookups, language
/// options and for generating assembly strings.
ASTContext &Context;
/// The data sink to which all data is forwarded.
T &DataConsumer;

public:
/// Collects data of the given Stmt.
/// \param S The given statement.
/// \param Context The ASTContext of S.
/// \param DataConsumer The data sink to which all data is forwarded.
StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer)
: Context(Context), DataConsumer(DataConsumer) {
this->Visit(S);
}

/// Integer type used for the integral pieces of data (enum values, flags,
/// counts) that are forwarded to the consumer.
typedef unsigned DataPiece;

// Below are utility methods for forwarding different kinds of data to the
// data consumer.

/// Forwards the sizeof(Integer) bytes that make up \p Integer.
void addData(DataPiece Integer) {
DataConsumer.update(
StringRef(reinterpret_cast<char *>(&Integer), sizeof(Integer)));
}

/// Forwards \p Str unchanged.
void addData(llvm::StringRef Str) { DataConsumer.update(Str); }

/// Forwards the string representation of \p QT.
void addData(const QualType &QT) { addData(QT.getAsString()); }

// The functions below collect the class specific data of each Stmt subclass.

// Utility macro for defining a visit method for a given class. This method
// calls back to the ConstStmtVisitor to visit all parent classes.
// CODE runs first with the visited node available as 'S'.
#define DEF_ADD_DATA(CLASS, CODE) \
void Visit##CLASS(const CLASS *S) { \
CODE; \
ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S); \
}

DEF_ADD_DATA(Stmt, {
addData(S->getStmtClass());
// This ensures that non-macro-generated code isn't identical to
// macro-generated code: the macro stack of a location outside of any macro
// expansion differs from that of a location inside one.
addData(getMacroStack(S->getLocStart(), Context));
addData(getMacroStack(S->getLocEnd(), Context));
})
DEF_ADD_DATA(Expr, { addData(S->getType()); })

//--- Builtin functionality ----------------------------------------------//
DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
DEF_ADD_DATA(TypeTraitExpr, {
addData(S->getTrait());
// The type of every trait argument is part of the collected data.
for (unsigned i = 0; i < S->getNumArgs(); ++i)
addData(S->getArg(i)->getType());
})

//--- Calls --------------------------------------------------------------//
DEF_ADD_DATA(CallExpr, {
// Function pointers don't have a callee and we just skip hashing it.
if (const FunctionDecl *D = S->getDirectCallee()) {
// If the function is a template specialization, we also need to handle
// the template arguments as they are not included in the qualified name.
if (auto Args = D->getTemplateSpecializationArgs()) {
std::string ArgString;

// Print all template arguments into ArgString
llvm::raw_string_ostream OS(ArgString);
for (unsigned i = 0; i < Args->size(); ++i) {
Args->get(i).print(Context.getLangOpts(), OS);
// Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'.
OS << '\n';
}
OS.flush();

addData(ArgString);
}
addData(D->getQualifiedNameAsString());
}
})

//--- Exceptions ---------------------------------------------------------//
DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })

//--- C++ OOP Stmts ------------------------------------------------------//
DEF_ADD_DATA(CXXDeleteExpr, {
addData(S->isArrayFormAsWritten());
addData(S->isGlobalDelete());
})

//--- Casts --------------------------------------------------------------//
DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })

//--- Miscellaneous Exprs ------------------------------------------------//
DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })

//--- Control flow -------------------------------------------------------//
// NOTE(review): getLabel()/getDecl() are dereferenced without a null check
// below; presumably they are never null for these nodes - confirm.
DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
DEF_ADD_DATA(IndirectGotoStmt, {
// Only a constant target contributes its label name.
if (S->getConstantTarget())
addData(S->getConstantTarget()->getName());
})
DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })

//--- Objective-C --------------------------------------------------------//
DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
DEF_ADD_DATA(ObjCPropertyRefExpr, {
addData(S->isSuperReceiver());
addData(S->isImplicitProperty());
})
DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })

//--- Miscellaneous Stmts ------------------------------------------------//
DEF_ADD_DATA(CXXFoldExpr, {
addData(S->isRightFold());
addData(S->getOperator());
})
DEF_ADD_DATA(GenericSelectionExpr, {
// Collect the type of every association.
for (unsigned i = 0; i < S->getNumAssocs(); ++i) {
addData(S->getAssocType(i));
}
})
DEF_ADD_DATA(LambdaExpr, {
// Per capture: pack-expansion flag, capture kind and, for variable
// captures, the type of the captured variable.
for (const LambdaCapture &C : S->captures()) {
addData(C.isPackExpansion());
addData(C.getCaptureKind());
if (C.capturesVariable())
addData(C.getCapturedVar()->getType());
}
addData(S->isGenericLambda());
addData(S->isMutable());
})
DEF_ADD_DATA(DeclStmt, {
// Collect the number of declarations, then the type of each declaration
// that is a VarDecl; other kinds of declarations only affect the count.
auto numDecls = std::distance(S->decl_begin(), S->decl_end());
addData(static_cast<DataPiece>(numDecls));
for (const Decl *D : S->decls()) {
if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
addData(VD->getType());
}
}
})
DEF_ADD_DATA(AsmStmt, {
addData(S->isSimple());
addData(S->isVolatile());
addData(S->generateAsmString(Context));
// Input constraints, output constraints and clobbers are collected in
// that order.
for (unsigned i = 0; i < S->getNumInputs(); ++i) {
addData(S->getInputConstraint(i));
}
for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
addData(S->getOutputConstraint(i));
}
for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
addData(S->getClobber(i));
}
})
DEF_ADD_DATA(AttributedStmt, {
// Each attribute contributes its spelling.
for (const Attr *A : S->getAttrs()) {
addData(std::string(A->getSpelling()));
}
})
};
} // namespace clone_detection

/// Identifies a list of statements.
///
/// Can either identify a single arbitrary Stmt object, a continuous sequence of
Expand Down
1 change: 1 addition & 0 deletions lib/AST/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_clang_library(clangAST
CommentLexer.cpp
CommentParser.cpp
CommentSema.cpp
DataCollection.cpp
Decl.cpp
DeclarationName.cpp
DeclBase.cpp
Expand Down
50 changes: 50 additions & 0 deletions lib/AST/DataCollection.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===-- DataCollection.cpp --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "clang/AST/DataCollection.h"

#include "clang/Lex/Lexer.h"

namespace clang {
namespace data_collection {

/// Writes the name of the immediate macro containing \p Loc to \p MacroStack,
/// followed by a single space.
static void printMacroName(llvm::raw_string_ostream &MacroStack,
                           ASTContext &Context, SourceLocation Loc) {
  const SourceManager &SM = Context.getSourceManager();
  const LangOptions &LangOpts = Context.getLangOpts();

  // The trailing space is padding: without it, consecutive macro names would
  // run together and could no longer be told apart.
  MacroStack << Lexer::getImmediateMacroName(Loc, SM, LangOpts) << " ";
}

/// Builds a textual description of every macro expansion that \p Loc is
/// nested in.
///
/// Two SourceLocations A and B for which
/// 'getMacroStack(A) == getMacroStack(B)' holds were expanded from the same
/// macros in the same order. A location that is not a macro location yields
/// an empty string.
std::string getMacroStack(SourceLocation Loc, ASTContext &Context) {
  std::string Result;
  llvm::raw_string_ostream OS(Result);
  SourceManager &SM = Context.getSourceManager();

  // Record the immediate macro of the current location, then step to the
  // location of its macro caller, until we are outside of any macro.
  for (SourceLocation Cur = Loc; Cur.isMacroID();
       Cur = SM.getImmediateMacroCallerLoc(Cur))
    printMacroName(OS, Context, Cur);

  OS.flush();
  return Result;
}

} // end namespace data_collection
} // end namespace clang
Loading

0 comments on commit 9fb7790

Please sign in to comment.