Skip to content

Commit

Permalink
Use the cpuinfo library instead of our own code for CPU feature detection.
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 313261629
  • Loading branch information
bjacob authored and copybara-github committed May 26, 2020
1 parent 7b75a8b commit 74b7491
Show file tree
Hide file tree
Showing 15 changed files with 1,018 additions and 430 deletions.
23 changes: 23 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,26 @@ http_archive(
"https://github.com/google/googletest/archive/release-1.8.1.tar.gz",
],
)

# clog library, used by cpuinfo for logging.
# The sources are downloaded from an archive of the pytorch/cpuinfo repository
# at a pinned commit (note: a different commit than the one used for the
# "cpuinfo" archive in this file) and are built with the BUILD file provided
# by this repository at //third_party:clog.BUILD.
http_archive(
name = "clog",
strip_prefix = "cpuinfo-d5e37adf1406cf899d7d9ec1d317c47506ccb970",
sha256 = "3f2dc1970f397a0e59db72f9fca6ff144b216895c1d606f6c94a507c1e53a025",
urls = [
"https://github.com/pytorch/cpuinfo/archive/d5e37adf1406cf899d7d9ec1d317c47506ccb970.tar.gz",
],
build_file = "@//third_party:clog.BUILD",
)

# cpuinfo library, used for detecting processor characteristics.
# The sources are downloaded from an archive of the pytorch/cpuinfo repository
# at a pinned commit, built with the BUILD file provided by this repository at
# //third_party:cpuinfo.BUILD, and modified by the local patch
# //third_party:cpuinfo.patch.
http_archive(
name = "cpuinfo",
strip_prefix = "cpuinfo-c2092219e7c874783a00a62edb94ddc672f57ab3",
sha256 = "ea56c399a4f6ca5f749e71acb6a7bfdc653eb65d8f658cb2e414a2fcdca1fe8b",
urls = [
"https://github.com/pytorch/cpuinfo/archive/c2092219e7c874783a00a62edb94ddc672f57ab3.zip",
],
build_file = "@//third_party:cpuinfo.BUILD",
patches = ["@//third_party:cpuinfo.patch"],
)
45 changes: 20 additions & 25 deletions ruy/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ config_setting(
values = {"cpu": "k8"},
)

# Matches builds whose "cpu" configuration value is "ppc". The ":cpuinfo"
# cc_library in this file selects on it to leave out the external @cpuinfo
# dependency, which does not build on ppc.
config_setting(
name = "ppc",
values = {
"cpu": "ppc",
},
)

config_setting(
name = "optimized",
values = {
Expand Down Expand Up @@ -278,31 +285,22 @@ cc_library(
)

cc_library(
name = "detect_arm",
srcs = [
"detect_arm.cc",
],
hdrs = [
"detect_arm.h",
],
copts = ruy_copts(),
deps = [
":platform",
],
)

cc_library(
name = "detect_x86",
name = "cpuinfo",
srcs = [
"detect_x86.cc",
"cpuinfo.cc",
],
hdrs = [
"detect_x86.h",
"cpuinfo.h",
],
copts = ruy_copts(),
deps = [
":platform",
copts = ruy_copts() + [
# ruy_copts contains -Wundef, but cpuinfo's header warns with that.
"-Wno-undef",
],
deps = [":platform"] + select({
# cpuinfo does not build on ppc.
":ppc": [],
"//conditions:default": ["@cpuinfo"],
}),
)

cc_library(
Expand Down Expand Up @@ -752,8 +750,6 @@ cc_library(
":allocator",
":check_macros",
":ctx",
":detect_arm",
":detect_x86",
":have_built_path_for",
":path",
":platform",
Expand Down Expand Up @@ -799,8 +795,7 @@ cc_library(
deps = [
":allocator",
":check_macros",
":detect_arm",
":detect_x86",
":cpuinfo",
":have_built_path_for",
":path",
":platform",
Expand Down Expand Up @@ -959,7 +954,7 @@ cc_library(
":context",
":ctx",
":context_get_ctx",
"//ruy/profiler:profiler",
"//ruy/profiler",
] + ruy_test_ext_deps(),
)

Expand Down
55 changes: 55 additions & 0 deletions ruy/cpuinfo.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#include "ruy/cpuinfo.h"

#include "ruy/platform.h"

#define RUY_HAVE_CPUINFO (!RUY_PPC)

#if RUY_HAVE_CPUINFO

#include <cpuinfo.h>

namespace ruy {

// Calls cpuinfo_deinitialize(), but only when this object previously recorded
// a successful cpuinfo_initialize() call. Objects that never attempted
// initialization, or whose attempt failed, do nothing here.
CpuInfo::~CpuInfo() {
  const bool was_initialized = (init_status_ == InitStatus::kInitialized);
  if (!was_initialized) {
    return;
  }
  cpuinfo_deinitialize();
}

// Initializes the cpuinfo library on the first call and remembers the outcome,
// so that later calls neither retry a failed initialization nor repeat a
// successful one. Returns true if and only if the library is initialized.
bool CpuInfo::EnsureInitialized() {
  if (init_status_ != InitStatus::kNotYetAttempted) {
    // An earlier call already settled the outcome; just report it.
    return init_status_ == InitStatus::kInitialized;
  }
  const bool succeeded = cpuinfo_initialize();
  init_status_ = succeeded ? InitStatus::kInitialized : InitStatus::kFailed;
  return succeeded;
}

// Returns true if cpuinfo could be initialized and it reports the ARM NEON
// dot-product feature; returns false otherwise.
bool CpuInfo::NeonDotprod() {
  if (!EnsureInitialized()) {
    return false;
  }
  return cpuinfo_has_arm_neon_dot();
}

// Returns true if cpuinfo could be initialized and it reports x86 SSE4.2
// support; returns false otherwise.
bool CpuInfo::Sse42() {
  if (!EnsureInitialized()) {
    return false;
  }
  return cpuinfo_has_x86_sse4_2();
}

// Returns true if cpuinfo could be initialized and it reports x86 AVX2
// support; returns false otherwise.
bool CpuInfo::Avx2() {
  if (!EnsureInitialized()) {
    return false;
  }
  return cpuinfo_has_x86_avx2();
}

// Returns true if cpuinfo could be initialized and it reports all of the
// AVX-512 subsets queried below (F, DQ, CD, BW and VL); returns false
// otherwise.
bool CpuInfo::Avx512() {
  if (!EnsureInitialized()) {
    return false;
  }
  // Every one of these subsets must be present; evaluation stops at the first
  // missing one, in the same order as before.
  const bool has_all_subsets =
      cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512dq() &&
      cpuinfo_has_x86_avx512cd() && cpuinfo_has_x86_avx512bw() &&
      cpuinfo_has_x86_avx512vl();
  return has_all_subsets;
}

// Returns true if cpuinfo could be initialized and it reports the x86
// AVX-512 VNNI feature; returns false otherwise.
bool CpuInfo::AvxVnni() {
  if (!EnsureInitialized()) {
    return false;
  }
  return cpuinfo_has_x86_avx512vnni();
}

} // namespace ruy

#else // not RUY_HAVE_CPUINFO

namespace ruy {
CpuInfo::~CpuInfo() {}
} // namespace ruy

#endif
38 changes: 29 additions & 9 deletions ruy/detect_arm.h → ruy/cpuinfo.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2019 Google LLC. All Rights Reserved.
/* Copyright 2020 Google LLC. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -13,17 +13,37 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Temporary dotprod-detection code until we can rely on getauxval.

#ifndef RUY_RUY_DETECT_ARM_H_
#define RUY_RUY_DETECT_ARM_H_
#ifndef RUY_RUY_CPUINFO_H_
#define RUY_RUY_CPUINFO_H_

namespace ruy {

// On A64, returns true if the dotprod extension is present.
// On other architectures, returns false unconditionally.
bool DetectDotprod();
// Wraps the functionality that ruy needs from the cpuinfo library.
//
// Each feature query returns true only if the underlying library could be
// initialized and reports the feature; initialization is attempted on the
// first query and its outcome is remembered in `init_status_`.
//
// The destructor releases the library when initialization succeeded, so an
// instance must not be duplicated: both copy construction and copy assignment
// are deleted. (Copy assignment would otherwise be implicitly generated and
// would duplicate `init_status_`, leading to an unbalanced deinitialization.)
class CpuInfo final {
 public:
  CpuInfo() {}
  ~CpuInfo();

  // ARM features
  bool NeonDotprod();

  // X86 features
  bool Sse42();
  bool Avx2();
  bool Avx512();
  bool AvxVnni();

 private:
  // Tracks whether initialization of the underlying library has been
  // attempted yet and, if so, whether it succeeded.
  enum class InitStatus {
    kNotYetAttempted,
    kInitialized,
    kFailed,
  };
  InitStatus init_status_ = InitStatus::kNotYetAttempted;

  // Attempts initialization if it has not been attempted yet. Returns true if
  // the underlying library is initialized.
  bool EnsureInitialized();

  // Non-copyable: see the class comment.
  CpuInfo(const CpuInfo&) = delete;
  CpuInfo& operator=(const CpuInfo&) = delete;
};

} // namespace ruy

#endif // RUY_RUY_DETECT_ARM_H_
#endif // RUY_RUY_CPUINFO_H_
20 changes: 11 additions & 9 deletions ruy/ctx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ limitations under the License.
#include <functional>

#include "ruy/check_macros.h"
#include "ruy/cpuinfo.h"
#include "ruy/ctx_impl.h"
#include "ruy/detect_arm.h"
#include "ruy/detect_x86.h"
#include "ruy/have_built_path_for.h"
#include "ruy/path.h"
#include "ruy/platform.h"
Expand All @@ -47,13 +46,15 @@ void Ctx::SetRuntimeEnabledPaths(Path paths) {
mutable_impl()->runtime_enabled_paths_ = paths | kNonArchPaths;
}

// Returns a pointer to the CpuInfo object stored in this context's
// implementation object (its `cpuinfo_` member).
CpuInfo* Ctx::mutable_cpuinfo() {
  auto&& impl = mutable_impl();
  return &(impl->cpuinfo_);
}

namespace {

// For each Path bit set in `paths_to_test`, performs runtime detection and
// sets the corresponding bit in the return value if and only if it is
// supported. Path bits that are not set in the input
// `paths_to_detect` value are also left not set in the return value.
Path DetectRuntimeSupportedPaths(Path paths_to_detect) {
Path DetectRuntimeSupportedPaths(Path paths_to_detect, CpuInfo* cpuinfo) {
// Paths in kNonArchPaths are always implicitly supported.
// Further logic below may add more bits to `results`.
Path result = kNonArchPaths;
Expand Down Expand Up @@ -84,20 +85,21 @@ Path DetectRuntimeSupportedPaths(Path paths_to_detect) {
// build it at the moment. That is largely because we have had to machine
// encode dotprod instructions, so we don't actually rely on toolchain support
// for them.
maybe_add(Path::kNeonDotprod, []() { return DetectDotprod(); });
maybe_add(Path::kNeonDotprod, [=]() { return cpuinfo->NeonDotprod(); });
#elif RUY_PLATFORM_X86
// x86 SIMD paths currently require both runtime detection, and detection of
// whether we're building the path at all.
maybe_add(Path::kSse42,
[]() { return HaveBuiltPathForSse42() && DetectCpuSse42(); });
[=]() { return HaveBuiltPathForSse42() && cpuinfo->Sse42(); });
maybe_add(Path::kAvx2,
[]() { return HaveBuiltPathForAvx2() && DetectCpuAvx2(); });
[=]() { return HaveBuiltPathForAvx2() && cpuinfo->Avx2(); });
maybe_add(Path::kAvx512,
[]() { return HaveBuiltPathForAvx512() && DetectCpuAvx512(); });
[=]() { return HaveBuiltPathForAvx512() && cpuinfo->Avx512(); });
maybe_add(Path::kAvxVnni,
[]() { return HaveBuiltPathForAvxVnni() && DetectCpuAvxVnni(); });
[=]() { return HaveBuiltPathForAvxVnni() && cpuinfo->AvxVnni(); });
#else
(void)maybe_add;
(void)cpuinfo;
#endif

// Sanity checks
Expand All @@ -116,7 +118,7 @@ Path Ctx::GetRuntimeEnabledPaths() {
// The value Path::kNone indicates the initial state before detection has been
// performed.
if (*paths == Path::kNone) {
*paths = DetectRuntimeSupportedPaths(kAllPaths);
*paths = DetectRuntimeSupportedPaths(kAllPaths, mutable_cpuinfo());
}

return *paths;
Expand Down
2 changes: 2 additions & 0 deletions ruy/ctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class ThreadPool;
class Allocator;
class TuningResolver;
class PrepackedCache;
class CpuInfo;
enum class Path : std::uint8_t;
enum class Tuning;

Expand All @@ -47,6 +48,7 @@ class Ctx /* not final, subclassed by CtxImpl */ {
ThreadPool* mutable_thread_pool();
int max_num_threads() const;
void set_max_num_threads(int value);
CpuInfo* mutable_cpuinfo();

// Returns the set of Path's that are available. By default, this is based on
// runtime detection of CPU features, as well as on which code paths were
Expand Down
5 changes: 4 additions & 1 deletion ruy/ctx_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ limitations under the License.
#include <vector>

#include "ruy/allocator.h"
#include "ruy/cpuinfo.h"
#include "ruy/ctx.h"
#include "ruy/path.h"
#include "ruy/prepacked_cache.h"
Expand Down Expand Up @@ -66,9 +67,11 @@ class CtxImpl final : public Ctx {
// this allocator, and its per-thread allocator.
std::unique_ptr<Allocator> main_allocator_;
std::unique_ptr<PrepackedCache> prepacked_cache_;
// Set of Paths detected at runtime to be supported. The initial value kNone
// Set of Paths enabled at runtime. By default, that is based on runtime
// detection, but may be overridden. The initial value kNone
// means that detection has not yet been performed.
Path runtime_enabled_paths_ = Path::kNone;
CpuInfo cpuinfo_;
// State for each thread in the thread pool. Entry 0 is the main thread.
std::vector<std::unique_ptr<ThreadSpecificResource>>
thread_specific_resources_;
Expand Down
Loading

0 comments on commit 74b7491

Please sign in to comment.