Skip to content

Commit

Permalink
feat: support neon, sse simd and dynamic dispatch (bytedance#56)
Browse files Browse the repository at this point in the history
# Main changes
- support both static dispatch and dynamic dispatch
- support neon and sse architecture
  • Loading branch information
xiegx94 authored Apr 11, 2023
1 parent e3038ea commit 50514ec
Show file tree
Hide file tree
Showing 60 changed files with 3,955 additions and 1,193 deletions.
40 changes: 23 additions & 17 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@ jobs:
strategy:
fail-fast: false
matrix:
llvm_version:
- '11'
- '13.0.0'
- '15'
tool:
- 'cmake'
- 'bazel'
llvm_version: ['11', '13.0.0', '15']
tool: ['cmake', 'bazel']
arch: [westmere, haswell]
exclude:
- tool: 'cmake'
arch: westmere
env:
CC: clang
CXX: clang++
Expand All @@ -38,10 +37,10 @@ jobs:
with:
cmake-version: "latest"

- name: Run Test Use Bazel
- name: Run ${{ matrix.arch }} Test Use Bazel
if: matrix.tool == 'bazel'
run : |
bash ./scripts/unittest.sh -c
bash ./scripts/unittest.sh -c --arch=${{ matrix.arch }}
- name: Run Test Use CMake
if: matrix.tool == 'cmake'
Expand All @@ -53,12 +52,19 @@ jobs:
strategy:
fail-fast: false
matrix:
gcc_version:
- '9'
- '12'
tool:
- 'cmake'
- 'bazel'
gcc_version: ['9', '12']
tool: ['cmake', 'bazel']
arch: [westmere, haswell]
dispatch: [static, dynamic]
exclude:
- tool: 'bazel'
arch: haswell
dispatch: dynamic
- tool: 'cmake'
dispatch: dynamic
- tool: 'cmake'
arch: westmere

env:
CC: gcc-${{ matrix.gcc_version }}
CXX: g++-${{ matrix.gcc_version}}
Expand All @@ -82,10 +88,10 @@ jobs:
with:
cmake-version: "latest"

- name: Run Test Use Bazel
- name: Run ${{ matrix.arch }} ${{ matrix.dispatch }} Test Use Bazel
if: matrix.tool == 'bazel'
run : |
bash ./scripts/unittest.sh -g
bash ./scripts/unittest.sh -g --arch=${{ matrix.arch }} --dispatch=${{ matrix.dispatch }}
- name: Run Test Use CMake
if: matrix.tool == 'cmake'
Expand Down
153 changes: 146 additions & 7 deletions BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,8 +1,73 @@
package(default_visibility = ["//visibility:public"])
load("@bazel_skylib//rules:common_settings.bzl", "string_flag")

# x86 AVX2 flag set; currently identical in content to avx2_copts below.
common_copts = ['-mavx2', '-mbmi', '-mpclmul']
# AddressSanitizer + UndefinedBehaviorSanitizer instrumentation flags.
sanitize_copts = ['-fsanitize=address,undefined', '-fsanitize-recover=address']

# Per-architecture compiler flags, picked by select() on the
# avx2_build / sse_build / arm_build config_settings.
avx2_copts = ['-mavx2', '-mbmi', '-mpclmul']
sse_copts = ['-msse', '-msse2', '-msse4.1', '-mpclmul']
# The option is spelled '-march=<arch>'; the previous '-march-armv8-a' is not
# a recognised compiler option and made every arm_build compile fail.
arm_copts = ['-march=armv8-a']
# Optimised C++17 flags with assertions disabled, appended to each
# architecture's flag set.
# NOTE(review): the unittest target also appends these after '-UNDEBUG' and
# '-std=c++14'; the later '-DNDEBUG'/'-std=c++17' presumably win there —
# confirm that is intended.
benchmark_copts = ['-O3', '-DNDEBUG', '-std=c++17']
# Dispatch-mode flags: static adds nothing, dynamic defines
# SONIC_DYNAMIC_DISPATCH=1.
static_dispatch_copts = []
dynamic_dispatch_copts = ['-DSONIC_DYNAMIC_DISPATCH=1']
# Extra link options per sanitizer mode (none / GCC / Clang).
no_san_linkopts = []
gcc_san_linkopts = ['-lasan']
clang_san_linkopts = ['-fsanitize-link-c++-runtime']

# Build setting that chooses the SIMD target. The arm_build, sse_build and
# avx2_build config_settings match the values "arm", "westmere" and "haswell".
string_flag(
    name = "sonic_arch",
    build_setting_default = "haswell",
)

# Build setting that chooses the dispatch mode. The static_dispatch and
# dynamic_dispatch config_settings match the values "static" and "dynamic".
string_flag(
    name = "sonic_dispatch",
    build_setting_default = "static",
)

# Build setting that chooses the sanitizer toolchain flavour. The no_san,
# gcc_san and clang_san config_settings match "no", "gcc" and "clang".
string_flag(
    name = "sonic_sanitizer",
    build_setting_default = "no",
)

# Matches when :sonic_arch is "arm"; select() uses it to pick arm_copts.
config_setting(
    name = "arm_build",
    flag_values = {
        ":sonic_arch": "arm",
    },
)

# Matches when :sonic_arch is "westmere"; select() uses it to pick sse_copts.
config_setting(
    name = "sse_build",
    flag_values = {
        ":sonic_arch": "westmere",
    },
)
# Matches when :sonic_arch is "haswell" (the flag's default); select() uses it
# to pick avx2_copts.
config_setting(
    name = "avx2_build",
    flag_values = {
        ":sonic_arch": "haswell",
    },
)

# Matches when :sonic_dispatch is "static" (the flag's default); select() uses
# it to pick static_dispatch_copts.
config_setting(
    name = "static_dispatch",
    flag_values = {
        ":sonic_dispatch": "static",
    },
)

# Matches when :sonic_dispatch is "dynamic"; select() uses it to pick
# dynamic_dispatch_copts (which defines SONIC_DYNAMIC_DISPATCH=1).
config_setting(
    name = "dynamic_dispatch",
    flag_values = {
        ":sonic_dispatch": "dynamic",
    },
)

# Matches when :sonic_sanitizer is "no" (the flag's default); the unittest
# target adds no sanitizer flags under this setting.
config_setting(
    name = "no_san",
    flag_values = {
        ":sonic_sanitizer": "no",
    },
)

# Matches when :sonic_sanitizer is "gcc"; the unittest target then adds
# sanitize_copts and links with gcc_san_linkopts.
config_setting(
    name = "gcc_san",
    flag_values = {
        ":sonic_sanitizer": "gcc",
    },
)

# Matches when :sonic_sanitizer is "clang"; the unittest target then adds
# sanitize_copts and links with clang_san_linkopts.
config_setting(
    name = "clang_san",
    flag_values = {
        ":sonic_sanitizer": "clang",
    },
)

cc_library(
name = "string_view",
hdrs = glob(["include/thirdparty/**/*.h"]),
Expand Down Expand Up @@ -35,12 +100,21 @@ cc_binary(
"@simdjson",
"@jsoncpp//:jsoncpp",
],
copts = common_copts + ['-DNDEBUG', '-std=c++17'],
# copts = common_copts + ['-DNDEBUG', '-std=c++17'],
copts = select({
"arm_build": arm_copts + benchmark_copts,
"sse_build": sse_copts + benchmark_copts,
"avx2_build": avx2_copts + benchmark_copts,
}) +\
select({
"static_dispatch": static_dispatch_copts,
"dynamic_dispatch": dynamic_dispatch_copts,
}),
linkopts = ['-lstdc++fs'],
)

cc_test(
name = "unittest-gcc",
name = "unittest",
srcs = glob([
"tests/*.cpp",
"include/sonic/*",
Expand All @@ -51,10 +125,49 @@ cc_test(
"@gtest//:gtest_main",
],
data = glob([ "testdata/*.json"]),
linkopts = [ '-lstdc++fs', '-fstack-protector-all',] +\
select({
"no_san": [],
"gcc_san": sanitize_copts + gcc_san_linkopts,
"clang_san": sanitize_copts + clang_san_linkopts,
}),
copts = [
'-O3', '-g', '-UNDEBUG', '-std=c++14',
'-fstack-protector-all',
'-Iinclude', '-Wall', '-Wextra', '-Werror',
] + \
select({
"no_san": [],
"gcc_san": sanitize_copts,
"clang_san": sanitize_copts,
}) + \
select({
"arm_build": arm_copts + benchmark_copts,
"sse_build": sse_copts + benchmark_copts,
"avx2_build": avx2_copts + benchmark_copts,
}) +\
select({
"static_dispatch": static_dispatch_copts,
"dynamic_dispatch": dynamic_dispatch_copts,
}),
)

cc_test(
name = "unittest-clang",
srcs = glob([
"tests/*.h",
"tests/*.cpp",
"include/sonic/*",
"include/sonic/**/*",
]),
deps = [
":string_view",
"@gtest//:gtest_main",
],
data = glob([ "testdata/*.json"]),
linkopts = sanitize_copts + [
'-lstdc++fs',
'-fstack-protector-all',
'-lasan'
],
copts = common_copts + sanitize_copts + [
'-O3', '-g', '-UNDEBUG', '-std=c++14',
Expand All @@ -64,14 +177,14 @@ cc_test(
)

cc_test(
name = "unittest-clang",
name = "unittest-arm",
srcs = glob([
"tests/*.h",
"tests/*.cpp",
"include/sonic/*",
"include/sonic/**/*",
]),
deps = [
deps = [
":string_view",
"@gtest//:gtest_main",
],
Expand All @@ -81,8 +194,33 @@ cc_test(
'-fstack-protector-all',
'-fsanitize-link-c++-runtime'
],
copts = common_copts + sanitize_copts + [
'-O3', '-g', '-UNDEBUG', '-std=c++14',
copts = sanitize_copts + [
'-O3', '-g', '-UNDEBUG', '-std=c++14', '-march=armv8-a',
'-fstack-protector-all',
'-Iinclude', '-Wall', '-Wextra', '-Werror',
],
)

cc_test(
name = "unittest-sse",
srcs = glob([
"tests/*.h",
"tests/*.cpp",
"include/sonic/*",
"include/sonic/**/*",
]),
deps = [
":string_view",
"@gtest//:gtest_main",
],
data = glob([ "testdata/*.json"]),
linkopts = sanitize_copts + [
'-lstdc++fs',
'-fstack-protector-all',
'-fsanitize-link-c++-runtime'
],
copts = sanitize_copts + [
'-O3', '-g', '-UNDEBUG', '-std=c++14', '-march=westmere',
'-fstack-protector-all',
'-Iinclude', '-Wall', '-Wextra', '-Werror',
],
Expand Down Expand Up @@ -112,3 +250,4 @@ cc_test(
'-fprofile-arcs', '-ftest-coverage',
],
)

14 changes: 14 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

# bazel-skylib 1.4.1, which supplies the string_flag rule loaded by BUILD.bazel.
# The archive is pinned by sha256; the Bazel mirror is listed before GitHub.
http_archive(
    name = "bazel_skylib",
    sha256 = "b8a1527901774180afc798aeb28c4634bdccf19c4d98e7bdd1ce79d1fe9aaad7",
    urls = [
        "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.4.1/bazel-skylib-1.4.1.tar.gz",
        "https://github.com/bazelbuild/bazel-skylib/releases/download/1.4.1/bazel-skylib-1.4.1.tar.gz",
    ],
)

# Load and invoke the setup macro exported by skylib's workspace.bzl.
# This must come after the bazel_skylib http_archive above, since the load
# label resolves inside that repository.
load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")

bazel_skylib_workspace()

git_repository(
name = "google_benchmark",
Expand Down
3 changes: 2 additions & 1 deletion bazel/cJSON.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ cc_library(
name = "cJSON",
srcs = ["cJSON.c"],
hdrs = ["cJSON.h"],
copts = ['-O3' ,'-DNDEBUG', '-march=haswell'],
copts = ['-O3' ,'-DNDEBUG',],
)

3 changes: 2 additions & 1 deletion bazel/yyjson.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ cc_library(
srcs = ["src/yyjson.c"],
hdrs = ["src/yyjson.h"],
includes = ["src"],
copts = ['-O3', '-DNDEBUG', '-march=haswell', '-g'],
copts = ['-O3', '-DNDEBUG', '-g'],
)

10 changes: 10 additions & 0 deletions include/sonic/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,17 @@ class SpinLock {
break;
}
while (lock_.load(std::memory_order_relaxed)) {
// Using a pause or yield instruction will slow down lock acquisition
// on contended locks.
#ifndef SONIC_SPINLOCK_NO_PAUSE

#if defined(__x86_64__) || defined(_M_AMD64)
__builtin_ia32_pause();
#elif defined(__aarch64__) || defined(_M_ARM64)
asm volatile("yield");
#endif

#endif
}
}
}
Expand Down
2 changes: 0 additions & 2 deletions include/sonic/dom/dynamicnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
#include "sonic/dom/type.h"
#include "sonic/error.h"
#include "sonic/internal/ftoa.h"
#include "sonic/internal/itoa.h"
#include "sonic/internal/quote.h"
#include "sonic/writebuffer.h"

namespace sonic_json {
Expand Down
10 changes: 5 additions & 5 deletions include/sonic/dom/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <string>

#include "sonic/dom/type.h"
#include "sonic/internal/haswell.h"
#include "sonic/internal/arch/simd_base.h"
#include "sonic/string_view.h"
#include "sonic/writebuffer.h"

Expand Down Expand Up @@ -153,7 +153,7 @@ class SAXHandler {
if (pairs) {
void *mem = obj.template containerMalloc<MemberType>(pairs, *alloc_);
obj.setChildren(mem);
internal::haswell::xmemcpy<sizeof(MemberType)>(
internal::Xmemcpy<sizeof(MemberType)>(
(void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs);
} else {
obj.setChildren(nullptr);
Expand All @@ -169,7 +169,7 @@ class SAXHandler {
arr.setLength(count, kArray);
if (count) {
arr.setChildren(arr.template containerMalloc<NodeType>(count, *alloc_));
internal::haswell::xmemcpy<sizeof(NodeType)>(
internal::Xmemcpy<sizeof(NodeType)>(
(void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count);
} else {
arr.setChildren(nullptr);
Expand Down Expand Up @@ -239,7 +239,7 @@ class LazySAXHandler {
arr.setLength(count, kArray);
if (count) {
arr.setChildren(arr.template containerMalloc<NodeType>(count, *alloc_));
internal::haswell::xmemcpy<sizeof(NodeType)>(
internal::Xmemcpy<sizeof(NodeType)>(
(void *)arr.getArrChildrenFirstUnsafe(), (void *)(&arr + 1), count);
stack_.Pop<NodeType>(count);
} else {
Expand All @@ -254,7 +254,7 @@ class LazySAXHandler {
if (pairs) {
void *mem = obj.template containerMalloc<MemberType>(pairs, *alloc_);
obj.setChildren(mem);
internal::haswell::xmemcpy<sizeof(MemberType)>(
internal::Xmemcpy<sizeof(MemberType)>(
(void *)obj.getObjChildrenFirstUnsafe(), (void *)(&obj + 1), pairs);
stack_.Pop<MemberType>(pairs);
} else {
Expand Down
Loading

0 comments on commit 50514ec

Please sign in to comment.