Skip to content

Commit

Permalink
Support apple silicon
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesge committed Jun 8, 2022
1 parent 7f6001e commit efa9989
Show file tree
Hide file tree
Showing 14 changed files with 196 additions and 49 deletions.
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,15 @@ endif

#required by butil/crc32.cc to boost performance for 10x
ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0)
CXXFLAGS+=-msse4 -msse4.2
# Only x86 targets accept -msse4/-msse4.2 (Apple Silicon / ARM compilers reject them).
# Detect the machine with `uname -m`: `uname -p` prints "i386" only on Intel macOS,
# while on Linux x86_64 it prints "x86_64" or "unknown", which would silently drop
# the SSE4.2 flags that butil/crc32.cc relies on.
ifneq ($(filter i386 i486 i586 i686 x86_64 amd64,$(shell uname -m)),)
CXXFLAGS+=-msse4 -msse4.2
endif
endif
#not solved yet
ifeq ($(CC),gcc)
ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0)
CXXFLAGS+=-Wno-aligned-new
endif
ifeq ($(shell test $(GCC_VERSION) -ge 70000; echo $$?),0)
CXXFLAGS+=-Wno-aligned-new
endif
endif

BUTIL_SOURCES = \
Expand Down
21 changes: 13 additions & 8 deletions config_brpc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,21 @@ find_dir_of_header_or_die() {
}

if [ "$SYSTEM" = "Darwin" ]; then
OPENSSL_LIB="/usr/local/opt/openssl/lib"
OPENSSL_HDR="/usr/local/opt/openssl/include"
else
# User specified path of openssl, if not given it's empty
OPENSSL_LIB=$(find_dir_of_lib ssl)
# Inconvenient to check these headers in baidu-internal
#PTHREAD_HDR=$(find_dir_of_header_or_die pthread.h)
OPENSSL_HDR=$(find_dir_of_header_or_die openssl/ssl.h mesalink/openssl/ssl.h)
# Prepend a Homebrew location to the library/header search lists so the
# OpenSSL lookups that follow can succeed without user-supplied paths.
# The Intel-Homebrew prefix wins when it exists; the Apple-Silicon Cellar is
# only consulted otherwise (elif).
if [ -d "/usr/local/opt/openssl" ]; then
LIBS_IN="/usr/local/opt/openssl/lib $LIBS_IN"
HDRS_IN="/usr/local/opt/openssl/include $HDRS_IN"
elif [ -d "/opt/homebrew/Cellar" ]; then
# NOTE(review): this adds the whole Cellar root, i.e. every installed formula
# and version, not just openssl. Presumably the find_dir_of_* helpers search
# recursively and take the first match, so libssl and openssl/ssl.h could come
# from different OpenSSL versions -- confirm; /opt/homebrew/opt/openssl may be
# a narrower choice.
LIBS_IN="/opt/homebrew/Cellar $LIBS_IN"
HDRS_IN="/opt/homebrew/Cellar $HDRS_IN"
fi
fi

# User specified path of openssl, if not given it's empty
OPENSSL_LIB=$(find_dir_of_lib ssl)
# Inconvenient to check these headers in baidu-internal
#PTHREAD_HDR=$(find_dir_of_header_or_die pthread.h)
OPENSSL_HDR=$(find_dir_of_header_or_die openssl/ssl.h mesalink/openssl/ssl.h)

if [ $WITH_MESALINK != 0 ]; then
MESALINK_HDR=$(find_dir_of_header_or_die mesalink/openssl/ssl.h)
OPENSSL_HDR="$OPENSSL_HDR\n$MESALINK_HDR"
Expand Down
33 changes: 17 additions & 16 deletions docs/cn/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ brpc有如下依赖:
## Ubuntu/LinuxMint/WSL
### 依赖准备

安装通用依赖,[gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb):
安装依赖:
```shell
sudo apt-get install -y git g++ make libssl-dev libgflags-dev libprotobuf-dev libprotoc-dev protobuf-compiler libleveldb-dev
```
Expand Down Expand Up @@ -113,14 +113,9 @@ CentOS一般需要安装EPEL,否则很多包都默认不可用。
sudo yum install epel-release
```

安装通用依赖
安装依赖
```shell
sudo yum install git gcc-c++ make openssl-devel
```

安装 [gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb):
```shell
sudo yum install gflags-devel protobuf-devel protobuf-compiler leveldb-devel
sudo yum install git gcc-c++ make openssl-devel gflags-devel protobuf-devel protobuf-compiler leveldb-devel
```

如果你要在样例中启用cpu/heap的profiler:
Expand Down Expand Up @@ -216,18 +211,13 @@ $ make

## MacOS

注意:在相同运行环境下,当前Mac版brpc的性能比Linux版差2.5倍。如果你的服务是性能敏感的,请不要使用MacOs作为你的生产环境
注意:在相同硬件条件下,MacOS版brpc的性能可能明显差于Linux版。如果你的服务是性能敏感的,请不要使用MacOS作为你的生产环境

### 依赖准备

安装通用依赖:
```shell
brew install openssl git gnu-getopt coreutils
```

安装[gflags](https://github.com/gflags/gflags)[protobuf](https://github.com/google/protobuf)[leveldb](https://github.com/google/leveldb)
安装依赖:
```shell
brew install gflags protobuf leveldb
brew install openssl git gnu-getopt coreutils gflags protobuf leveldb
```

如果你要在样例中启用cpu/heap的profiler:
Expand All @@ -241,6 +231,17 @@ git clone https://github.com/google/googletest -b release-1.10.0 && cd googletes
```
在编译完成后,复制include/和lib/目录到/usr/local/include和/usr/local/lib目录中,以便于让所有应用都能使用gtest。

### Monterey
Monterey中openssl的安装位置可能不再位于`/usr/local/opt/openssl`,很可能会在`/opt/homebrew/Cellar`目录下,如果编译时报告找不到openssl,可考虑设置软链如下:
```shell
sudo ln -s /opt/homebrew/Cellar/openssl@3/3.0.3 /usr/local/opt/openssl
```
请注意上述命令中openssl的目录可能随环境变化而变化,你可以通过`brew info openssl`查看。

### Apple Silicon

master HEAD已支持M1系列芯片,M2未测试过。欢迎通过issues向我们报告遗留的warning/error。

### 使用config_brpc.sh编译brpc
git克隆brpc,进入到项目目录然后运行:
```shell
Expand Down
25 changes: 17 additions & 8 deletions docs/en/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,18 +215,13 @@ Same with [here](#compile-brpc-with-cmake)

## MacOS

Note: In the same running environment, the performance of the current Mac version is about 2.5 times worse than the Linux version. If your service is performance-critical, do not use MacOS as your production environment.
Note: In the same environment, the performance of the MacOS version is worse than that of the Linux version. If your service is performance-critical, do not use MacOS as your production environment.

### Prepare deps

Install common deps:
Install dependencies:
```shell
brew install openssl git gnu-getopt coreutils
```

Install [gflags](https://github.com/gflags/gflags), [protobuf](https://github.com/google/protobuf), [leveldb](https://github.com/google/leveldb):
```shell
brew install gflags protobuf leveldb
brew install openssl git gnu-getopt coreutils gflags protobuf leveldb
```

If you need to enable cpu/heap profilers in examples:
Expand All @@ -240,6 +235,20 @@ git clone https://github.com/google/googletest -b release-1.10.0 && cd googletes
```
After the compilation, copy include/ and lib/ into /usr/local/include and /usr/local/lib respectively to expose gtest to all apps

### Monterey

On Monterey, openssl may not be installed at `/usr/local/opt/openssl`; it is probably located under `/opt/homebrew/Cellar` instead. If the compiler cannot find openssl, consider creating a symbolic link like the one below:

```shell
sudo ln -s /opt/homebrew/Cellar/openssl@3/3.0.3 /usr/local/opt/openssl
```

Please note that the openssl directory in the above command may differ between environments. You can check the actual location by running `brew info openssl`.

### Apple Silicon

The code at master HEAD already supports M1 series chips. M2 series chips have not been tested yet. Please feel free to report any remaining warnings/errors to us via issues.

### Compile brpc with config_brpc.sh
git clone brpc, cd into the repo and run
```shell
Expand Down
2 changes: 1 addition & 1 deletion src/brpc/span.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include "brpc/span.pb.h"

namespace bthread {
extern thread_local bthread::LocalStorage tls_bls;
extern __thread bthread::LocalStorage tls_bls;
}


Expand Down
97 changes: 97 additions & 0 deletions src/bthread/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -713,3 +713,100 @@ __asm (
);

#endif


#if defined(BTHREAD_CONTEXT_PLATFORM_apple_arm64) && defined(BTHREAD_CONTEXT_COMPILER_gcc)
// _bthread_jump_fcontext for Apple arm64: saves the current context on the
// current stack and switches to another, previously saved context.
//
// Registers on entry (as used by the assembly below):
//   x0 - address where the saved stack pointer of the current context is stored
//   x1 - stack pointer (context-data) of the context to switch to
//   x2 - value handed to the target context in x0
//   w3 - non-zero if d8-d15 should be saved/restored
//
// Context-data frame layout (0xb0 bytes, offsets from sp):
//   0x00-0x3f  d8-d15
//   0x40-0x8f  x19-x28
//   0x90       fp      0x98  lr
//   0xa0       pc to resume at (a copy of lr when saving)
//
// NOTE(review): the "#if (defined(__VFP_FP__) ...)" / "#endif" lines are part
// of the string literal, so the C preprocessor never evaluates them.
// Presumably the assembler treats lines starting with '#' as comments, which
// would make the d8-d15 save/restore depend on w3 alone -- confirm.
__asm (
".text\n"
".globl _bthread_jump_fcontext\n"
".balign 16\n"
"_bthread_jump_fcontext:\n"
" ; prepare stack for GP + FPU\n"
" sub sp, sp, #0xb0\n"
"#if (defined(__VFP_FP__) && !defined(__SOFTFP__))\n"
" ; test if fpu env should be preserved\n"
" cmp w3, #0\n"
" b.eq 1f\n"
" ; save d8 - d15\n"
" stp d8, d9, [sp, #0x00]\n"
" stp d10, d11, [sp, #0x10]\n"
" stp d12, d13, [sp, #0x20]\n"
" stp d14, d15, [sp, #0x30]\n"
"1:\n"
"#endif\n"
" ; save x19-x30\n"
" stp x19, x20, [sp, #0x40]\n"
" stp x21, x22, [sp, #0x50]\n"
" stp x23, x24, [sp, #0x60]\n"
" stp x25, x26, [sp, #0x70]\n"
" stp x27, x28, [sp, #0x80]\n"
" stp fp, lr, [sp, #0x90]\n"
" ; save LR as PC\n"
" str lr, [sp, #0xa0]\n"
" ; store RSP (pointing to context-data) in first argument (x0).\n"
" ; STR cannot have sp as a target register\n"
" mov x4, sp\n"
" str x4, [x0]\n"
" ; restore RSP (pointing to context-data) from A2 (x1)\n"
" mov sp, x1\n"
"#if (defined(__VFP_FP__) && !defined(__SOFTFP__))\n"
" ; test if fpu env should be preserved\n"
" cmp w3, #0\n"
" b.eq 2f\n"
" ; load d8 - d15\n"
" ldp d8, d9, [sp, #0x00]\n"
" ldp d10, d11, [sp, #0x10]\n"
" ldp d12, d13, [sp, #0x20]\n"
" ldp d14, d15, [sp, #0x30]\n"
"2:\n"
"#endif\n"
" ; load x19-x30\n"
" ldp x19, x20, [sp, #0x40]\n"
" ldp x21, x22, [sp, #0x50]\n"
" ldp x23, x24, [sp, #0x60]\n"
" ldp x25, x26, [sp, #0x70]\n"
" ldp x27, x28, [sp, #0x80]\n"
" ldp fp, lr, [sp, #0x90]\n"
" ; use third arg as return value after jump\n"
" ; and as first arg in context function\n"
" mov x0, x2\n"
" ; load pc\n"
" ldr x4, [sp, #0xa0]\n"
" ; restore stack from GP + FPU\n"
" add sp, sp, #0xb0\n"
// Branch to the pc loaded from the frame rather than to lr.
" ret x4\n"
);

#endif

#if defined(BTHREAD_CONTEXT_PLATFORM_apple_arm64) && defined(BTHREAD_CONTEXT_COMPILER_gcc)
// _bthread_make_fcontext for Apple arm64: builds an initial 0xb0-byte
// context-data frame at the top of a freshly allocated stack.
//
// Registers on entry (as used by the assembly below):
//   x0 - stack address; rounded down to a 16-byte boundary before use
//   x1 - overwritten without being read (presumably the stack size -- confirm)
//   x2 - address of the context function, stored in the frame's pc slot (0xa0)
// Returns in x0 the address of the prepared context-data.
//
// The frame's 0x98 slot receives the address of the local `finish` label, so a
// context function that returns lands there and calls __exit with x0 = 0.
// The saved-register slots of the frame are not initialised here.
__asm (
".text\n"
".globl _bthread_make_fcontext\n"
".balign 16\n"
"_bthread_make_fcontext:\n"
" ; shift address in x0 (allocated stack) to lower 16 byte boundary\n"
" and x0, x0, ~0xF\n"
" ; reserve space for context-data on context-stack\n"
" sub x0, x0, #0xb0\n"
" ; third arg of make_fcontext() == address of context-function\n"
" ; store address as a PC to jump in\n"
" str x2, [x0, #0xa0]\n"
" ; compute abs address of label finish\n"
" ; 0x0c = 3 instructions * size (4) before label 'finish'\n"
" ; TODO: Numeric offset since llvm still does not support labels in ADR. Fix:\n"
// NOTE(review): the URL in the next assembly comment looks truncated after
// "http:" -- restore the full link if it is known.
" ; http:\n"
// The hard-coded 0x0c must stay in sync with the instruction count between
// this adr and `finish` (adr, str, ret = 3 * 4 bytes).
" adr x1, 0x0c\n"
" ; save address of finish as return-address for context-function\n"
" ; will be entered after context-function returns (LR register)\n"
" str x1, [x0, #0x98]\n"
" ret lr ; return pointer to context-data (x0)\n"
"finish:\n"
" ; exit code is zero\n"
" mov x0, #0\n"
" ; exit application\n"
" bl __exit\n"
);

#endif

19 changes: 11 additions & 8 deletions src/bthread/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@

#if defined(__GNUC__) || defined(__APPLE__)

#define BTHREAD_CONTEXT_COMPILER_gcc
#define BTHREAD_CONTEXT_COMPILER_gcc

#if defined(__linux__)
#if defined(__linux__)
#ifdef __x86_64__
#define BTHREAD_CONTEXT_PLATFORM_linux_x86_64
#define BTHREAD_CONTEXT_CALL_CONVENTION
Expand All @@ -41,27 +41,30 @@
#define BTHREAD_CONTEXT_CALL_CONVENTION
#endif

#elif defined(__MINGW32__) || defined (__MINGW64__)
#elif defined(__MINGW32__) || defined (__MINGW64__)
#if defined(__x86_64__)
#define BTHREAD_CONTEXT_COMPILER_gcc
#define BTHREAD_CONTEXT_PLATFORM_windows_x86_64
#define BTHREAD_CONTEXT_CALL_CONVENTION
#endif

#if defined(__i386__)
#elif defined(__i386__)
#define BTHREAD_CONTEXT_COMPILER_gcc
#define BTHREAD_CONTEXT_PLATFORM_windows_i386
#define BTHREAD_CONTEXT_CALL_CONVENTION __cdecl
#endif
#elif defined(__APPLE__) && defined(__MACH__)

#elif defined(__APPLE__) && defined(__MACH__)
#if defined (__i386__)
#define BTHREAD_CONTEXT_PLATFORM_apple_i386
#define BTHREAD_CONTEXT_CALL_CONVENTION
#elif defined (__x86_64__)
#define BTHREAD_CONTEXT_PLATFORM_apple_x86_64
#define BTHREAD_CONTEXT_CALL_CONVENTION
#endif
#elif defined (__aarch64__)
#define BTHREAD_CONTEXT_PLATFORM_apple_arm64
#define BTHREAD_CONTEXT_CALL_CONVENTION
#endif
#endif

#endif

#if defined(_WIN32_WCE)
Expand Down
2 changes: 1 addition & 1 deletion src/bthread/key.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class KeyTable;

// defined in task_group.cpp
extern __thread TaskGroup* tls_task_group;
extern thread_local LocalStorage tls_bls;
extern __thread LocalStorage tls_bls;
static __thread bool tls_ever_created_keytable = false;

// We keep thread specific data in a two-level array. The top-level array
Expand Down
2 changes: 1 addition & 1 deletion src/bthread/task_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ __thread TaskGroup* tls_task_group = NULL;
// Sync with TaskMeta::local_storage when a bthread is created or destroyed.
// During running, the two fields may be inconsistent, use tls_bls as the
// groundtruth.
thread_local LocalStorage tls_bls = BTHREAD_LOCAL_STORAGE_INITIALIZER;
__thread LocalStorage tls_bls = BTHREAD_LOCAL_STORAGE_INITIALIZER;

// defined in bthread/key.cpp
extern void return_keytable(bthread_keytable_pool_t*, KeyTable*);
Expand Down
26 changes: 26 additions & 0 deletions src/butil/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ inline int64_t monotonic_time_s() {

namespace detail {
inline uint64_t clock_cycles() {
#if defined(__x86_64__) || defined(__amd64__)
unsigned int lo = 0;
unsigned int hi = 0;
// We cannot use "=A", since this would use %rax on x86_64
Expand All @@ -223,6 +224,31 @@ inline uint64_t clock_cycles() {
: "=a" (lo), "=d" (hi)
);
return ((uint64_t)hi << 32) | lo;
#elif defined(__aarch64__)
uint64_t virtual_timer_value;
asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
return virtual_timer_value;
#elif defined(__ARM_ARCH)
#if (__ARM_ARCH >= 6)
// 32-bit ARM (v6+): read the PMU cycle counter (PMCCNTR), but only if user
// mode is allowed to read it and the counter is actually running.
unsigned int pmccntr;
unsigned int pmuseren;
unsigned int pmcntenset;
// Read the user mode perf monitor counter access permissions.
asm volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
asm volatile ("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
if (pmcntenset & 0x80000000ul) { // Is it counting?
asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
// The counter is set up to count every 64th cycle
return static_cast<uint64_t>(pmccntr) * 64; // Should optimize to << 6
}
}
// The counter is not user-readable or not running. Return a defined value
// instead of flowing off the end of a non-void function, which is undefined
// behaviour. NOTE(review): callers are presumably expected to treat 0 as
// "no cycle counter available" -- confirm.
return 0;
#else
#error "unsupported arm_arch"
#endif
#else
#error "unsupported arch"
#endif
}
extern int64_t read_invariant_cpu_frequency();
// Be positive iff:
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ use_cxx11()

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
#required by butil/crc32.cc to boost performance for 10x
if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.4))
if((CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.4))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4 -msse4.2")
endif()
if(NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
Expand Down
4 changes: 3 additions & 1 deletion test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ CXXFLAGS=$(CPPFLAGS) -pipe -Wall -W -fPIC -fstrict-aliasing -Wno-invalid-offseto

#required by butil/crc32.cc to boost performance for 10x
ifeq ($(shell test $(GCC_VERSION) -ge 40400; echo $$?),0)
CXXFLAGS+=-msse4 -msse4.2
# Only x86 targets accept -msse4/-msse4.2 (Apple Silicon / ARM compilers reject them).
# Detect the machine with `uname -m`: `uname -p` prints "i386" only on Intel macOS,
# while on Linux x86_64 it prints "x86_64" or "unknown", which would silently drop
# the SSE4.2 flags that butil/crc32.cc relies on.
ifneq ($(filter i386 i486 i586 i686 x86_64 amd64,$(shell uname -m)),)
CXXFLAGS+=-msse4 -msse4.2
endif
endif
#not solved yet
ifeq ($(CC),gcc)
Expand Down
Loading

0 comments on commit efa9989

Please sign in to comment.