Skip to content

Commit

Permalink
CRC32 Power Optimization Changes
Browse files Browse the repository at this point in the history
Summary:
Support for PowerPC Architecture
Detecting AltiVec Support
Closes facebook#2353

Differential Revision: D5210948

Pulled By: siying

fbshipit-source-id: 859a8c063d37697addd89ba2b8a14e5efd5d24bf
  • Loading branch information
kamasubb authored and facebook-github-bot committed Jul 26, 2017
1 parent 30b58cf commit 2289d38
Show file tree
Hide file tree
Showing 10 changed files with 1,949 additions and 25 deletions.
77 changes: 65 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ OPT += -momit-leaf-frame-pointer
endif
endif

# Probe the compiler for AltiVec: a syntax-only run on an empty input that
# prints nothing means -maltivec is accepted. In that case record the result
# for later makefile logic and expose it to C and C++ code as HAS_ALTIVEC.
ifeq ($(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1),)
HAS_ALTIVEC=1
CFLAGS += -DHAS_ALTIVEC
CXXFLAGS += -DHAS_ALTIVEC
endif

# Probe the compiler for POWER8 code generation: a silent syntax-only run with
# -mcpu=power8 means the target is supported. Record the result and expose it
# to C and C++ code as HAVE_POWER8.
ifeq ($(shell $(CXX) -fsyntax-only -mcpu=power8 -xc /dev/null 2>&1),)
HAVE_POWER8=1
CFLAGS += -DHAVE_POWER8
CXXFLAGS += -DHAVE_POWER8
endif

# if we're compiling for release, compile without debug code (-DNDEBUG) and
# don't treat warnings as errors
ifeq ($(DEBUG_LEVEL),0)
Expand Down Expand Up @@ -305,9 +317,9 @@ util/build_version.cc: FORCE
else mv -f $@-t $@; fi
endif

# Object lists for the library and the mock library. Each object keeps the
# full suffix of its source (foo.cc -> foo.cc.o, foo.c -> foo.c.o,
# foo.S -> foo.S.o) so C++, C and assembly sources map to distinct object
# names. The earlier plain-".o" definitions were immediately overwritten by
# these and have been removed.
LIBOBJECTS = $(LIB_SOURCES:.cc=.cc.o) $(LIB_SOURCES_C:.c=.c.o) $(LIB_SOURCES_ASM:.S=.S.o)
LIBOBJECTS += $(TOOL_LIB_SOURCES:.cc=.cc.o)
MOCKOBJECTS = $(MOCK_LIB_SOURCES:.cc=.cc.o)

GTEST = $(GTEST_DIR)/gtest/gtest-all.o
TESTUTIL = ./util/testutil.o
Expand Down Expand Up @@ -555,14 +567,27 @@ $(SHARED2): $(SHARED4)
$(SHARED3): $(SHARED4)
ln -fs $(SHARED4) $(SHARED3)
endif
# Per-language object names for the shared library, derived from the library
# source lists with the source suffix preserved (.cc.o / .c.o / .S.o).
SHARED_CC_OBJECTS = $(LIB_SOURCES:.cc=.cc.o)
SHARED_C_OBJECTS = $(LIB_SOURCES_C:.c=.c.o)
SHARED_ASM_OBJECTS = $(LIB_SOURCES_ASM:.S=.S.o)

# The same objects relocated under shared-objects/, so the objects built with
# the shared-library flags do not collide with the in-tree ones.
SHARED_CC_LIBOBJECTS = $(patsubst %.cc.o,shared-objects/%.cc.o,$(SHARED_CC_OBJECTS))
SHARED_C_LIBOBJECTS = $(patsubst %.c.o,shared-objects/%.c.o,$(SHARED_C_OBJECTS))
SHARED_ASM_LIBOBJECTS = $(patsubst %.S.o,shared-objects/%.S.o,$(SHARED_ASM_OBJECTS))

# Everything that is linked into the shared library. (A previous definition
# based on $(LIBOBJECTS) was overwritten by this one and has been dropped.)
shared_libobjects = $(SHARED_CC_LIBOBJECTS) $(SHARED_C_LIBOBJECTS) $(SHARED_ASM_LIBOBJECTS)
CLEAN_FILES += shared-objects

# Compile every library source into shared-objects/ with the shared-library
# flags. One static pattern rule per source language; the output directory is
# created on demand because shared-objects/ mirrors the source tree.
$(SHARED_CC_LIBOBJECTS): shared-objects/%.cc.o: %.cc
	$(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@

# C and assembly sources are built with the C driver and C flags, matching the
# plain %.c.o / %.S.o rules. Feeding a .c file to the C++ driver would compile
# it as C++ and change the linkage of the symbols it defines.
$(SHARED_C_LIBOBJECTS): shared-objects/%.c.o: %.c
	$(AM_V_CC)mkdir -p $(@D) && $(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@

$(SHARED_ASM_LIBOBJECTS): shared-objects/%.S.o: %.S
	$(AM_V_CC)mkdir -p $(@D) && $(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@

# Link the fully versioned shared library from the shared objects.
$(SHARED4): $(shared_libobjects)
	$(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(shared_libobjects) $(LDFLAGS) -o $@

endif # PLATFORM_SHARED_EXT
Expand Down Expand Up @@ -1642,12 +1667,26 @@ rocksdbjavastaticpublishcentral:
mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar

# A version of each $(LIBOBJECTS) compiled with -fPIC
# Object names are derived straight from the source lists rather than from the
# SHARED_*_OBJECTS variables.
# NOTE(review): those appear to be defined inside the PLATFORM_SHARED_EXT
# conditional, which would leave these lists empty on static-only platforms -
# confirm. The resulting values are identical whenever SHARED_*_OBJECTS exist.
JAVA_CC_OBJECTS = $(LIB_SOURCES:.cc=.cc.o)
JAVA_C_OBJECTS = $(LIB_SOURCES_C:.c=.c.o)
JAVA_ASM_OBJECTS = $(LIB_SOURCES_ASM:.S=.S.o)

# The same objects relocated under jl/.
JAVA_CC_LIBOBJECTS = $(patsubst %.cc.o,jl/%.cc.o,$(JAVA_CC_OBJECTS))
JAVA_C_LIBOBJECTS = $(patsubst %.c.o,jl/%.c.o,$(JAVA_C_OBJECTS))
JAVA_ASM_LIBOBJECTS = $(patsubst %.S.o,jl/%.S.o,$(JAVA_ASM_OBJECTS))
java_libobjects = $(JAVA_CC_LIBOBJECTS) $(JAVA_C_LIBOBJECTS) $(JAVA_ASM_LIBOBJECTS)
CLEAN_FILES += jl

# One static pattern rule per source language; jl/ mirrors the source tree, so
# the output directory is created on demand.
$(JAVA_CC_LIBOBJECTS): jl/%.cc.o: %.cc
	$(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS)

# C and assembly sources use the C driver and C flags, matching the plain
# %.c.o / %.S.o rules; the C++ driver would compile a .c file as C++.
$(JAVA_C_LIBOBJECTS): jl/%.c.o: %.c
	$(AM_V_CC)mkdir -p $(@D) && $(CC) $(CFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS)

$(JAVA_ASM_LIBOBJECTS): jl/%.S.o: %.S
	$(AM_V_CC)mkdir -p $(@D) && $(CC) $(CFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS)


rocksdbjava: $(java_libobjects)
$(AM_V_GEN)cd java;$(MAKE) javalib;
$(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB)
Expand Down Expand Up @@ -1703,19 +1742,24 @@ IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBu
lipo ios-x86/$@ ios-arm/$@ -create -output $@

else
# Default compile rules. Objects carry the full source suffix, so each source
# language has its own pattern rule. The old recipe-less suffix-rule headers
# (.cc.o: / .c.o:) did nothing and have been removed.
%.cc.o: %.cc
	$(AM_V_CC)$(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS)

%.c.o: %.c
	$(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@

# Assembly goes through the C driver with the C flags.
%.S.o: %.S
	$(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@
endif

endif
# ---------------------------------------------------------------------------
# Source files dependencies detection
# ---------------------------------------------------------------------------

# Every C++ source list that takes part in dependency generation.
all_sources = $(LIB_SOURCES) $(MAIN_SOURCES) $(MOCK_LIB_SOURCES) $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(TEST_LIB_SOURCES) $(EXP_LIB_SOURCES)
# One .d file per source, next to the source: foo.cc -> foo.d.
DEPFILES = $(all_sources:.cc=.d)
# Same mapping for the C and assembly library sources (foo.c / foo.S -> foo.d).
DEPFILES_C = $(LIB_SOURCES_C:.c=.d)
DEPFILES_ASM = $(LIB_SOURCES_ASM:.S=.d)

# Add proper dependency support so changing a .h file forces a .cc file to
# rebuild.
Expand All @@ -1726,7 +1770,16 @@ $(DEPFILES): %.d: %.cc
@$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \
-MM -MT'$@' -MT'$(<:.cc=.o)' "$<" -o '$@'

# Dependency files for the C and assembly library sources. The second -MT must
# name the object that is actually built (foo.c.o / foo.S.o); otherwise the
# generated header dependencies attach to a target nobody builds and header
# edits do not trigger a recompile. The C driver and C flags are used to match
# how these sources are compiled.
$(DEPFILES_C): %.d: %.c
	@$(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) \
	  -MM -MT'$@' -MT'$(<:.c=.c.o)' "$<" -o '$@'

$(DEPFILES_ASM): %.d: %.S
	@$(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) \
	  -MM -MT'$@' -MT'$(<:.S=.S.o)' "$<" -o '$@'

# Generate all dependency files.
depend: $(DEPFILES) $(DEPFILES_C) $(DEPFILES_ASM)

# if the make goal is either "clean" or "format", we shouldn't
# try to import the *.d files.
Expand Down
7 changes: 3 additions & 4 deletions db/db_test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,11 @@ Options DBTestBase::GetOptions(
Options options = default_options;
BlockBasedTableOptions table_options;
bool set_block_based_table_factory = true;
#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \
!defined(OS_AIX)
#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \
!defined(OS_AIX)
rocksdb::SyncPoint::GetInstance()->ClearCallBack(
"NewRandomAccessFile:O_DIRECT");
rocksdb::SyncPoint::GetInstance()->ClearCallBack(
"NewWritableFile:O_DIRECT");
rocksdb::SyncPoint::GetInstance()->ClearCallBack("NewWritableFile:O_DIRECT");
#endif

bool can_allow_mmap = IsMemoryMappedAccessSupported();
Expand Down
10 changes: 10 additions & 0 deletions src.mk
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,16 @@ LIB_SOURCES = \
utilities/write_batch_with_index/write_batch_with_index.cc \
utilities/write_batch_with_index/write_batch_with_index_internal.cc \

# The PowerPC CRC32C sources are added only when the compiler accepts
# -maltivec, i.e. a syntax-only run on an empty input prints nothing. Any
# diagnostic output leaves both lists empty.
ifneq ($(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1),)
LIB_SOURCES_C =
LIB_SOURCES_ASM =
else
LIB_SOURCES_C = util/crc32c_ppc.c
LIB_SOURCES_ASM = util/crc32c_ppc_asm.S
endif

TOOL_LIB_SOURCES = \
tools/ldb_cmd.cc \
tools/ldb_tool.cc \
Expand Down
16 changes: 8 additions & 8 deletions tools/db_stress.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2370,15 +2370,15 @@ int main(int argc, char** argv) {
#if !defined(NDEBUG) && !defined(OS_MACOSX) && !defined(OS_WIN) && \
!defined(OS_SOLARIS) && !defined(OS_AIX)
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"NewWritableFile:O_DIRECT", [&](void* arg) {
int* val = static_cast<int*>(arg);
*val &= ~O_DIRECT;
});
"NewWritableFile:O_DIRECT", [&](void* arg) {
int* val = static_cast<int*>(arg);
*val &= ~O_DIRECT;
});
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"NewRandomAccessFile:O_DIRECT", [&](void* arg) {
int* val = static_cast<int*>(arg);
*val &= ~O_DIRECT;
});
"NewRandomAccessFile:O_DIRECT", [&](void* arg) {
int* val = static_cast<int*>(arg);
*val &= ~O_DIRECT;
});
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
#endif

Expand Down
69 changes: 68 additions & 1 deletion util/crc32c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,32 @@
#endif
#include "util/coding.h"

#ifdef __powerpc64__
#include "util/crc32c_ppc.h"
#include "util/crc32c_ppc_constants.h"

#if __linux__
#include <sys/auxv.h>

#ifndef PPC_FEATURE2_VEC_CRYPTO
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#endif

#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif

#endif /* __linux__ */

#endif

namespace rocksdb {
namespace crc32c {

#ifdef __powerpc64__
// Result of the most recent arch_ppc_probe() call: 1 when the kernel reports
// PPC_FEATURE2_VEC_CRYPTO in AT_HWCAP2, 0 otherwise (and before any probe).
static int arch_ppc_crc32 = 0;
#endif

static const uint32_t table0_[256] = {
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
Expand Down Expand Up @@ -371,6 +394,7 @@ uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
}

// Detect if SSE42 or not.
#ifndef HAVE_POWER8
static bool isSSE42() {
#ifndef HAVE_SSE42
return false;
Expand All @@ -387,15 +411,58 @@ static bool isSSE42() {
return false;
#endif
}
#endif

typedef uint32_t (*Function)(uint32_t, const char*, size_t);

#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
// Extends `crc` over `size` bytes at `buf` using the POWER8 implementation.
//
// crc32c_ppc() takes its length as `unsigned`, so passing `size` straight
// through would silently truncate buffers of 4 GiB or more. The input is
// therefore fed in chunks that always fit, chaining the running CRC from one
// call to the next (this function is used as an Extend implementation, so
// extending over consecutive pieces equals extending over the whole buffer).
uint32_t ExtendPPCImpl(uint32_t crc, const char *buf, size_t size) {
  const unsigned char *cursor = reinterpret_cast<const unsigned char *>(buf);
  // Multiple of 16, so a 16-byte-aligned buffer stays aligned across chunks.
  const size_t kMaxChunk = static_cast<size_t>(1) << 30;

  while (size > kMaxChunk) {
    crc = crc32c_ppc(crc, cursor, static_cast<unsigned>(kMaxChunk));
    // crc32c_ppc() accepts a NULL data pointer; keep it NULL in that case.
    if (cursor) {
      cursor += kMaxChunk;
    }
    size -= kMaxChunk;
  }
  return crc32c_ppc(crc, cursor, static_cast<unsigned>(size));
}

#if __linux__
// Queries the kernel's auxiliary vector for the vector-crypto feature bit and
// caches the answer in arch_ppc_crc32. Returns 1 when the bit is set, else 0.
//
// arch_ppc_crc32 and the <sys/auxv.h> include exist only under
// __powerpc64__, so every use of them stays inside that guard; on any other
// target the probe simply reports "not available".
static int arch_ppc_probe(void) {
#if defined(__powerpc64__)
  arch_ppc_crc32 =
      (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) ? 1 : 0;
  return arch_ppc_crc32;
#else
  return 0;
#endif /* __powerpc64__ */
}
#endif // __linux__

// Returns true when the running CPU supports the instructions needed by the
// PowerPC CRC32C implementation.
//
// arch_ppc_probe() is only compiled on Linux, so on any other platform this
// reports false instead of referencing a function that does not exist.
static bool isAltiVec() {
#if __linux__
  return arch_ppc_probe() != 0;
#else
  return false;
#endif  // __linux__
}
#endif

// Picks the CRC32C implementation used for the lifetime of the process.
//
// - POWER8 with AltiVec: the PowerPC implementation when the runtime probe
//   succeeds, otherwise the portable table-driven one.
// - POWER8 without AltiVec: isAltiVec()/ExtendPPCImpl are not compiled (they
//   need HAS_ALTIVEC) and neither is isSSE42(), so use the portable one.
// - Everything else: the SSE4.2 implementation when available, otherwise the
//   portable one.
static inline Function Choose_Extend() {
#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
  return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
#elif defined(HAVE_POWER8)
  return ExtendImpl<Slow_CRC32>;
#else
  return isSSE42() ? ExtendImpl<Fast_CRC32> : ExtendImpl<Slow_CRC32>;
#endif
}

// Reports whether a hardware-accelerated CRC32C implementation is available.
//
// - POWER8 with AltiVec: decided by the runtime feature probe.
// - POWER8 without AltiVec: never (no accelerated implementation is built).
// - Everything else: decided by the SSE4.2 check.
//
// The leftover unconditional `return isSSE42();` has been removed: it made
// the platform-specific logic unreachable and referenced isSSE42(), which is
// not compiled when HAVE_POWER8 is defined.
bool IsFastCrc32Supported() {
#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC)
  return isAltiVec();
#elif defined(HAVE_POWER8)
  return false;
#else
  return isSSE42();
#endif
}

Function ChosenExtend = Choose_Extend();
Expand Down
95 changes: 95 additions & 0 deletions util/crc32c_ppc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright (c) 2017 International Business Machines Corp.
// All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.

#define CRC_TABLE
#include <inttypes.h>
#include <stdlib.h>
#include <strings.h>
#include "util/crc32c_ppc_constants.h"

#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN - 1)

#ifdef REFLECT
/* Byte-at-a-time table-driven CRC update over `len` bytes starting at `p`.
 * Used for the unaligned head and the short tail around the vector code.
 * Returns the updated CRC state; `len == 0` returns `crc` unchanged. */
static unsigned int crc32_align(unsigned int crc, unsigned char const *p,
                                unsigned long len) {
  unsigned long i;

  for (i = 0; i < len; i++) {
    crc = crc_table[(crc ^ p[i]) & 0xff] ^ (crc >> 8);
  }
  return crc;
}
#endif

#ifdef HAVE_POWER8
unsigned int __crc32_vpmsum(unsigned int crc, unsigned char const *p,
unsigned long len);

/* CRC update over `len` bytes at `data`.
 *
 * Inputs shorter than VMX_ALIGN + VMX_ALIGN_MASK bytes are handled entirely
 * by the byte-wise routine. Longer inputs are split in three: a byte-wise
 * head up to the next VMX_ALIGN boundary, a bulk part whose length is a
 * multiple of VMX_ALIGN handed to __crc32_vpmsum, and a byte-wise tail.
 * When CRC_XOR is defined the CRC is inverted on entry and on exit. */
static uint32_t crc32_vpmsum(uint32_t crc, unsigned char const *data,
                             unsigned len) {
#ifdef CRC_XOR
  crc ^= 0xffffffff;
#endif

  if (len >= VMX_ALIGN + VMX_ALIGN_MASK) {
    unsigned long misalign = (unsigned long)data & VMX_ALIGN_MASK;
    unsigned long bulk;
    unsigned int rest;

    /* Head: advance to the next VMX_ALIGN boundary. */
    if (misalign != 0) {
      unsigned int head = (unsigned int)(VMX_ALIGN - misalign);

      crc = crc32_align(crc, data, head);
      len -= head;
      data += head;
    }

    /* Bulk: the largest multiple of VMX_ALIGN that fits. */
    bulk = (unsigned long)len & ~VMX_ALIGN_MASK;
    rest = len & VMX_ALIGN_MASK;
    crc = __crc32_vpmsum(crc, data, bulk);

    /* Tail: whatever is left after the bulk part. */
    if (rest != 0) {
      crc = crc32_align(crc, data + bulk, rest);
    }
  } else {
    crc = crc32_align(crc, data, (unsigned long)len);
  }

#ifdef CRC_XOR
  crc ^= 0xffffffff;
#endif

  return crc;
}

/* Public entry point: extends `crc` over `len` bytes at `data`.
 *
 * crc32_vpmsum does not gracefully handle a NULL data pointer, so a NULL
 * `data` is treated as `len` zero bytes. The zeros come from a small static
 * buffer that is fed through crc32_vpmsum repeatedly, carrying the CRC from
 * one call to the next. Compared with allocating a `len`-sized scratch
 * buffer this cannot fail (no unchecked malloc result) and needs no heap
 * memory proportional to `len`.
 */
uint32_t crc32c_ppc(uint32_t crc, unsigned char const *data, unsigned len) {
  static const unsigned char zeros[4096] = {0};

  if (data) {
    return crc32_vpmsum(crc, data, len);
  }

  while (len > sizeof(zeros)) {
    crc = crc32_vpmsum(crc, zeros, (unsigned)sizeof(zeros));
    len -= (unsigned)sizeof(zeros);
  }
  return crc32_vpmsum(crc, zeros, len);
}

#else /* HAVE_POWER8 */

/* This symbol has to exist on non-ppc architectures (and on legacy
* ppc systems using power7 or below) in order to compile properly
* there, even though it won't be called.
*/
uint32_t crc32c_ppc(uint32_t crc, unsigned char const *data, unsigned len) {
  /* Link-time placeholder only: the arguments are deliberately ignored. */
  (void)crc;
  (void)data;
  (void)len;
  return 0;
}

#endif /* HAVE_POWER8 */
Loading

0 comments on commit 2289d38

Please sign in to comment.