Skip to content

Commit

Permalink
[C++] Support protobuf native schema (apache#11388)
Browse files Browse the repository at this point in the history
* Add C++ ProtobufNativeSchema implementation

* Add tests for ProtobufNativeSchema

* Fix schema type error and add tests for protobuf native schema

* Upgrade pulsar-build image's protobuf to 3.17.1

* Update protobuf dependency version in documents

* Add missed comments

* Fix CentOS 7 build
  • Loading branch information
BewareMyPower authored Jul 21, 2021
1 parent 5ad4059 commit 992760e
Show file tree
Hide file tree
Showing 17 changed files with 297 additions and 1,207 deletions.
30 changes: 12 additions & 18 deletions build/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,27 @@ FROM ubuntu:16.04

# prepare the directory for pulsar related files
RUN mkdir /pulsar
ADD protobuf.patch /pulsar

RUN apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository ppa:openjdk-r/ppa && \
apt-get update && \
apt-get install -y tig g++ cmake libssl-dev libcurl4-openssl-dev \
liblog4cxx-dev libprotobuf-dev google-mock libgtest-dev \
liblog4cxx-dev google-mock libgtest-dev \
libboost-dev libboost-program-options-dev libboost-system-dev libboost-python-dev \
libxml2-utils protobuf-compiler wget \
curl doxygen openjdk-8-jdk-headless openjdk-11-jdk-headless clang-format-5.0 \
gnupg2 golang-1.13-go zip unzip libzstd-dev libsnappy-dev python3-pip libpython-dev

# Build protobuf 3.x.y from source since the default protobuf from Ubuntu's apt source is 2.x.y
RUN curl -O -L https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-cpp-3.17.3.tar.gz && \
tar xvfz protobuf-cpp-3.17.3.tar.gz && \
cd protobuf-3.17.3/ && \
CXXFLAGS=-fPIC ./configure && \
make -j8 && make install && \
cd .. && rm -rf protobuf-3.17.3/ protobuf-cpp-3.17.3.tar.gz
ENV LD_LIBRARY_PATH /usr/local/lib

# Compile and install gtest
RUN cd /usr/src/gtest && cmake . && make && cp libgtest.a /usr/lib

Expand All @@ -49,8 +57,8 @@ ENV JAVA_HOME_11=/usr/lib/jvm/java-1.11.0-openjdk-amd64
## Website build dependencies

# Install Ruby-2.4.1
RUN apt-get install -y
RUN gpg2 --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB && \
RUN (curl -sSL https://rvm.io/mpapis.asc | gpg --import -) && \
(curl -sSL https://rvm.io/pkuczynski.asc | gpg --import -) && \
(curl -sSL https://get.rvm.io | bash -s stable)
ENV PATH "$PATH:/usr/local/rvm/bin"
RUN rvm install 2.4.1
Expand All @@ -70,20 +78,6 @@ RUN dpkg -i crowdin.deb
RUN wget https://bootstrap.pypa.io/pip/2.7/get-pip.py && python get-pip.py && rm get-pip.py
RUN pip3 install pdoc

# Install Protobuf doc generator (requires Go)
ENV GOPATH "$HOME/go"
ENV PATH "/usr/lib/go-1.13/bin:$GOPATH/bin:$PATH"
RUN go get -u github.com/pseudomuto/protoc-gen-doc/cmd/protoc-gen-doc

# Build the patched protoc
RUN git clone https://github.com/google/protobuf.git /pulsar/protobuf && \
cd /pulsar/protobuf && \
git checkout v2.4.1 && \
patch -p1 < /pulsar/protobuf.patch && \
autoreconf --install && \
./configure && \
make

# Installation
ARG MAVEN_VERSION=3.6.3
ARG MAVEN_FILENAME="apache-maven-${MAVEN_VERSION}-bin.tar.gz"
Expand Down
1,170 changes: 0 additions & 1,170 deletions build/docker/protobuf.patch

This file was deleted.

22 changes: 12 additions & 10 deletions pulsar-client-cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ unset(OPENSSL_VERSION CACHE)

if (LINK_STATIC)
find_library(ZLIB_LIBRARIES REQUIRED NAMES libz.a z zlib)
find_library(Protobuf_LITE_LIBRARIES NAMES libprotobuf-lite.a libprotobuf-lite)
find_library(Protobuf_LIBRARIES NAMES libprotobuf.a libprotobuf)
find_library(CURL_LIBRARIES NAMES libcurl.a curl curl_a libcurl_a)
find_library(LIB_ZSTD NAMES libzstd.a)
find_library(LIB_SNAPPY NAMES libsnappy.a)
message(STATUS "Protobuf_LITE_LIBRARIES: ${Protobuf_LITE_LIBRARIES}")
set(COMMON_LIBS ${Protobuf_LITE_LIBRARIES} ${COMMON_LIBS})
message(STATUS "Protobuf_LIBRARIES: ${Protobuf_LIBRARIES}")
set(COMMON_LIBS ${Protobuf_LIBRARIES} ${COMMON_LIBS})

if (USE_LOG4CXX)
if (LOG4CXX_USE_DYNAMIC_LIBS)
Expand Down Expand Up @@ -163,28 +163,30 @@ else()
find_package(ZLIB REQUIRED)
set(ZLIB_LIBRARIES ${ZLIB_LIBRARIES})
# NOTE: The default MODULE mode may not find debug libraries so use CONFIG mode here
unset(Protobuf_INCLUDE_DIRS CACHE)
unset(Protobuf_LIBRARIES CACHE)
find_package(Protobuf QUIET CONFIG)
# NOTE: On Windows x86 platform, Protobuf_FOUND might be set false but Protobuf_INCLUDE_DIRS and
# Protobuf_LITE_LIBRARIES are both found.
if (Protobuf_INCLUDE_DIRS AND Protobuf_LITE_LIBRARIES AND NOT Protobuf_FOUND)
# Protobuf_LIBRARIES are both found.
if (Protobuf_INCLUDE_DIRS AND Protobuf_LIBRARIES AND NOT Protobuf_FOUND)
set(Protobuf_FOUND TRUE)
endif ()
if (Protobuf_FOUND)
message("Found Protobuf in config mode")
message(STATUS "Protobuf_LITE_LIBRARIES: ${Protobuf_LITE_LIBRARIES}")
message(STATUS "Protobuf_LIBRARIES: ${Protobuf_LIBRARIES}")
message(STATUS "Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS}")
else ()
message("Failed to find Protobuf in config mode, try to find it from system path")
find_library(Protobuf_LITE_LIBRARIES protobuf-lite libprotobuf-lite)
find_library(Protobuf_LIBRARIES protobuf libprotobuf)
find_path(Protobuf_INCLUDE_DIRS google/protobuf/stubs/common.h)
message(STATUS "Protobuf_LITE_LIBRARIES: ${Protobuf_LITE_LIBRARIES}")
message(STATUS "Protobuf_LIBRARIES: ${Protobuf_LIBRARIES}")
message(STATUS "Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS}")
endif ()

if (${Protobuf_FOUND} AND (${CMAKE_VERSION} VERSION_GREATER 3.8))
set(COMMON_LIBS protobuf::libprotobuf-lite ${COMMON_LIBS})
set(COMMON_LIBS protobuf::libprotobuf ${COMMON_LIBS})
else ()
set(COMMON_LIBS ${Protobuf_LITE_LIBRARIES} ${COMMON_LIBS})
set(COMMON_LIBS ${Protobuf_LIBRARIES} ${COMMON_LIBS})
endif ()

if (MSVC AND (${CMAKE_BUILD_TYPE} STREQUAL Debug))
Expand Down
2 changes: 1 addition & 1 deletion pulsar-client-cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ https://github.com/apache/pulsar/tree/master/pulsar-client-cpp/examples
* A C++ compiler that supports C++11, like GCC >= 4.8
* CMake >= 3.4
* [Boost](http://www.boost.org/)
* [Protocol Buffer](https://developers.google.com/protocol-buffers/)
* [Protocol Buffer](https://developers.google.com/protocol-buffers/) >= 3
* [libcurl](https://curl.se/libcurl/)
* [openssl](https://github.com/openssl/openssl)

Expand Down
2 changes: 1 addition & 1 deletion pulsar-client-cpp/docker-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ROOT_DIR=$(git rev-parse --show-toplevel)
cd $ROOT_DIR/pulsar-client-cpp

BUILD_IMAGE_NAME="${BUILD_IMAGE_NAME:-apachepulsar/pulsar-build}"
BUILD_IMAGE_VERSION="${BUILD_IMAGE_VERSION:-ubuntu-16.04-py2}"
BUILD_IMAGE_VERSION="${BUILD_IMAGE_VERSION:-ubuntu-16.04-pb3}"

IMAGE="$BUILD_IMAGE_NAME:$BUILD_IMAGE_VERSION"

Expand Down
2 changes: 1 addition & 1 deletion pulsar-client-cpp/docker-format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ROOT_DIR=$(git rev-parse --show-toplevel)
cd $ROOT_DIR/pulsar-client-cpp

BUILD_IMAGE_NAME="${BUILD_IMAGE_NAME:-apachepulsar/pulsar-build}"
BUILD_IMAGE_VERSION="${BUILD_IMAGE_VERSION:-ubuntu-16.04}"
BUILD_IMAGE_VERSION="${BUILD_IMAGE_VERSION:-ubuntu-16.04-pb3}"

IMAGE="$BUILD_IMAGE_NAME:$BUILD_IMAGE_VERSION"

Expand Down
4 changes: 2 additions & 2 deletions pulsar-client-cpp/docker-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ ROOT_DIR=$(git rev-parse --show-toplevel)
cd $ROOT_DIR/pulsar-client-cpp

BUILD_IMAGE_NAME="${BUILD_IMAGE_NAME:-apachepulsar/pulsar-build}"
BUILD_IMAGE_VERSION="${BUILD_IMAGE_VERSION:-ubuntu-16.04-py2}"
BUILD_IMAGE_VERSION="${BUILD_IMAGE_VERSION:-ubuntu-16.04-pb3}"

IMAGE="$BUILD_IMAGE_NAME:$BUILD_IMAGE_VERSION"

Expand Down Expand Up @@ -82,4 +82,4 @@ if [ $RES -ne 0 ]; then
fi
)
fi
exit $RES
exit $RES
8 changes: 7 additions & 1 deletion pulsar-client-cpp/docker/centos-7/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,16 @@
FROM centos:7.6.1810

RUN yum install -y gcc gcc-c++ make \
protobuf-devel.x86_64 protobuf-lite-devel.x86_64 \
protobuf-devel.x86_64 \
libcurl-devel openssl-devel \
boost boost-devel

RUN curl -O -L https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-cpp-3.17.3.tar.gz && \
tar xfz protobuf-cpp-3.17.3.tar.gz && \
cd protobuf-3.17.3/ && \
CXXFLAGS=-fPIC ./configure && \
make -j8 && make install && \
cd .. && rm -rf protobuf-3.17.3/ protobuf-cpp-3.17.3.tar.gz
RUN mkdir -p /opt/cmake
WORKDIR /opt/cmake
RUN curl -L -O https://cmake.org/files/v3.4/cmake-3.4.0-Linux-x86_64.tar.gz \
Expand Down
35 changes: 35 additions & 0 deletions pulsar-client-cpp/include/pulsar/ProtobufNativeSchema.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#pragma once

#include <pulsar/Schema.h>
#include <google/protobuf/descriptor.h>

namespace pulsar {

/**
* Create a protobuf native schema using a descriptor.
*
* The schema definition of the returned SchemaInfo is a JSON string with three fields:
* "fileDescriptorSet" (the base64 encoded FileDescriptorSet built from the file that declares the
* descriptor and the files it depends on), "rootMessageTypeName" (the full name of the descriptor)
* and "rootFileDescriptorName" (the name of the file that declares the descriptor).
*
* @param descriptor the Descriptor object of the target class
* @return the protobuf native schema
* @throw std::invalid_argument if descriptor is nullptr
*/
PULSAR_PUBLIC SchemaInfo createProtobufNativeSchema(const google::protobuf::Descriptor* descriptor);

} // namespace pulsar
5 changes: 5 additions & 0 deletions pulsar-client-cpp/include/pulsar/Schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ enum SchemaType
*/
KEY_VALUE = 15,

/**
* Protobuf native schema based on Descriptor.
*/
PROTOBUF_NATIVE = 20,

/**
* A bytes array.
*/
Expand Down
3 changes: 3 additions & 0 deletions pulsar-client-cpp/lib/Commands.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ static inline bool isBuiltInSchema(SchemaType schemaType) {
case JSON:
case AVRO:
case PROTOBUF:
case PROTOBUF_NATIVE:
return true;

default:
Expand All @@ -61,6 +62,8 @@ static inline proto::Schema_Type getSchemaType(SchemaType type) {
return Schema_Type_Protobuf;
case AVRO:
return Schema_Type_Avro;
case PROTOBUF_NATIVE:
return Schema_Type_ProtobufNative;
default:
return Schema_Type_None;
}
Expand Down
71 changes: 71 additions & 0 deletions pulsar-client-cpp/lib/ProtobufNativeSchema.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "pulsar/ProtobufNativeSchema.h"

#include <stdexcept>
#include <vector>

#include <boost/archive/iterators/base64_from_binary.hpp>
#include <boost/archive/iterators/transform_width.hpp>
#include <google/protobuf/descriptor.pb.h>

using google::protobuf::FileDescriptor;
using google::protobuf::FileDescriptorSet;

namespace pulsar {

// Appends the FileDescriptorProto of `fileDescriptor` to `fileDescriptorSet`, followed by the protos of
// every file it depends on (collected recursively). Declared here because it is used before its definition.
void internalCollectFileDescriptors(const FileDescriptor* fileDescriptor,
FileDescriptorSet& fileDescriptorSet);

/**
 * Build the PROTOBUF_NATIVE schema of the message type described by `descriptor`.
 *
 * The schema definition is a JSON object with three string fields:
 *  - "fileDescriptorSet": the base64 encoding of the serialized FileDescriptorSet that holds the
 *    file declaring `descriptor` and the files it depends on
 *  - "rootMessageTypeName": the full name of `descriptor`
 *  - "rootFileDescriptorName": the name of the file declaring `descriptor`
 *
 * NOTE(review): the two names are embedded without JSON escaping, which is only correct as long as
 * they contain no '"' or '\\' characters.
 *
 * @param descriptor the Descriptor object of the target class
 * @return the SchemaInfo whose type is PROTOBUF_NATIVE, whose name is empty and whose schema is the
 * JSON described above
 * @throw std::invalid_argument if descriptor is nullptr
 */
SchemaInfo createProtobufNativeSchema(const google::protobuf::Descriptor* descriptor) {
    if (!descriptor) {
        throw std::invalid_argument("descriptor is null");
    }

    const auto fileDescriptor = descriptor->file();
    const std::string rootMessageTypeName = descriptor->full_name();
    const std::string rootFileDescriptorName = fileDescriptor->name();

    FileDescriptorSet fileDescriptorSet;
    internalCollectFileDescriptors(fileDescriptor, fileDescriptorSet);

    using namespace boost::archive::iterators;
    using base64 = base64_from_binary<transform_width<const char*, 6, 8>>;

    std::vector<char> bytes(fileDescriptorSet.ByteSizeLong());
    // SerializeToArray() takes the buffer size as an int, so make the narrowing explicit
    fileDescriptorSet.SerializeToArray(bytes.data(), static_cast<int>(bytes.size()));

    std::string encodedDescriptorSet(base64(bytes.data()), base64(bytes.data() + bytes.size()));
    // The boost iterators only emit the data characters. Append the '=' padding that base64 requires
    // so that the length is a multiple of 4, otherwise strict decoders reject the string whenever the
    // number of serialized bytes is not a multiple of 3.
    const std::string::size_type numTrailingChars = encodedDescriptorSet.size() % 4;
    if (numTrailingChars != 0) {
        encodedDescriptorSet.append(4 - numTrailingChars, '=');
    }

    const std::string schemaJson = R"({"fileDescriptorSet":")" + encodedDescriptorSet +
                                   R"(","rootMessageTypeName":")" + rootMessageTypeName +
                                   R"(","rootFileDescriptorName":")" + rootFileDescriptorName + R"("})";

    return SchemaInfo(SchemaType::PROTOBUF_NATIVE, "", schemaJson);
}

void internalCollectFileDescriptors(const FileDescriptor* fileDescriptor,
FileDescriptorSet& fileDescriptorSet) {
fileDescriptor->CopyTo(fileDescriptorSet.add_file());
for (int i = 0; i < fileDescriptor->dependency_count(); i++) {
// collect the file descriptors recursively
internalCollectFileDescriptors(fileDescriptor->dependency(i), fileDescriptorSet);
}
}

} // namespace pulsar
2 changes: 2 additions & 0 deletions pulsar-client-cpp/lib/Schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ PULSAR_PUBLIC const char *strSchemaType(SchemaType schemaType) {
return "AUTO_PUBLISH";
case KEY_VALUE:
return "KEY_VALUE";
case PROTOBUF_NATIVE:
return "PROTOBUF_NATIVE";
};
// NOTE : Do not add default case in the switch above. In future if we get new cases for
// Schema and miss them in the switch above we would like to get notified. Adding
Expand Down
17 changes: 16 additions & 1 deletion pulsar-client-cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,21 @@
# under the License.
#

# Protobuf compiler used to generate the C++ sources of the test messages.
# It can be overridden with -DPROTOC_PATH=<path>, otherwise the bare command name `protoc` is used.
if (NOT PROTOC_PATH)
    set(PROTOC_PATH protoc)
endif()

# Directory that receives the generated *.pb.h and *.pb.cc files. It is added to the include path so
# that the tests can include the generated headers.
# NOTE(review): AUTOGEN_DIR is not set in this file, it is expected to come from the parent scope.
set(LIB_AUTOGEN_DIR ${AUTOGEN_DIR}/tests)
file(MAKE_DIRECTORY ${LIB_AUTOGEN_DIR})
include_directories(${LIB_AUTOGEN_DIR})

# The test messages are defined by the .proto files under pulsar-client/src/test/proto
set(PROTO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../pulsar-client/src/test/proto)
set(TEST_PROTO_FILES ${PROTO_DIR}/Test.proto ${PROTO_DIR}/ExternalTest.proto)
set(PROTO_SOURCES ${LIB_AUTOGEN_DIR}/Test.pb.cc ${LIB_AUTOGEN_DIR}/ExternalTest.pb.cc)
add_custom_command(
    OUTPUT ${PROTO_SOURCES}
    COMMAND ${PROTOC_PATH} -I ${PROTO_DIR} ${TEST_PROTO_FILES} --cpp_out=${LIB_AUTOGEN_DIR}
    # Regenerate the sources whenever one of the .proto files is modified
    DEPENDS ${TEST_PROTO_FILES}
    COMMENT "Generating C++ sources from the test .proto files"
    VERBATIM)

find_library(GMOCK_LIBRARY_PATH gmock)
find_library(GTEST_LIBRARY_PATH gtest)
find_library(GMOCKD_LIBRARY_PATH gmockd)
Expand All @@ -30,7 +45,7 @@ endif()

file(GLOB TEST_SOURCES *.cc)

add_executable(main ${TEST_SOURCES})
add_executable(main ${TEST_SOURCES} ${PROTO_SOURCES})

target_include_directories(main PRIVATE ${CMAKE_SOURCE_DIR}/lib ${AUTOGEN_DIR}/lib)

Expand Down
Loading

0 comments on commit 992760e

Please sign in to comment.