Skip to content

Commit

Permalink
ARROW-6860: [Python][C++] Do not link shared libraries monolithically…
Browse files Browse the repository at this point in the history
… to pyarrow.lib, add libarrow_python_flight.so

This adds a new shared library, libarrow_python_flight.so, so that the Cython _flight extension can link against libarrow_flight and this new library. I initially tried moving the Flight Python bindings directly into libarrow_flight, but realized this would create a transitive dependency on libpython, which is not desirable: any shared library that uses Python C APIs is expected to be loaded into a running Python interpreter rather than linked explicitly to libpython.

Because Apache ORC also needs to statically link Protocol Buffers, I have disabled it in the manylinux wheels. Hopefully we can come up with a solution where projects like Apache Beam, TensorFlow, and others can all use Protocol Buffers together without running into these problems.

Closes apache#5627 from wesm/ARROW-6860 and squashes the following commits:

d5d67f8 <Wes McKinney> Revert libarrow_flight.pxd changes
b31fbdf <Wes McKinney> Build libarrow_python_flight that links to libarrow_python and libarrow_flight. Do not link all shared libraries to Cython "lib" extension

Authored-by: Wes McKinney <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
  • Loading branch information
wesm committed Oct 12, 2019
1 parent d47a40e commit 102acc4
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 50 deletions.
41 changes: 39 additions & 2 deletions cpp/cmake_modules/FindArrowFlight.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,13 @@ find_library(ARROW_FLIGHT_LIB_PATH
PATHS ${ARROW_SEARCH_LIB_PATH}
PATH_SUFFIXES ${LIB_PATH_SUFFIXES}
NO_DEFAULT_PATH)
# Look for the Arrow Flight Python bindings library in the same restricted
# locations (no default system paths) as the other Arrow libraries.
find_library(ARROW_PYTHON_FLIGHT_LIB_PATH NAMES arrow_python_flight
             PATHS ${ARROW_SEARCH_LIB_PATH}
             PATH_SUFFIXES ${LIB_PATH_SUFFIXES}
             NO_DEFAULT_PATH)

# Keep only the containing directory of each library that was located.
get_filename_component(ARROW_PYTHON_FLIGHT_LIBS
                       ${ARROW_PYTHON_FLIGHT_LIB_PATH}
                       DIRECTORY)
get_filename_component(ARROW_FLIGHT_LIBS
                       ${ARROW_FLIGHT_LIB_PATH}
                       DIRECTORY)

if(MSVC)
# Prioritize "/bin" over LIB_PATH_SUFFIXES - DLL files are installed
Expand All @@ -77,7 +83,15 @@ if(MSVC)
PATHS ${ARROW_HOME}
PATH_SUFFIXES "bin" ${LIB_PATH_SUFFIXES}
NO_DEFAULT_PATH)
# Locate the Arrow Flight Python library itself. The name searched for must be
# arrow_python_flight (not arrow_flight); otherwise the directory derived from
# this result would describe the plain Flight library instead.
find_library(ARROW_PYTHON_FLIGHT_SHARED_LIBRARIES
             NAMES arrow_python_flight
             PATHS ${ARROW_HOME}
             PATH_SUFFIXES "bin" ${LIB_PATH_SUFFIXES}
             NO_DEFAULT_PATH)

get_filename_component(ARROW_FLIGHT_SHARED_LIBS ${ARROW_FLIGHT_SHARED_LIBRARIES} DIRECTORY)
get_filename_component(ARROW_PYTHON_FLIGHT_SHARED_LIBS
${ARROW_PYTHON_FLIGHT_SHARED_LIBRARIES} DIRECTORY)
endif()

if(ARROW_FLIGHT_INCLUDE_DIR AND ARROW_FLIGHT_LIBS)
Expand Down Expand Up @@ -117,9 +131,32 @@ else()
set(ARROW_FLIGHT_FOUND FALSE)
endif()

# Derive the static/shared library paths for the optional Arrow Flight Python
# bindings. ARROW_PYTHON_FLIGHT_LIBS is the directory in which
# arrow_python_flight was located; it is empty when the library was not found.
if(ARROW_PYTHON_FLIGHT_LIBS)
  set(ARROW_PYTHON_FLIGHT_FOUND TRUE)
  set(ARROW_PYTHON_FLIGHT_LIB_NAME arrow_python_flight)
  if(MSVC)
    set(
      ARROW_PYTHON_FLIGHT_STATIC_LIB
      ${ARROW_PYTHON_FLIGHT_LIBS}/${ARROW_PYTHON_FLIGHT_LIB_NAME}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
      )
    set(ARROW_PYTHON_FLIGHT_SHARED_LIB
        ${ARROW_PYTHON_FLIGHT_SHARED_LIBS}/${ARROW_PYTHON_FLIGHT_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
    set(ARROW_PYTHON_FLIGHT_SHARED_IMP_LIB
        ${ARROW_PYTHON_FLIGHT_LIBS}/${ARROW_PYTHON_FLIGHT_LIB_NAME}.lib)
  else()
    # Build the paths from the directory where arrow_python_flight was actually
    # found, rather than from the directory of the core Arrow library, so the
    # result is correct even if the two are installed in different places.
    set(ARROW_PYTHON_FLIGHT_STATIC_LIB
        ${ARROW_PYTHON_FLIGHT_LIBS}/lib${ARROW_PYTHON_FLIGHT_LIB_NAME}.a)
    set(ARROW_PYTHON_FLIGHT_SHARED_LIB
        ${ARROW_PYTHON_FLIGHT_LIBS}/lib${ARROW_PYTHON_FLIGHT_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()

  message(STATUS "Found the Arrow Flight Python library: ${ARROW_PYTHON_FLIGHT_LIB_PATH}")
else()
  # Always leave the result variable defined, mirroring ARROW_FLIGHT_FOUND.
  set(ARROW_PYTHON_FLIGHT_FOUND FALSE)
endif()

if(MSVC)
mark_as_advanced(ARROW_FLIGHT_INCLUDE_DIR ARROW_FLIGHT_STATIC_LIB ARROW_FLIGHT_SHARED_LIB
ARROW_FLIGHT_SHARED_IMP_LIB)
ARROW_FLIGHT_SHARED_IMP_LIB
ARROW_PYTHON_FLIGHT_STATIC_LIB ARROW_PYTHON_FLIGHT_SHARED_LIB
ARROW_PYTHON_FLIGHT_SHARED_IMP_LIB)
else()
mark_as_advanced(ARROW_FLIGHT_INCLUDE_DIR ARROW_FLIGHT_STATIC_LIB ARROW_FLIGHT_SHARED_LIB)
mark_as_advanced(ARROW_FLIGHT_INCLUDE_DIR ARROW_FLIGHT_STATIC_LIB ARROW_FLIGHT_SHARED_LIB
ARROW_PYTHON_FLIGHT_STATIC_LIB ARROW_PYTHON_FLIGHT_SHARED_LIB)
endif()
48 changes: 34 additions & 14 deletions cpp/src/arrow/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,26 +48,12 @@ set(ARROW_PYTHON_SRCS

set(ARROW_PYTHON_DEPENDENCIES arrow_dependencies)

if(ARROW_FLIGHT)
set(ARROW_PYTHON_DEPENDENCIES ${ARROW_PYTHON_DEPENDENCIES} flight_grpc_gen)
set(ARROW_PYTHON_SRCS ${ARROW_PYTHON_SRCS} flight.cc)
endif()

if("${COMPILER_FAMILY}" STREQUAL "clang")
set_property(SOURCE pyarrow.cc APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
endif()

set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared ${PYTHON_OTHER_LIBS})

if(ARROW_FLIGHT)
# Must link shared: we don't want to link more than one copy of gRPC
# into the eventual Cython shared object, otherwise gRPC calls fail
# with weird errors due to multiple copies of global static state
# (The other solution is to link gRPC shared everywhere instead of
# statically only in Flight)
set(ARROW_PYTHON_SHARED_LINK_LIBS ${ARROW_PYTHON_SHARED_LINK_LIBS} arrow_flight_shared)
endif()

if(WIN32)
set(ARROW_PYTHON_SHARED_LINK_LIBS ${ARROW_PYTHON_SHARED_LINK_LIBS} ${PYTHON_LIBRARIES})
endif()
Expand Down Expand Up @@ -100,6 +86,40 @@ if(ARROW_BUILD_STATIC AND MSVC)
target_compile_definitions(arrow_python_static PUBLIC ARROW_STATIC)
endif()

if(ARROW_FLIGHT AND ARROW_BUILD_SHARED)
  # arrow_python_flight has to link against the *shared* libarrow_flight.
  # Pulling a second copy of gRPC into the eventual Cython shared object makes
  # gRPC calls fail with weird errors, because its global static state would
  # then exist more than once. (The alternative would be to link gRPC shared
  # everywhere instead of statically only in Flight.)
  add_arrow_lib(arrow_python_flight
                SOURCES flight.cc
                OUTPUTS ARROW_PYFLIGHT_LIBRARIES
                DEPENDENCIES flight_grpc_gen
                # ARROW_VERSION_SCRIPT_FLAGS is defined in cpp/arrow/CMakeLists.txt
                SHARED_LINK_FLAGS ${ARROW_VERSION_SCRIPT_FLAGS}
                SHARED_LINK_LIBS arrow_python_shared arrow_flight_shared
                STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS}
                EXTRA_INCLUDES "${ARROW_PYTHON_INCLUDES}")

  # Building the arrow_python target also builds every Flight binding library
  # reported back through ARROW_PYFLIGHT_LIBRARIES.
  add_dependencies(arrow_python ${ARROW_PYFLIGHT_LIBRARIES})

  if(ARROW_BUILD_STATIC AND MSVC)
    target_compile_definitions(arrow_python_flight_static PUBLIC ARROW_STATIC)
  endif()

  # Mark each library as the exporting side of ARROW_PYFLIGHT_EXPORT while its
  # own sources are being compiled.
  foreach(PYFLIGHT_TARGET ${ARROW_PYFLIGHT_LIBRARIES})
    target_compile_definitions(${PYFLIGHT_TARGET} PRIVATE ARROW_PYFLIGHT_EXPORTING)
  endforeach()
endif()

if("${COMPILER_FAMILY}" STREQUAL "clang")
# Clang, be quiet. Python C API has lots of macros
set_property(SOURCE ${ARROW_PYTHON_SRCS}
Expand Down
59 changes: 41 additions & 18 deletions cpp/src/arrow/python/flight.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,39 @@
#include "arrow/python/common.h"
#include "arrow/python/config.h"

// ARROW_PYFLIGHT_EXPORT: symbol visibility marker for the declarations in this
// header. On Windows it expands to nothing (ARROW_STATIC), dllexport (while
// ARROW_PYFLIGHT_EXPORTING is defined) or dllimport (otherwise); elsewhere it
// requests default visibility unless the macro has already been defined.
#if defined(_WIN32) || defined(__CYGWIN__)  // Windows

// Silence MSVC warning 4251, or attribute warnings on GCC-compatible
// compilers targeting Windows.
#if defined(_MSC_VER)
#pragma warning(disable : 4251)
#else
#pragma GCC diagnostic ignored "-Wattributes"
#endif

#if defined(ARROW_STATIC)
#define ARROW_PYFLIGHT_EXPORT
#elif defined(ARROW_PYFLIGHT_EXPORTING)
#define ARROW_PYFLIGHT_EXPORT __declspec(dllexport)
#else
#define ARROW_PYFLIGHT_EXPORT __declspec(dllimport)
#endif

#else  // Not Windows

#if !defined(ARROW_PYFLIGHT_EXPORT)
#define ARROW_PYFLIGHT_EXPORT __attribute__((visibility("default")))
#endif

#endif  // Non-Windows

namespace arrow {

namespace py {

namespace flight {

ARROW_PYTHON_EXPORT
ARROW_PYFLIGHT_EXPORT
extern const char* kPyServerMiddlewareName;

/// \brief A table of function pointers for calling from C++ into
/// Python.
class ARROW_PYTHON_EXPORT PyFlightServerVtable {
class ARROW_PYFLIGHT_EXPORT PyFlightServerVtable {
public:
std::function<Status(PyObject*, const arrow::flight::ServerCallContext&,
const arrow::flight::Criteria*,
Expand Down Expand Up @@ -69,15 +90,15 @@ class ARROW_PYTHON_EXPORT PyFlightServerVtable {
list_actions;
};

class ARROW_PYTHON_EXPORT PyServerAuthHandlerVtable {
class ARROW_PYFLIGHT_EXPORT PyServerAuthHandlerVtable {
public:
std::function<Status(PyObject*, arrow::flight::ServerAuthSender*,
arrow::flight::ServerAuthReader*)>
authenticate;
std::function<Status(PyObject*, const std::string&, std::string*)> is_valid;
};

class ARROW_PYTHON_EXPORT PyClientAuthHandlerVtable {
class ARROW_PYFLIGHT_EXPORT PyClientAuthHandlerVtable {
public:
std::function<Status(PyObject*, arrow::flight::ClientAuthSender*,
arrow::flight::ClientAuthReader*)>
Expand All @@ -86,7 +107,8 @@ class ARROW_PYTHON_EXPORT PyClientAuthHandlerVtable {
};

/// \brief A helper to implement an auth mechanism in Python.
class ARROW_PYTHON_EXPORT PyServerAuthHandler : public arrow::flight::ServerAuthHandler {
class ARROW_PYFLIGHT_EXPORT PyServerAuthHandler
: public arrow::flight::ServerAuthHandler {
public:
explicit PyServerAuthHandler(PyObject* handler,
const PyServerAuthHandlerVtable& vtable);
Expand All @@ -100,7 +122,8 @@ class ARROW_PYTHON_EXPORT PyServerAuthHandler : public arrow::flight::ServerAuth
};

/// \brief A helper to implement an auth mechanism in Python.
class ARROW_PYTHON_EXPORT PyClientAuthHandler : public arrow::flight::ClientAuthHandler {
class ARROW_PYFLIGHT_EXPORT PyClientAuthHandler
: public arrow::flight::ClientAuthHandler {
public:
explicit PyClientAuthHandler(PyObject* handler,
const PyClientAuthHandlerVtable& vtable);
Expand All @@ -113,7 +136,7 @@ class ARROW_PYTHON_EXPORT PyClientAuthHandler : public arrow::flight::ClientAuth
PyClientAuthHandlerVtable vtable_;
};

class ARROW_PYTHON_EXPORT PyFlightServer : public arrow::flight::FlightServerBase {
class ARROW_PYFLIGHT_EXPORT PyFlightServer : public arrow::flight::FlightServerBase {
public:
explicit PyFlightServer(PyObject* server, const PyFlightServerVtable& vtable);

Expand Down Expand Up @@ -152,7 +175,7 @@ typedef std::function<Status(PyObject*, std::unique_ptr<arrow::flight::Result>*)
PyFlightResultStreamCallback;

/// \brief A ResultStream built around a Python callback.
class ARROW_PYTHON_EXPORT PyFlightResultStream : public arrow::flight::ResultStream {
class ARROW_PYFLIGHT_EXPORT PyFlightResultStream : public arrow::flight::ResultStream {
public:
/// \brief Construct a FlightResultStream from a Python object and callback.
/// Must only be called while holding the GIL.
Expand All @@ -167,7 +190,7 @@ class ARROW_PYTHON_EXPORT PyFlightResultStream : public arrow::flight::ResultStr

/// \brief A wrapper around a FlightDataStream that keeps alive a
/// Python object backing it.
class ARROW_PYTHON_EXPORT PyFlightDataStream : public arrow::flight::FlightDataStream {
class ARROW_PYFLIGHT_EXPORT PyFlightDataStream : public arrow::flight::FlightDataStream {
public:
/// \brief Construct a FlightDataStream from a Python object and underlying stream.
/// Must only be called while holding the GIL.
Expand All @@ -183,7 +206,7 @@ class ARROW_PYTHON_EXPORT PyFlightDataStream : public arrow::flight::FlightDataS
std::unique_ptr<arrow::flight::FlightDataStream> stream_;
};

class ARROW_PYTHON_EXPORT PyServerMiddlewareFactory
class ARROW_PYFLIGHT_EXPORT PyServerMiddlewareFactory
: public arrow::flight::ServerMiddlewareFactory {
public:
/// \brief A callback to create the middleware instance in Python
Expand All @@ -205,7 +228,7 @@ class ARROW_PYTHON_EXPORT PyServerMiddlewareFactory
StartCallCallback start_call_;
};

class ARROW_PYTHON_EXPORT PyServerMiddleware : public arrow::flight::ServerMiddleware {
class ARROW_PYFLIGHT_EXPORT PyServerMiddleware : public arrow::flight::ServerMiddleware {
public:
typedef std::function<Status(PyObject*,
arrow::flight::AddCallHeaders* outgoing_headers)>
Expand All @@ -231,7 +254,7 @@ class ARROW_PYTHON_EXPORT PyServerMiddleware : public arrow::flight::ServerMiddl
Vtable vtable_;
};

class ARROW_PYTHON_EXPORT PyClientMiddlewareFactory
class ARROW_PYFLIGHT_EXPORT PyClientMiddlewareFactory
: public arrow::flight::ClientMiddlewareFactory {
public:
/// \brief A callback to create the middleware instance in Python
Expand All @@ -251,7 +274,7 @@ class ARROW_PYTHON_EXPORT PyClientMiddlewareFactory
StartCallCallback start_call_;
};

class ARROW_PYTHON_EXPORT PyClientMiddleware : public arrow::flight::ClientMiddleware {
class ARROW_PYFLIGHT_EXPORT PyClientMiddleware : public arrow::flight::ClientMiddleware {
public:
typedef std::function<Status(PyObject*,
arrow::flight::AddCallHeaders* outgoing_headers)>
Expand Down Expand Up @@ -284,7 +307,7 @@ typedef std::function<Status(PyObject*, arrow::flight::FlightPayload*)>
PyGeneratorFlightDataStreamCallback;

/// \brief A FlightDataStream built around a Python callback.
class ARROW_PYTHON_EXPORT PyGeneratorFlightDataStream
class ARROW_PYFLIGHT_EXPORT PyGeneratorFlightDataStream
: public arrow::flight::FlightDataStream {
public:
/// \brief Construct a FlightDataStream from a Python object and underlying stream.
Expand All @@ -304,22 +327,22 @@ class ARROW_PYTHON_EXPORT PyGeneratorFlightDataStream
PyGeneratorFlightDataStreamCallback callback_;
};

ARROW_PYTHON_EXPORT
ARROW_PYFLIGHT_EXPORT
Status CreateFlightInfo(const std::shared_ptr<arrow::Schema>& schema,
const arrow::flight::FlightDescriptor& descriptor,
const std::vector<arrow::flight::FlightEndpoint>& endpoints,
int64_t total_records, int64_t total_bytes,
std::unique_ptr<arrow::flight::FlightInfo>* out);

ARROW_PYTHON_EXPORT
ARROW_PYFLIGHT_EXPORT
Status DeserializeBasicAuth(const std::string& buf,
std::unique_ptr<arrow::flight::BasicAuth>* out);

ARROW_PYTHON_EXPORT
ARROW_PYFLIGHT_EXPORT
Status SerializeBasicAuth(const arrow::flight::BasicAuth& basic_auth, std::string* out);

/// \brief Create a SchemaResult from schema.
ARROW_PYTHON_EXPORT
ARROW_PYFLIGHT_EXPORT
Status CreateSchemaResult(const std::shared_ptr<arrow::Schema>& schema,
std::unique_ptr<arrow::flight::SchemaResult>* out);

Expand Down
Loading

0 comments on commit 102acc4

Please sign in to comment.