Skip to content

Commit

Permalink
Refactor thrift for ext-data-source to generate only necessary structs
Browse files Browse the repository at this point in the history
ext-data-source only needs a small subset of the thrift structures, so this
separates the dependencies between files so that just the necessary structs
are generated for ext-data-source. Afterwards, we can remove extra maven
dependencies which were using environment variables to get versions. While the
environment variables work when building the pom, they are not propagated to
dependencies so building fe/pom.xml ended up producing lots of warnings which
are now gone.

Change-Id: I267fe7bc7a54c3c21aad8c1ffce07cf1a1e07c5e
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3748
Reviewed-by: Matthew Jacobs <[email protected]>
Tested-by: jenkins
(cherry picked from commit 1f73896)
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3767
  • Loading branch information
Matthew Jacobs authored and jenkins committed Aug 5, 2014
1 parent 419c1e6 commit 162d745
Show file tree
Hide file tree
Showing 17 changed files with 123 additions and 71 deletions.
2 changes: 2 additions & 0 deletions be/generated-sources/gen-cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ set(SRC_FILES
ResourceBrokerService_constants.cpp
ResourceBrokerService_types.cpp
ResourceBrokerService.cpp
Results_constants.cpp
Results_types.cpp
Partitions_constants.cpp
Partitions_types.cpp
Planner_constants.cpp
Expand Down
1 change: 0 additions & 1 deletion be/src/exec/hdfs-table-sink.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
#include <boost/date_time/posix_time/posix_time.hpp>
#include <stdlib.h>

#include "gen-cpp/Data_types.h"
#include "gen-cpp/ImpalaInternalService_constants.h"

using namespace std;
Expand Down
1 change: 0 additions & 1 deletion be/src/runtime/data-stream-recvr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include <boost/thread/locks.hpp>
#include <boost/thread/mutex.hpp>

#include "gen-cpp/Data_types.h"
#include "runtime/data-stream-recvr.h"
#include "runtime/data-stream-mgr.h"
#include "runtime/row-batch.h"
Expand Down
3 changes: 2 additions & 1 deletion be/src/runtime/data-stream-recvr.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

#include "common/object-pool.h"
#include "common/status.h"
#include "gen-cpp/Types_types.h" // for TUniqueId
#include "gen-cpp/Types_types.h" // for TUniqueId
#include "gen-cpp/Results_types.h" // for TRowBatch
#include "runtime/descriptors.h"
#include "util/tuple-row-compare.h"

Expand Down
2 changes: 1 addition & 1 deletion be/src/runtime/data-stream-sender.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include "common/object-pool.h"
#include "common/status.h"
#include "util/runtime-profile.h"
#include "gen-cpp/Data_types.h" // for TRowBatch
#include "gen-cpp/Results_types.h" // for TRowBatch

namespace impala {

Expand Down
2 changes: 1 addition & 1 deletion be/src/runtime/row-batch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include "runtime/mem-tracker.h"
#include "util/compress.h"
#include "util/decompress.h"
#include "gen-cpp/Data_types.h"
#include "gen-cpp/Results_types.h"

using namespace boost;
using namespace std;
Expand Down
1 change: 1 addition & 0 deletions bin/impala-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ export IMPALA_SENTRY_VERSION=1.3.0-cdh5.1.0-SNAPSHOT
export IMPALA_LLAMA_VERSION=1.0.0-cdh5.1.0-SNAPSHOT
export IMPALA_AVRO_VERSION=1.7.4
export IMPALA_PARQUET_VERSION=1.2.5-cdh5.1.0-SNAPSHOT
# If the thrift version changes, please update ext-data-source/pom.xml
export IMPALA_THRIFT_VERSION=0.9.0
export IMPALA_LLVM_VERSION=3.3

Expand Down
45 changes: 39 additions & 6 deletions common/thrift/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ function(THRIFT_GEN VAR)
OUTPUT ${OUTPUT_BE_FILE}
COMMAND ${THRIFT_COMPILER} ${CPP_ARGS} ${FIL}
COMMAND ${THRIFT_COMPILER} ${JAVA_FE_ARGS} ${FIL}
COMMAND ${THRIFT_COMPILER} ${JAVA_EXT_DS_ARGS} ${FIL}
COMMAND ${THRIFT_COMPILER} ${PYTHON_ARGS} ${FIL}
DEPENDS ${ABS_FIL}
COMMENT "Running thrift compiler on ${FIL}"
Expand All @@ -88,6 +87,31 @@ function(THRIFT_GEN VAR)
set(${VAR} ${${VAR}} PARENT_SCOPE)
endfunction(THRIFT_GEN)

function(THRIFT_GEN_DS VAR)
IF (NOT ARGN)
MESSAGE(SEND_ERROR "Error: THRIFT_GEN_DS called without any src files")
RETURN()
ENDIF(NOT ARGN)

set(${VAR})
foreach(FIL ${ARGN})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(FIL_WE ${FIL} NAME_WE)

# This isn't the exact output file that's created, just a unique file
set(OUTPUT_FILE "${EXT_DS_OUTPUT_DIR}/${FIL_WE}.java")
list(APPEND ${VAR} ${OUTPUT_FILE})
add_custom_command(
OUTPUT ${OUTPUT_FILE}
COMMAND ${THRIFT_COMPILER} ${JAVA_EXT_DS_ARGS} ${FIL}
DEPENDS ${ABS_FIL}
COMMENT "Running thrift compiler for ext-data-source on ${FIL}"
VERBATIM
)
endforeach(FIL)
set(${VAR} ${${VAR}} PARENT_SCOPE)
endfunction(THRIFT_GEN_DS)

message("Using Thrift compiler: ${THRIFT_COMPILER}")
set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_HOME}/src/metastore/if)
set(BE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/be/generated-sources)
Expand All @@ -106,14 +130,20 @@ set(JAVA_FE_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen java:hashcode -o ${FE_OUTPUT
set(JAVA_EXT_DS_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen java:hashcode -o ${EXT_DS_OUTPUT_DIR})
set(PYTHON_ARGS ${THRIFT_INCLUDE_DIR_OPTION} -r --gen py -o ${PYTHON_OUTPUT_DIR})

set (EXT_DATA_SRC_FILES
ExternalDataSource.thrift
Data.thrift
Status.thrift
Types.thrift
)

set (SRC_FILES
beeswax.thrift
CatalogInternalService.thrift
CatalogObjects.thrift
CatalogService.thrift
cli_service.thrift
DataSinks.thrift
Data.thrift
Descriptors.thrift
ExecStats.thrift
Frontend.thrift
Expand All @@ -130,15 +160,18 @@ set (SRC_FILES
Partitions.thrift
parquet.thrift
ResourceBrokerService.thrift
Results.thrift
RuntimeProfile.thrift
StatestoreService.thrift
Status.thrift
Types.thrift
${EXT_DATA_SRC_FILES}
)


# Create a build command for each of the thrift src files and generate
# a list of files they produce
THRIFT_GEN(THRIFT_FILES ${SRC_FILES})
THRIFT_GEN(THRIFT_ALL_FILES ${SRC_FILES})
THRIFT_GEN_DS(THRIFT_DATA_SRC_FILES ${EXT_DATA_SRC_FILES})

# Add a custom target that generates all the thrift files
add_custom_target(thrift-cpp ALL DEPENDS ${THRIFT_FILES})
add_custom_target(thrift-cpp ALL DEPENDS ${THRIFT_ALL_FILES})
add_custom_target(thrift-ext-data-src ALL DEPENDS ${THRIFT_DATA_SRC_FILES})
4 changes: 2 additions & 2 deletions common/thrift/CatalogService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ include "CatalogObjects.thrift"
include "JniCatalog.thrift"
include "Types.thrift"
include "Status.thrift"
include "Data.thrift"
include "Results.thrift"

// CatalogServer service API and related structs.

Expand Down Expand Up @@ -109,7 +109,7 @@ struct TDdlExecResponse {

// Result of DDL operation to be returned to the client. Currently only set
// by COMPUTE STATS.
3: optional Data.TResultSet result_set
3: optional Results.TResultSet result_set
}

// Updates the metastore with new partition information and returns a response
Expand Down
38 changes: 0 additions & 38 deletions common/thrift/Data.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,6 @@
namespace cpp impala
namespace java com.cloudera.impala.thrift

include "Types.thrift"
include "CatalogObjects.thrift"

// Serialized, self-contained version of a RowBatch (in be/src/runtime/row-batch.h).
struct TRowBatch {
// total number of rows contained in this batch
1: required i32 num_rows

// row composition
2: required list<Types.TTupleId> row_tuples

// There are a total of num_rows * num_tuples_per_row offsets
// pointing into tuple_data.
// An offset of -1 records a NULL.
3: list<i32> tuple_offsets

// binary tuple data
// TODO: figure out how we can avoid copying the data during TRowBatch construction
4: string tuple_data

// Indicates the type of compression used
5: required CatalogObjects.THdfsCompression compression_type

// Indicates the uncompressed size
6:i32 uncompressed_size
}

// this is a union over all possible return types
struct TColumnValue {
1: optional bool bool_val
Expand Down Expand Up @@ -75,14 +48,3 @@ struct TColumnData {
8: optional list<string> string_vals;
9: optional list<binary> binary_vals;
}

struct TResultSetMetadata {
1: required list<CatalogObjects.TColumn> columns
}

// List of rows and metadata describing their columns.
struct TResultSet {
1: required list<TResultRow> rows
2: required TResultSetMetadata schema
}

5 changes: 3 additions & 2 deletions common/thrift/Frontend.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ include "PlanNodes.thrift"
include "Planner.thrift"
include "Descriptors.thrift"
include "Data.thrift"
include "Results.thrift"
include "Exprs.thrift"
include "cli_service.thrift"
include "Status.thrift"
Expand Down Expand Up @@ -289,7 +290,7 @@ struct TQueryExecRequest {
per_node_scan_ranges

// Metadata of the query result set (only for select)
5: optional Data.TResultSetMetadata result_set_metadata
5: optional Results.TResultSetMetadata result_set_metadata

// Set if the query needs finalization after it executes
6: optional TFinalizeParams finalize_params
Expand Down Expand Up @@ -435,7 +436,7 @@ struct TExecRequest {
4: optional TCatalogOpRequest catalog_op_request

// Metadata of the query result set (not set for DML)
5: optional Data.TResultSetMetadata result_set_metadata
5: optional Results.TResultSetMetadata result_set_metadata

// Result of EXPLAIN. Set iff stmt_type is EXPLAIN
6: optional TExplainResult explain_result
Expand Down
4 changes: 2 additions & 2 deletions common/thrift/ImpalaInternalService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ include "Descriptors.thrift"
include "PlanNodes.thrift"
include "Planner.thrift"
include "DataSinks.thrift"
include "Data.thrift"
include "Results.thrift"
include "RuntimeProfile.thrift"
include "ImpalaService.thrift"
include "Llama.thrift"
Expand Down Expand Up @@ -373,7 +373,7 @@ struct TTransmitDataParams {
4: optional Types.TPlanNodeId dest_node_id

// required in V1
5: optional Data.TRowBatch row_batch
5: optional Results.TRowBatch row_batch

// if set to true, indicates that no more row batches will be sent
// for this dest_node_id
Expand Down
55 changes: 55 additions & 0 deletions common/thrift/Results.thrift
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace cpp impala
namespace java com.cloudera.impala.thrift

include "Data.thrift"
include "Types.thrift"
include "CatalogObjects.thrift"

// Serialized, self-contained version of a RowBatch (in be/src/runtime/row-batch.h).
struct TRowBatch {
// total number of rows contained in this batch
1: required i32 num_rows

// row composition
2: required list<Types.TTupleId> row_tuples

// There are a total of num_rows * num_tuples_per_row offsets
// pointing into tuple_data.
// An offset of -1 records a NULL.
3: list<i32> tuple_offsets

// binary tuple data
// TODO: figure out how we can avoid copying the data during TRowBatch construction
4: string tuple_data

// Indicates the type of compression used
5: required CatalogObjects.THdfsCompression compression_type

// Indicates the uncompressed size
6:i32 uncompressed_size
}

struct TResultSetMetadata {
1: required list<CatalogObjects.TColumn> columns
}

// List of rows and metadata describing their columns.
struct TResultSet {
1: required list<Data.TResultRow> rows
2: required TResultSetMetadata schema
}

17 changes: 1 addition & 16 deletions ext-data-source/api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@
<packaging>jar</packaging>
<url>.</url>

<properties>
<hive.version>${env.IMPALA_HIVE_VERSION}</hive.version>
<parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
</properties>

<repositories>
<repository>
<id>cdh.repo</id>
Expand Down Expand Up @@ -71,17 +66,7 @@
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>${env.IMPALA_THRIFT_VERSION}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>com.twitter</groupId>
<artifactId>parquet-hadoop-bundle</artifactId>
<version>${parquet.version}</version>
<version>${thrift.version}</version>
</dependency>
</dependencies>

Expand Down
4 changes: 4 additions & 0 deletions ext-data-source/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,8 @@
<module>test</module>
</modules>

<properties>
<thrift.version>0.9.0</thrift.version>
<guava.version>11.0.2</guava.version>
</properties>
</project>
5 changes: 5 additions & 0 deletions ext-data-source/sample/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@
<artifactId>impala-data-source-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
</dependencies>

<build>
Expand Down
5 changes: 5 additions & 0 deletions ext-data-source/test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@
<artifactId>impala-data-source-api</artifactId>
<version>${impala.extdatasrc.api.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
</dependencies>

<build>
Expand Down

0 comments on commit 162d745

Please sign in to comment.