Skip to content

Commit

Permalink
ARROW-9304: [C++] Add "AppendEmpty" builder APIs for use inside Struc…
Browse files Browse the repository at this point in the history
…tBuilder::AppendNull

Closes apache#7887 from tianchen92/ARROW-9304

Lead-authored-by: tianchen <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
tianchen92 and pitrou committed Oct 22, 2020
1 parent 5ad3c98 commit 843e8bb
Show file tree
Hide file tree
Showing 18 changed files with 424 additions and 65 deletions.
10 changes: 4 additions & 6 deletions cpp/src/arrow/array/array_struct_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

#include <gtest/gtest.h>

#include "arrow/array.h"
#include "arrow/builder.h"
#include "arrow/status.h"
Expand Down Expand Up @@ -266,10 +266,8 @@ TEST_F(TestStructBuilder, TestAppendNull) {
ASSERT_EQ(2, result_->field(1)->length());
ASSERT_TRUE(result_->IsNull(0));
ASSERT_TRUE(result_->IsNull(1));
ASSERT_TRUE(result_->field(0)->IsNull(0));
ASSERT_TRUE(result_->field(0)->IsNull(1));
ASSERT_TRUE(result_->field(1)->IsNull(0));
ASSERT_TRUE(result_->field(1)->IsNull(1));
ASSERT_EQ(0, result_->field(0)->null_count());
ASSERT_EQ(0, result_->field(1)->null_count());

ASSERT_EQ(Type::LIST, result_->field(0)->type_id());
ASSERT_EQ(Type::INT32, result_->field(1)->type_id());
Expand Down
58 changes: 55 additions & 3 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
namespace arrow {

using internal::checked_cast;
using internal::checked_pointer_cast;

class TestArray : public ::testing::Test {
public:
Expand Down Expand Up @@ -641,7 +642,7 @@ class TestPrimitiveBuilder : public TestBuilder {
std::shared_ptr<Array> out;
FinishAndCheckPadding(builder.get(), &out);

std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(out);
std::shared_ptr<ArrayType> result = checked_pointer_cast<ArrayType>(out);

// Builder is now reset
ASSERT_EQ(0, builder->length());
Expand Down Expand Up @@ -766,7 +767,7 @@ void TestPrimitiveBuilder<PBoolean>::Check(const std::unique_ptr<BooleanBuilder>
std::shared_ptr<Array> out;
FinishAndCheckPadding(builder.get(), &out);

std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(out);
std::shared_ptr<BooleanArray> result = checked_pointer_cast<BooleanArray>(out);

ASSERT_EQ(ex_null_count, result->null_count());
ASSERT_EQ(size, result->length());
Expand Down Expand Up @@ -883,7 +884,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {

std::shared_ptr<Array> out;
FinishAndCheckPadding(this->builder_.get(), &out);
auto result = std::dynamic_pointer_cast<typename TypeParam::ArrayType>(out);
auto result = checked_pointer_cast<typename TypeParam::ArrayType>(out);

for (int64_t i = 0; i < size; ++i) {
ASSERT_TRUE(result->IsNull(i)) << i;
Expand Down Expand Up @@ -917,6 +918,33 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNulls) {
}
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendEmptyValue) {
ASSERT_OK(this->builder_->AppendNull());
ASSERT_OK(this->builder_->AppendEmptyValue());
ASSERT_OK(this->builder_->AppendNulls(2));
ASSERT_OK(this->builder_->AppendEmptyValues(2));

std::shared_ptr<Array> out;
FinishAndCheckPadding(this->builder_.get(), &out);
ASSERT_OK(out->ValidateFull());

auto result = checked_pointer_cast<typename TypeParam::ArrayType>(out);
ASSERT_EQ(result->length(), 6);
ASSERT_EQ(result->null_count(), 3);

ASSERT_TRUE(result->IsNull(0));
ASSERT_FALSE(result->IsNull(1));
ASSERT_TRUE(result->IsNull(2));
ASSERT_TRUE(result->IsNull(3));
ASSERT_FALSE(result->IsNull(4));
ASSERT_FALSE(result->IsNull(5));

// implementation detail: the value slots are 0-initialized
for (int64_t i = 0; i < result->length(); ++i) {
ASSERT_EQ(result->Value(i), 0);
}
}

TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
DECL_T();

Expand Down Expand Up @@ -2104,6 +2132,18 @@ TEST_F(TestAdaptiveIntBuilder, TestAppendNulls) {
}
}

TEST_F(TestAdaptiveIntBuilder, TestAppendEmptyValue) {
ASSERT_OK(builder_->AppendNulls(2));
ASSERT_OK(builder_->AppendEmptyValue());
ASSERT_OK(builder_->Append(42));
ASSERT_OK(builder_->AppendEmptyValues(2));
Done();

ASSERT_OK(result_->ValidateFull());
// NOTE: The fact that we get 0 is really an implementation detail
AssertArraysEqual(*result_, *ArrayFromJSON(int8(), "[null, null, 0, 42, 0, 0]"));
}

TEST(TestAdaptiveIntBuilderWithStartIntSize, TestReset) {
auto builder = std::make_shared<AdaptiveIntBuilder>(
static_cast<uint8_t>(sizeof(int16_t)), default_memory_pool());
Expand Down Expand Up @@ -2322,6 +2362,18 @@ TEST_F(TestAdaptiveUIntBuilder, TestAppendNulls) {
}
}

TEST_F(TestAdaptiveUIntBuilder, TestAppendEmptyValue) {
ASSERT_OK(builder_->AppendNulls(2));
ASSERT_OK(builder_->AppendEmptyValue());
ASSERT_OK(builder_->Append(42));
ASSERT_OK(builder_->AppendEmptyValues(2));
Done();

ASSERT_OK(result_->ValidateFull());
// NOTE: The fact that we get 0 is really an implementation detail
AssertArraysEqual(*result_, *ArrayFromJSON(uint8(), "[null, null, 0, 42, 0, 0]"));
}

TEST(TestAdaptiveUIntBuilderWithStartIntSize, TestReset) {
auto builder = std::make_shared<AdaptiveUIntBuilder>(
static_cast<uint8_t>(sizeof(uint16_t)), default_memory_pool());
Expand Down
86 changes: 80 additions & 6 deletions cpp/src/arrow/array/array_union_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,24 @@ class UnionBuilderTest : public ::testing::Test {
AppendString("def");
AppendInt(-10);
AppendDouble(0.5);

ASSERT_OK(union_builder->Finish(&actual));
ASSERT_OK(actual->ValidateFull());
ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
}

void AppendNullsAndEmptyValues() {
AppendString("abc");
ASSERT_OK(union_builder->AppendNull());
ASSERT_OK(union_builder->AppendEmptyValue());
expected_types_vector.insert(expected_types_vector.end(), 3, I8);
AppendInt(42);
ASSERT_OK(union_builder->AppendNulls(2));
ASSERT_OK(union_builder->AppendEmptyValues(2));
expected_types_vector.insert(expected_types_vector.end(), 3, I8);

ASSERT_OK(union_builder->Finish(&actual));
ASSERT_OK(actual->ValidateFull());
ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
}

Expand All @@ -329,7 +346,9 @@ class UnionBuilderTest : public ::testing::Test {
AppendDouble(1.0);
AppendDouble(-1.0);
AppendDouble(0.5);

ASSERT_OK(union_builder->Finish(&actual));
ASSERT_OK(actual->ValidateFull());
ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);

ASSERT_EQ(I8, 0);
Expand Down Expand Up @@ -357,6 +376,7 @@ class UnionBuilderTest : public ::testing::Test {
AppendDouble(0.5);

ASSERT_OK(list_builder.Finish(actual));
ASSERT_OK((*actual)->ValidateFull());
ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
}

Expand All @@ -376,20 +396,20 @@ class SparseUnionBuilderTest : public UnionBuilderTest<SparseUnionBuilder> {

void AppendInt(int8_t i) override {
Base::AppendInt(i);
ASSERT_OK(str_builder->AppendNull());
ASSERT_OK(dbl_builder->AppendNull());
ASSERT_OK(str_builder->AppendEmptyValue());
ASSERT_OK(dbl_builder->AppendEmptyValue());
}

void AppendString(const std::string& str) override {
Base::AppendString(str);
ASSERT_OK(i8_builder->AppendNull());
ASSERT_OK(dbl_builder->AppendNull());
ASSERT_OK(i8_builder->AppendEmptyValue());
ASSERT_OK(dbl_builder->AppendEmptyValue());
}

void AppendDouble(double dbl) override {
Base::AppendDouble(dbl);
ASSERT_OK(i8_builder->AppendNull());
ASSERT_OK(str_builder->AppendNull());
ASSERT_OK(i8_builder->AppendEmptyValue());
ASSERT_OK(str_builder->AppendEmptyValue());
}
};

Expand All @@ -415,6 +435,34 @@ TEST_F(DenseUnionBuilderTest, Basics) {
ASSERT_ARRAYS_EQUAL(*expected, *actual);
}

TEST_F(DenseUnionBuilderTest, NullsAndEmptyValues) {
union_builder.reset(new DenseUnionBuilder(
default_memory_pool(), {i8_builder, str_builder, dbl_builder},
dense_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
{I8, STR, DBL})));
AppendNullsAndEmptyValues();

// Four null / empty values (the latter implementation-defined) were appended to I8
auto expected_i8 = ArrayFromJSON(int8(), "[null, 0, 42, null, 0]");
auto expected_str = ArrayFromJSON(utf8(), R"(["abc"])");
auto expected_dbl = ArrayFromJSON(float64(), "[]");

// "abc", null, 0, 42, null, null, 0, 0
auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 2, 3, 3, 4, 4]");

ASSERT_OK_AND_ASSIGN(auto expected,
DenseUnionArray::Make(*expected_types, *expected_offsets,
{expected_i8, expected_str, expected_dbl},
{"i8", "str", "dbl"}, {I8, STR, DBL}));

ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
ASSERT_ARRAYS_EQUAL(*expected, *actual);
// Physical arrays must be as expected
ASSERT_ARRAYS_EQUAL(*expected_i8, *actual->field(0));
ASSERT_ARRAYS_EQUAL(*expected_str, *actual->field(1));
ASSERT_ARRAYS_EQUAL(*expected_dbl, *actual->field(2));
}

TEST_F(DenseUnionBuilderTest, InferredType) {
AppendInferred();

Expand Down Expand Up @@ -467,6 +515,32 @@ TEST_F(SparseUnionBuilderTest, Basics) {
ASSERT_ARRAYS_EQUAL(*expected, *actual);
}

TEST_F(SparseUnionBuilderTest, NullsAndEmptyValues) {
union_builder.reset(new SparseUnionBuilder(
default_memory_pool(), {i8_builder, str_builder, dbl_builder},
sparse_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
{I8, STR, DBL})));
AppendNullsAndEmptyValues();

// "abc", null, 0, 42, null, null, 0, 0
// (note that getting 0 for empty values is implementation-defined)
auto expected_i8 = ArrayFromJSON(int8(), "[0, null, 0, 42, null, null, 0, 0]");
auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "", "", "", "", "", ""])");
auto expected_dbl = ArrayFromJSON(float64(), "[0, 0, 0, 0, 0, 0, 0, 0]");

ASSERT_OK_AND_ASSIGN(
auto expected,
SparseUnionArray::Make(*expected_types, {expected_i8, expected_str, expected_dbl},
{"i8", "str", "dbl"}, {I8, STR, DBL}));

ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
ASSERT_ARRAYS_EQUAL(*expected, *actual);
// Physical arrays must be as expected
ASSERT_ARRAYS_EQUAL(*expected_i8, *actual->field(0));
ASSERT_ARRAYS_EQUAL(*expected_str, *actual->field(1));
ASSERT_ARRAYS_EQUAL(*expected_dbl, *actual->field(2));
}

TEST_F(SparseUnionBuilderTest, InferredType) {
AppendInferred();

Expand Down
20 changes: 20 additions & 0 deletions cpp/src/arrow/array/builder_adaptive.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,26 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
return Status::OK();
}

Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(CommitPendingData());
ARROW_RETURN_NOT_OK(Reserve(length));
memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
UnsafeSetNotNull(length);
return Status::OK();
}

Status AppendEmptyValue() final {
pending_data_[pending_pos_] = 0;
pending_valid_[pending_pos_] = 1;
++pending_pos_;
++length_;

if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
return CommitPendingData();
}
return Status::OK();
}

void Reset() override;
Status Resize(int64_t capacity) override;

Expand Down
16 changes: 16 additions & 0 deletions cpp/src/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,25 @@ class ARROW_EXPORT ArrayBuilder {
/// Reset the builder.
virtual void Reset();

/// \brief Append a null value to builder
virtual Status AppendNull() = 0;
/// \brief Append a number of null values to builder
virtual Status AppendNulls(int64_t length) = 0;

/// \brief Append a non-null value to builder
///
/// The appended value is an implementation detail, but the corresponding
/// memory slot is guaranteed to be initialized.
/// This method is useful when appending a null value to a parent nested type.
virtual Status AppendEmptyValue() = 0;

/// \brief Append a number of non-null values to builder
///
/// The appended values are an implementation detail, but the corresponding
/// memory slot is guaranteed to be initialized.
/// This method is useful when appending null values to a parent nested type.
virtual Status AppendEmptyValues(int64_t length) = 0;

/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is your responsibility to use
/// this function responsibly.
Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/array/builder_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) {
return Status::OK();
}

Status FixedSizeBinaryBuilder::AppendEmptyValue() {
RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(true);
byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
return Status::OK();
}

Status FixedSizeBinaryBuilder::AppendEmptyValues(int64_t length) {
RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(length, true);
byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0);
return Status::OK();
}

void FixedSizeBinaryBuilder::Reset() {
ArrayBuilder::Reset();
byte_builder_.Reset();
Expand Down
Loading

0 comments on commit 843e8bb

Please sign in to comment.