Skip to content

Commit

Permalink
ARROW-8914: [C++] Keep BasicDecimal128 in native-endian order
Browse files Browse the repository at this point in the history
This PR changes to keep BasicDecimal128 in native-endian order instead of little-endian order.

There are two rationales.
1. In 2017, Decimal128 is defined as [little-endian order](apache@b2596f6). Now, an endianness relies on [this flag](https://github.com/apache/arrow/blob/master/docs/source/format/Columnar.rst#byte-order-endianness).

2. Generated code by Gandiva processes a computation with native-endian. The generated code uses `int128` generated by `ToBytes` for `Decimal128`.

Closes apache#7256 from kiszk/ARROW-8914

Authored-by: Kazuaki Ishizaki <[email protected]>
Signed-off-by: François Saint-Jacques <[email protected]>
  • Loading branch information
kiszk authored and fsaintjacques committed May 29, 2020
1 parent 75315f5 commit 3c1bc81
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 19 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/c/bridge_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,7 @@ static const uint8_t data_buffer2[] = "abcdefghijklmnopqrstuvwxyz";
#if ARROW_LITTLE_ENDIAN
static const uint64_t data_buffer3[] = {123456789, 0, 987654321, 0};
#else
static const uint64_t data_buffer3[] = {0x15cd5b0700000000, 0, 0xb168de3a00000000, 0};
static const uint64_t data_buffer3[] = {0, 123456789, 0, 987654321};
#endif
static const uint8_t data_buffer4[] = {1, 2, 0, 1, 3, 0};
static const float data_buffer5[] = {0.0f, 1.5f, -2.0f, 3.0f, 4.0f, 5.0f};
Expand Down
20 changes: 15 additions & 5 deletions cpp/src/arrow/util/basic_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,15 @@ static constexpr auto kCarryBit = static_cast<uint64_t>(1) << static_cast<uint64
static constexpr BasicDecimal128 kMaxValue =
BasicDecimal128(5421010862427522170LL, 687399551400673280ULL - 1);

#if ARROW_LITTLE_ENDIAN
BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
: BasicDecimal128(
BitUtil::FromLittleEndian(reinterpret_cast<const int64_t*>(bytes)[1]),
BitUtil::FromLittleEndian(reinterpret_cast<const uint64_t*>(bytes)[0])) {}
: BasicDecimal128(reinterpret_cast<const int64_t*>(bytes)[1],
reinterpret_cast<const uint64_t*>(bytes)[0]) {}
#else
BasicDecimal128::BasicDecimal128(const uint8_t* bytes)
: BasicDecimal128(reinterpret_cast<const int64_t*>(bytes)[0],
reinterpret_cast<const uint64_t*>(bytes)[1]) {}
#endif

std::array<uint8_t, 16> BasicDecimal128::ToBytes() const {
std::array<uint8_t, 16> out{{0}};
Expand All @@ -139,8 +144,13 @@ std::array<uint8_t, 16> BasicDecimal128::ToBytes() const {

void BasicDecimal128::ToBytes(uint8_t* out) const {
DCHECK_NE(out, nullptr);
reinterpret_cast<uint64_t*>(out)[0] = BitUtil::ToLittleEndian(low_bits_);
reinterpret_cast<int64_t*>(out)[1] = BitUtil::ToLittleEndian(high_bits_);
#if ARROW_LITTLE_ENDIAN
reinterpret_cast<uint64_t*>(out)[0] = low_bits_;
reinterpret_cast<int64_t*>(out)[1] = high_bits_;
#else
reinterpret_cast<int64_t*>(out)[0] = high_bits_;
reinterpret_cast<uint64_t*>(out)[1] = low_bits_;
#endif
}

BasicDecimal128& BasicDecimal128::Negate() {
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/util/basic_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class ARROW_EXPORT BasicDecimal128 {
static_cast<uint64_t>(value)) {}

/// \brief Create a BasicDecimal128 from an array of bytes. Bytes are assumed to be in
/// little-endian byte order.
/// native-endian byte order.
explicit BasicDecimal128(const uint8_t* bytes);

/// \brief Negate the current value (in-place)
Expand Down Expand Up @@ -113,7 +113,7 @@ class ARROW_EXPORT BasicDecimal128 {
/// \brief Get the low bits of the two's complement representation of the number.
inline uint64_t low_bits() const { return low_bits_; }

/// \brief Return the raw bytes of the value in little-endian byte order.
/// \brief Return the raw bytes of the value in native-endian byte order.
std::array<uint8_t, 16> ToBytes() const;
void ToBytes(uint8_t* out) const;

Expand Down
30 changes: 19 additions & 11 deletions cpp/src/arrow/util/decimal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -387,28 +387,36 @@ TEST(Decimal128Test, TestFromBigEndian) {
127 /* 01111111 */}) {
Decimal128 value(start);
for (int ii = 0; ii < 16; ++ii) {
auto little_endian = value.ToBytes();
std::reverse(little_endian.begin(), little_endian.end());
auto native_endian = value.ToBytes();
#if ARROW_LITTLE_ENDIAN
std::reverse(native_endian.begin(), native_endian.end());
#endif
// Limit the number of bytes we are passing to make
// sure that it works correctly. That's why all of the
// 'start' values don't have a 1 in the most significant
// bit place
ASSERT_OK_AND_EQ(value,
Decimal128::FromBigEndian(little_endian.data() + 15 - ii, ii + 1));
Decimal128::FromBigEndian(native_endian.data() + 15 - ii, ii + 1));

// Negate it and convert to big endian
// Negate it
auto negated = -value;
little_endian = negated.ToBytes();
std::reverse(little_endian.begin(), little_endian.end());
native_endian = negated.ToBytes();
#if ARROW_LITTLE_ENDIAN
// convert to big endian
std::reverse(native_endian.begin(), native_endian.end());
#endif
// The sign bit is looked up in the MSB
ASSERT_OK_AND_EQ(negated,
Decimal128::FromBigEndian(little_endian.data() + 15 - ii, ii + 1));
Decimal128::FromBigEndian(native_endian.data() + 15 - ii, ii + 1));

// Take the complement and convert to big endian
// Take the complement
auto complement = ~value;
little_endian = complement.ToBytes();
std::reverse(little_endian.begin(), little_endian.end());
ASSERT_OK_AND_EQ(complement, Decimal128::FromBigEndian(little_endian.data(), 16));
native_endian = complement.ToBytes();
#if ARROW_LITTLE_ENDIAN
// convert to big endian
std::reverse(native_endian.begin(), native_endian.end());
#endif
ASSERT_OK_AND_EQ(complement, Decimal128::FromBigEndian(native_endian.data(), 16));

value <<= 8;
value += Decimal128(start);
Expand Down

0 comments on commit 3c1bc81

Please sign in to comment.