Skip to content

Commit

Permalink
IMPALA-5014: Part 1: Round when casting string to decimal
Browse files Browse the repository at this point in the history
In this patch we implement rounding when casting string to decimal if
DECIMAL_V2 is enabled. The backend method that parses strings and
converts them to decimals is refactored to make it easier to understand.

Testing:
- Added some BE tests.

Change-Id: Icd8b92727fb384e6ff2d145e4aab7ae5d27db26d
Reviewed-on: http://gerrit.cloudera.org:8080/8774
Reviewed-by: Taras Bobrovytsky <[email protected]>
Tested-by: Impala Public Jenkins
  • Loading branch information
tbobrovytsky authored and cloudera-hudson committed Dec 22, 2017
1 parent 58aa9c4 commit f7c7c70
Show file tree
Hide file tree
Showing 12 changed files with 245 additions and 104 deletions.
2 changes: 1 addition & 1 deletion be/src/benchmarks/atod-benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void TestImpala(int batch_size, void* d) {
const StringValue& str = data->data[j];
StringParser::ParseResult dummy;
val = StringParser::StringToDecimal<Storage>(
str.ptr, str.len, column_type, &dummy);
str.ptr, str.len, column_type, false, &dummy);
data->result[j] = val;
}
}
Expand Down
2 changes: 1 addition & 1 deletion be/src/benchmarks/overflow-benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void AddTestData(TestData* data, int n) {
string str = ss.str();
StringParser::ParseResult dummy;
val = StringParser::StringToDecimal<int128_t>(
str.c_str(), str.length(), column_type, &dummy);
str.c_str(), str.length(), column_type, false, &dummy);
data->values.push_back(val);
data->int128_values.push_back(numeric_limits<int128_t>::max() * Rand());
}
Expand Down
12 changes: 6 additions & 6 deletions be/src/exec/hdfs-scanner-ir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ ScalarExprEvaluator* HdfsScanner::GetConjunctEval(int idx) const {

void StringToDecimalSymbolDummy() {
// Force linker to to link the object file containing these functions.
StringToDecimal4(nullptr, 0, 0, 0, nullptr);
StringToDecimal8(nullptr, 0, 0, 0, nullptr);
StringToDecimal16(nullptr, 0, 0, 0, nullptr);
StringToDecimal4(nullptr, 0, 0, 0, false, nullptr);
StringToDecimal8(nullptr, 0, 0, 0, false, nullptr);
StringToDecimal16(nullptr, 0, 0, 0, false, nullptr);
}

// Define the string parsing functions for llvm. Stamp out the templated functions
Expand Down Expand Up @@ -142,23 +142,23 @@ void IrStringToTimestamp(TimestampValue* out, const char* s, int len,
extern "C"
Decimal4Value IrStringToDecimal4(const char* s, int len, int type_precision,
int type_scale, ParseResult* result) {
auto ret = StringToDecimal4(s, len, type_precision, type_scale, result);
auto ret = StringToDecimal4(s, len, type_precision, type_scale, false, result);
if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
return ret;
}

extern "C"
Decimal8Value IrStringToDecimal8(const char* s, int len, int type_precision,
int type_scale, ParseResult* result) {
auto ret = StringToDecimal8(s, len, type_precision, type_scale, result);
auto ret = StringToDecimal8(s, len, type_precision, type_scale, false, result);
if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
return ret;
}

extern "C"
Decimal16Value IrStringToDecimal16(const char* s, int len, int type_precision,
int type_scale, ParseResult* result) {
auto ret = StringToDecimal16(s, len, type_precision, type_scale, result);
auto ret = StringToDecimal16(s, len, type_precision, type_scale, false, result);
if (*result != ParseResult::PARSE_SUCCESS) *result = ParseResult::PARSE_FAILURE;
return ret;
}
Expand Down
6 changes: 3 additions & 3 deletions be/src/exec/text-converter.inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,20 +141,20 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup
case 4:
*reinterpret_cast<Decimal4Value*>(slot) =
StringParser::StringToDecimal<int32_t>(
data, len, slot_desc->type(), &parse_result);
data, len, slot_desc->type(), false, &parse_result);
break;
case 8:
*reinterpret_cast<Decimal8Value*>(slot) =
StringParser::StringToDecimal<int64_t>(
data, len, slot_desc->type(), &parse_result);
data, len, slot_desc->type(), false, &parse_result);
break;
case 12:
DCHECK(false) << "Planner should not generate this.";
break;
case 16:
*reinterpret_cast<Decimal16Value*>(slot) =
StringParser::StringToDecimal<int128_t>(
data, len, slot_desc->type(), &parse_result);
data, len, slot_desc->type(), false, &parse_result);
break;
default:
DCHECK(false) << "Decimal slots can't be this size.";
Expand Down
10 changes: 7 additions & 3 deletions be/src/exprs/decimal-operators-ir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -534,22 +534,26 @@ IR_ALWAYS_INLINE DecimalVal DecimalOperators::CastToDecimalVal(
DecimalVal dv;
int precision = ctx->impl()->GetConstFnAttr(FunctionContextImpl::RETURN_TYPE_PRECISION);
int scale = ctx->impl()->GetConstFnAttr(FunctionContextImpl::RETURN_TYPE_SCALE);
bool is_decimal_v2 = ctx->impl()->GetConstFnAttr(FunctionContextImpl::DECIMAL_V2);
switch (ColumnType::GetDecimalByteSize(precision)) {
case 4: {
Decimal4Value dv4 = StringParser::StringToDecimal<int32_t>(
reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
is_decimal_v2, &result);
dv = DecimalVal(dv4.value());
break;
}
case 8: {
Decimal8Value dv8 = StringParser::StringToDecimal<int64_t>(
reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
is_decimal_v2, &result);
dv = DecimalVal(dv8.value());
break;
}
case 16: {
Decimal16Value dv16 = StringParser::StringToDecimal<int128_t>(
reinterpret_cast<char*>(val.ptr), val.len, precision, scale, &result);
reinterpret_cast<char*>(val.ptr), val.len, precision, scale,
is_decimal_v2, &result);
dv = DecimalVal(dv16.value());
break;
}
Expand Down
6 changes: 3 additions & 3 deletions be/src/exprs/expr-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -494,15 +494,15 @@ class ExprTest : public testing::Test {
switch (expected_type.GetByteSize()) {
case 4:
EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int32_t>(
value.data(), value.size(), expected_type, &result).value()) << query;
value.data(), value.size(), expected_type, false, &result).value()) << query;
break;
case 8:
EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int64_t>(
value.data(), value.size(), expected_type, &result).value()) << query;
value.data(), value.size(), expected_type, false, &result).value()) << query;
break;
case 16:
EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int128_t>(
value.data(), value.size(), expected_type, &result).value()) << query;
value.data(), value.size(), expected_type, false, &result).value()) << query;
break;
default:
EXPECT_TRUE(false) << expected_type << " " << expected_type.GetByteSize();
Expand Down
115 changes: 105 additions & 10 deletions be/src/runtime/decimal-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,34 @@ void VerifyEquals(const DecimalValue<T>& t1, const DecimalValue<T>& t2) {
}

template <typename T>
void VerifyParse(const string& s, int precision, int scale,
void VerifyParse(const string& s, int precision, int scale, bool round,
const DecimalValue<T>& expected_val, StringParser::ParseResult expected_result) {
StringParser::ParseResult parse_result;
DecimalValue<T> val = StringParser::StringToDecimal<T>(
s.c_str(), s.size(), precision, scale, &parse_result);
s.c_str(), s.size(), precision, scale, round, &parse_result);
EXPECT_EQ(expected_result, parse_result) << "Failed test string: " << s;
if (expected_result == StringParser::PARSE_SUCCESS ||
expected_result == StringParser::PARSE_UNDERFLOW) {
VerifyEquals(expected_val, val);
}
}

template <typename T>
void VerifyParse(const string& s, int precision, int scale,
const DecimalValue<T>& expected_val, StringParser::ParseResult expected_result) {
VerifyParse(s, precision, scale, false, expected_val, expected_result);
VerifyParse(s, precision, scale, true, expected_val, expected_result);
}

template <typename T>
void VerifyParse(const string& s, int precision, int scale,
const DecimalValue<T>& expected_val_v1, StringParser::ParseResult expected_result_v1,
const DecimalValue<T>& expected_val_v2, StringParser::ParseResult expected_result_v2)
{
VerifyParse(s, precision, scale, false, expected_val_v1, expected_result_v1);
VerifyParse(s, precision, scale, true, expected_val_v2, expected_result_v2);
}

template<typename T>
void VerifyToString(const T& decimal, int precision, int scale, const string& expected) {
EXPECT_EQ(decimal.ToString(precision, scale), expected);
Expand All @@ -69,6 +85,17 @@ void StringToAllDecimals(const string& s, int precision, int scale, int32_t val,
VerifyParse(s, precision, scale, Decimal16Value(val), result);
}

void StringToAllDecimals(const string& s, int precision, int scale,
int32_t val_v1, StringParser::ParseResult result_v1,
int32_t val_v2, StringParser::ParseResult result_v2) {
VerifyParse(s, precision, scale,
Decimal4Value(val_v1), result_v1, Decimal4Value(val_v2), result_v2);
VerifyParse(s, precision, scale,
Decimal8Value(val_v1), result_v1, Decimal8Value(val_v2), result_v2);
VerifyParse(s, precision, scale,
Decimal16Value(val_v1), result_v1, Decimal16Value(val_v2), result_v2);
}

TEST(IntToDecimal, Basic) {
Decimal16Value d16;
bool overflow = false;
Expand Down Expand Up @@ -271,13 +298,27 @@ TEST(StringToDecimal, Basic) {
StringToAllDecimals(".1", 10, 2, 10, StringParser::PARSE_SUCCESS);
StringToAllDecimals("00012.3", 10, 2, 1230, StringParser::PARSE_SUCCESS);
StringToAllDecimals("-00012.3", 10, 2, -1230, StringParser::PARSE_SUCCESS);
StringToAllDecimals("0.00000", 6, 5, 0, StringParser::PARSE_SUCCESS);
StringToAllDecimals("1.00000", 6, 5, 100000, StringParser::PARSE_SUCCESS);
StringToAllDecimals("0.000000", 6, 5, 0, StringParser::PARSE_UNDERFLOW);
StringToAllDecimals("1.000000", 6, 5, 100000, StringParser::PARSE_UNDERFLOW);
StringToAllDecimals("1.000004", 6, 5, 100000, StringParser::PARSE_UNDERFLOW);
StringToAllDecimals("1.000005", 6, 5,
100000, StringParser::PARSE_UNDERFLOW,
100001, StringParser::PARSE_UNDERFLOW);
StringToAllDecimals("0.4", 5, 0, 0, StringParser::PARSE_UNDERFLOW);
StringToAllDecimals("0.5", 5, 0,
0, StringParser::PARSE_UNDERFLOW,
1, StringParser::PARSE_UNDERFLOW);

StringToAllDecimals("123.45", 10, 2, 12345, StringParser::PARSE_SUCCESS);
StringToAllDecimals(".45", 10, 2, 45, StringParser::PARSE_SUCCESS);
StringToAllDecimals("-.45", 10, 2, -45, StringParser::PARSE_SUCCESS);
StringToAllDecimals(" 123.4 ", 10, 5, 12340000, StringParser::PARSE_SUCCESS);
StringToAllDecimals("-123.45", 10, 5, -12345000, StringParser::PARSE_SUCCESS);
StringToAllDecimals("-123.456", 10, 2, -12345, StringParser::PARSE_UNDERFLOW);
StringToAllDecimals("-123.456", 10, 2,
-12345, StringParser::PARSE_UNDERFLOW,
-12346, StringParser::PARSE_UNDERFLOW);

StringToAllDecimals("e", 2, 0, 0, StringParser::PARSE_FAILURE);
StringToAllDecimals("E", 2, 0, 0, StringParser::PARSE_FAILURE);
Expand Down Expand Up @@ -347,15 +388,17 @@ TEST(StringToDecimal, LargeDecimals) {
VerifyParse("123456.78", 8, 3,
Decimal8Value(12345678L), StringParser::PARSE_OVERFLOW);
VerifyParse("1234.5678", 8, 3,
Decimal8Value(1234567L), StringParser::PARSE_UNDERFLOW);
Decimal8Value(1234567L), StringParser::PARSE_UNDERFLOW,
Decimal8Value(1234568L), StringParser::PARSE_UNDERFLOW);
VerifyParse("12345.678", 8, 3,
Decimal16Value(12345678L), StringParser::PARSE_SUCCESS);
VerifyParse("-12345.678", 8, 3,
Decimal16Value(-12345678L), StringParser::PARSE_SUCCESS);
VerifyParse("123456.78", 8, 3,
Decimal16Value(12345678L), StringParser::PARSE_OVERFLOW);
VerifyParse("1234.5678", 8, 3,
Decimal16Value(1234567L), StringParser::PARSE_UNDERFLOW);
Decimal16Value(1234567L), StringParser::PARSE_UNDERFLOW,
Decimal16Value(1234568L), StringParser::PARSE_UNDERFLOW);

// Test max unscaled value for each of the decimal types.
VerifyParse("999999999", 9, 0,
Expand Down Expand Up @@ -411,16 +454,68 @@ TEST(StringToDecimal, LargeDecimals) {
38, 38, Decimal16Value(-result), StringParser::PARSE_SUCCESS);
VerifyParse("-.99999999999999999999999999999999999999e1",
38, 38, Decimal16Value(-result), StringParser::PARSE_OVERFLOW);
VerifyParse("-.999999999999999999999999999999999999990e-1",
38, 38, Decimal16Value(-result / 10), StringParser::PARSE_UNDERFLOW);
VerifyParse("-.999999999999999999999999999999999999990000000000000000e-20",
38, 38,
VerifyParse("-.999999999999999999999999999999999999990e-1", 38, 38,
Decimal16Value(-result / 10), StringParser::PARSE_UNDERFLOW,
Decimal16Value(-result / 10 - 1), StringParser::PARSE_UNDERFLOW);
VerifyParse("-.999999999999999999999999999999999999990000000000000000e-20", 38, 38,
Decimal16Value(-result / DecimalUtil::GetScaleMultiplier<int128_t>(20)),
StringParser::PARSE_UNDERFLOW,
Decimal16Value(-result / DecimalUtil::GetScaleMultiplier<int128_t>(20) - 1),
StringParser::PARSE_UNDERFLOW);
VerifyParse("100000000000000000000000000000000000000",
38, 0, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
VerifyParse("-100000000000000000000000000000000000000",
38, 0, Decimal16Value(0), StringParser::PARSE_OVERFLOW);

// Rounding tests.
VerifyParse("555.554", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
VerifyParse("555.555", 5, 2,
Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
// Too many digits to the left of the dot so we overflow.
VerifyParse("555.555e1", 5, 2, Decimal4Value(0), StringParser::PARSE_OVERFLOW);
VerifyParse("-555.555e1", 5, 2, Decimal4Value(0), StringParser::PARSE_OVERFLOW);
VerifyParse("5555.555", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
VerifyParse("-555.555", 5, 2,
Decimal4Value(-55555), StringParser::PARSE_UNDERFLOW,
Decimal4Value(-55556), StringParser::PARSE_UNDERFLOW);
VerifyParse("-5555.555", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
VerifyParse("5555.555e-1", 5, 2,
Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
VerifyParse("55555.555e-1", 5, 2, Decimal16Value(0), StringParser::PARSE_OVERFLOW);
// Too many digits to the right of the dot and not enough to the left. Rounding via
// ScaleDownAndRound().
VerifyParse("5.55444", 5, 2, Decimal4Value(555), StringParser::PARSE_UNDERFLOW);
VerifyParse("5.55555", 5, 2,
Decimal4Value(555), StringParser::PARSE_UNDERFLOW,
Decimal4Value(556), StringParser::PARSE_UNDERFLOW);
VerifyParse("5.555e-9", 5, 2, Decimal4Value(0), StringParser::PARSE_UNDERFLOW);
// The number of digits to the left of the dot equals to precision - scale. Rounding
// by adding 1 if the first truncated digit is greater or equal to 5.
VerifyParse("555.554", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
VerifyParse("555.555", 5, 2,
Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
VerifyParse("5.55554e2", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
VerifyParse("5.55555e2", 5, 2,
Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
VerifyParse("55555.4e-2", 5, 2, Decimal4Value(55555), StringParser::PARSE_UNDERFLOW);
VerifyParse("55555.5e-2", 5, 2,
Decimal4Value(55555), StringParser::PARSE_UNDERFLOW,
Decimal4Value(55556), StringParser::PARSE_UNDERFLOW);
// Rounding causes overflow.
VerifyParse("999.994", 5, 2, Decimal4Value(99999), StringParser::PARSE_UNDERFLOW);
VerifyParse("999.995", 5, 2,
Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
Decimal4Value(0), StringParser::PARSE_OVERFLOW);
VerifyParse("9.99995e2", 5, 2,
Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
Decimal4Value(0), StringParser::PARSE_OVERFLOW);
VerifyParse("99999.5e-2", 5, 2,
Decimal4Value(99999), StringParser::PARSE_UNDERFLOW,
Decimal4Value(0), StringParser::PARSE_OVERFLOW);
}

TEST(DecimalTest, Overflow) {
Expand Down Expand Up @@ -780,7 +875,7 @@ TEST(DecimalArithmetic, DivideLargeScales) {
StringParser::ParseResult result;
const char* data = "319391280635.61476055";
Decimal16Value x =
StringParser::StringToDecimal<int128_t>(data, strlen(data), t1, &result);
StringParser::StringToDecimal<int128_t>(data, strlen(data), t1, false, &result);
Decimal16Value y(10000);
bool is_nan = false;
bool is_overflow = false;
Expand Down
31 changes: 5 additions & 26 deletions be/src/runtime/decimal-value.inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,27 +140,6 @@ inline DecimalValue<T> DecimalValue<T>::ScaleTo(int src_scale, int dst_scale,

namespace detail {

// Helper function to scale down over multiplied values back into result type,
// truncating if round is false or rounding otherwise.
template<typename T, typename RESULT_T>
inline RESULT_T ScaleDownAndRound(RESULT_T value, int delta_scale, bool round) {
DCHECK_GT(delta_scale, 0);
// Multiplier can always be computed in potentially smaller type T
T multiplier = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
DCHECK(multiplier > 1 && multiplier % 2 == 0);
RESULT_T result = value / multiplier;
if (round) {
RESULT_T remainder = value % multiplier;
// In general, shifting down the multiplier is not safe, but we know
// here that it is a multiple of two.
if (abs(remainder) >= (multiplier >> 1)) {
// Bias at zero must be corrected by sign of dividend.
result += BitUtil::Sign(value);
}
}
return result;
}

// If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_diff,
// this function returns the minimum number of leading zeros the result would have.
inline int MinLeadingZerosAfterScaling(int num_lz, int scale_diff) {
Expand Down Expand Up @@ -232,7 +211,7 @@ inline int128_t AddLarge(int128_t x, int x_scale, int128_t y, int y_scale,
right = x_right + y_right;
}
if (result_scale_decrease > 0) {
right = detail::ScaleDownAndRound<int128_t, int128_t>(
right = DecimalUtil::ScaleDownAndRound<int128_t>(
right, result_scale_decrease, round);
}
DCHECK(right >= 0);
Expand Down Expand Up @@ -288,7 +267,7 @@ inline int128_t SubtractLarge(int128_t x, int x_scale, int128_t y, int y_scale,
if (result_scale_decrease > 0) {
// At this point, the scale of the fractional part is either x_scale or y_scale,
// whichever is greater. We scale down the fractional part to result_scale here.
right = detail::ScaleDownAndRound<int128_t, int128_t>(
right = DecimalUtil::ScaleDownAndRound<int128_t>(
right, result_scale_decrease, round);
}

Expand Down Expand Up @@ -351,7 +330,7 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale,
if (result_scale_decrease > 0) {
// After first adjusting x and y to the same scale and adding them together, we now
// need scale down the result to result_scale.
x = detail::ScaleDownAndRound<T, RESULT_T>(x, result_scale_decrease, round);
x = DecimalUtil::ScaleDownAndRound<RESULT_T>(x, result_scale_decrease, round);
}
return DecimalValue<RESULT_T>(x);
}
Expand Down Expand Up @@ -418,7 +397,7 @@ DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale,
DCHECK(*overflow);
} else {
int256_t intermediate_result = ConvertToInt256(x) * ConvertToInt256(y);
intermediate_result = detail::ScaleDownAndRound<int256_t, int256_t>(
intermediate_result = DecimalUtil::ScaleDownAndRound<int256_t>(
intermediate_result, delta_scale, round);
result = ConvertToInt128(
intermediate_result, DecimalUtil::MAX_UNSCALED_DECIMAL16, overflow);
Expand All @@ -436,7 +415,7 @@ DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale,
result = x * y;
// The largest value that result can have here is (2^64 - 1) * (2^63 - 1), which is
// greater than MAX_UNSCALED_DECIMAL16.
result = detail::ScaleDownAndRound<T, RESULT_T>(result, delta_scale, round);
result = DecimalUtil::ScaleDownAndRound<RESULT_T>(result, delta_scale, round);
// Since delta_scale is greater than zero, result can now be at most
// ((2^64 - 1) * (2^63 - 1)) / 10, which is less than MAX_UNSCALED_DECIMAL16, so
// there is no need to check for overflow.
Expand Down
Loading

0 comments on commit f7c7c70

Please sign in to comment.