Skip to content

Commit

Permalink
IMPALA-5572: Timestamp codegen for text scanner
Browse files Browse the repository at this point in the history
Currently codegen is disabled when scanning text tables with timestamp
columns. The message is "Timestamp not yet supported for codegen."
This patch adds support for timestamp codegen.
A simple query in the comment section of this issue performs a little
better (4%) than interpreted version.

Testing: The patch passed test with exhaustive workload exploration
strategy.

Change-Id: I00cbf8ec7784ca9594e14e952f46dc54a5ede44b
Reviewed-on: http://gerrit.cloudera.org:8080/7556
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins
  • Loading branch information
glglwty authored and jenkins committed Aug 8, 2017
1 parent 87af6dc commit bab9d6f
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 6 deletions.
1 change: 1 addition & 0 deletions be/src/codegen/gen_ir_descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@
["STRING_TO_INT64", "IrStringToInt64"],
["STRING_TO_FLOAT", "IrStringToFloat"],
["STRING_TO_DOUBLE", "IrStringToDouble"],
["STRING_TO_TIMESTAMP", "IrStringToTimestamp"],
["IS_NULL_STRING", "IrIsNullString"],
["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"],
["RAW_VALUE_COMPARE",
Expand Down
6 changes: 6 additions & 0 deletions be/src/exec/hdfs-scanner-ir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ double IrStringToDouble(const char* s, int len, StringParser::ParseResult* resul
return StringParser::StringToFloat<double>(s, len, result);
}

extern "C"
TimestampValue IrStringToTimestamp(const char* s, int len,
StringParser::ParseResult* result) {
return StringParser::StringToTimestamp(s, len, result);
}

extern "C"
bool IrIsNullString(const char* data, int len) {
return data == NULL || (len == 2 && data[0] == '\\' && data[1] == 'N');
Expand Down
3 changes: 0 additions & 3 deletions be/src/exec/hdfs-scanner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -311,9 +311,6 @@ Status HdfsScanner::CodegenWriteCompleteTuple(HdfsScanNodeBase* node,
// TODO: Timestamp is not yet supported
for (int i = 0; i < node->materialized_slots().size(); ++i) {
SlotDescriptor* slot_desc = node->materialized_slots()[i];
if (slot_desc->type().type == TYPE_TIMESTAMP) {
return Status::Expected("Timestamp not yet supported for codegen.");
}
if (slot_desc->type().type == TYPE_DECIMAL) {
return Status::Expected("Decimal not yet supported for codegen.");
}
Expand Down
19 changes: 16 additions & 3 deletions be/src/exec/text-converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
case TYPE_DOUBLE:
parse_fn_enum = IRFunction::STRING_TO_DOUBLE;
break;
case TYPE_TIMESTAMP:
parse_fn_enum = IRFunction::STRING_TO_TIMESTAMP;
break;
default:
DCHECK(false);
return NULL;
Expand All @@ -234,9 +237,18 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
LlvmCodeGen::NamedVariable parse_result("parse_result", codegen->GetType(TYPE_INT));
Value* parse_result_ptr = codegen->CreateEntryBlockAlloca(fn, parse_result);

Value* parse_return;
// Call Impala's StringTo* function
Value* result = builder.CreateCall(parse_fn,
ArrayRef<Value*>({args[1], args[2], parse_result_ptr}));
if (parse_fn->arg_size() == 3) {
parse_return = builder.CreateCall(parse_fn, {args[1], args[2], parse_result_ptr});
} else {
DCHECK(parse_fn->arg_size() == 4);
// If the return value is large (more than 16 bytes in our toolchain) the first
// parameter would be a pointer to value parsed and the return value of callee
// should be ignored
builder.CreateCall(parse_fn, {slot, args[1], args[2], parse_result_ptr});
parse_return = nullptr;
}
Value* parse_result_val = builder.CreateLoad(parse_result_ptr, "parse_result");
Value* failed_value = codegen->GetIntConstant(TYPE_INT, StringParser::PARSE_FAILURE);

Expand All @@ -254,7 +266,8 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,

// Parse succeeded
builder.SetInsertPoint(parse_success_block);
builder.CreateStore(result, slot);
// If the parsed value is in parse_return, move it into slot
if (parse_fn->arg_size() == 3) builder.CreateStore(parse_return, slot);
builder.CreateRet(codegen->true_value());

// Parse failed, set slot to null and return false
Expand Down
11 changes: 11 additions & 0 deletions be/src/util/string-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

#include "common/logging.h"
#include "runtime/decimal-value.h"
#include "runtime/timestamp-parse-util.h"
#include "runtime/timestamp-value.h"
#include "util/decimal-util.h"

namespace impala {
Expand Down Expand Up @@ -95,6 +97,15 @@ class StringParser {
return StringToBoolInternal(s + i, len - i, result);
}

/// Parse a TimestampValue from s.
static inline TimestampValue StringToTimestamp(const char* s, int len,
ParseResult* result) {
boost::gregorian::date d;
boost::posix_time::time_duration t;
*result = TimestampParser::Parse(s, len, &d, &t) ? PARSE_SUCCESS : PARSE_FAILURE;
return {d, t};
}

/// Parses a decimal from s, returning the result.
/// The parse status is returned in *result.
/// On overflow or invalid values, the return value is undefined.
Expand Down

0 comments on commit bab9d6f

Please sign in to comment.