Hive: Fix writing of Date, Decimal, Time and UUID types. (apache#2126)
lcspinter authored Jan 25, 2021
1 parent bfcf9c3 commit 5ed15d0
Showing 8 changed files with 86 additions and 17 deletions.
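Every object inspector touched below implements (or now correctly implements) a convert(Object) hook used on the Hive write path to turn the Hive-side value into the Java type Iceberg writers expect. As a rough sketch of that contract, assuming WriteObjectInspector declares only the single method that the overrides in this commit suggest:

// Hypothetical sketch of the converter contract; the real
// org.apache.iceberg.mr.hive.serde.objectinspector.WriteObjectInspector may differ.
public interface WriteObjectInspector {
  // Converts a Hive-side value (e.g. a Hive Date, HiveDecimal or String) into the
  // Java type expected by Iceberg writers (LocalDate, BigDecimal, LocalTime, UUID, ...),
  // returning null for null input.
  Object convert(Object o);
}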
@@ -28,7 +28,7 @@
import org.apache.iceberg.util.DateTimeUtil;

public final class IcebergDateObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector
- implements DateObjectInspector {
+ implements DateObjectInspector, WriteObjectInspector {

private static final IcebergDateObjectInspectorHive3 INSTANCE = new IcebergDateObjectInspectorHive3();

@@ -69,4 +69,13 @@ public Object copyObject(Object o) {
}
}

+ @Override
+ public LocalDate convert(Object o) {
+ if (o == null) {
+ return null;
+ }
+
+ Date date = (Date) o;
+ return LocalDate.of(date.getYear(), date.getMonth(), date.getDay());
+ }
}
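For reference, a minimal standalone sketch of the conversion the new method performs, assuming Hive 3's org.apache.hadoop.hive.common.type.Date with the valueOf/getYear/getMonth/getDay accessors used above:

import java.time.LocalDate;
import org.apache.hadoop.hive.common.type.Date;

public class DateConversionSketch {
  public static void main(String[] args) {
    // Assumed example input; Hive 3's Date parses the ISO-style string form.
    Date hiveDate = Date.valueOf("2021-01-25");
    // The same field-by-field mapping as the convert(Object) override above.
    LocalDate converted = LocalDate.of(hiveDate.getYear(), hiveDate.getMonth(), hiveDate.getDay());
    System.out.println(converted); // 2021-01-25
  }
}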
@@ -80,6 +80,13 @@ public Object copyObject(Object o) {

@Override
public BigDecimal convert(Object o) {
- return o == null ? null : ((HiveDecimal) o).bigDecimalValue();
+ if (o == null) {
+ return null;
+ }
+
+ BigDecimal result = ((HiveDecimal) o).bigDecimalValue();
+ // the scale is lost during the HiveDecimal to BigDecimal conversion when the value is 0
+ result = result.setScale(scale());
+ return result;
}
}
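The setScale call matters because a zero that has lost its scale no longer matches the Iceberg decimal(p, s) type of the column. A small java.math.BigDecimal-only sketch of the effect (the scale of 10 is an assumed example, standing in for the inspector's scale()):

import java.math.BigDecimal;

public class DecimalScaleSketch {
  public static void main(String[] args) {
    // Per the comment in the converter above, a zero coming back from
    // HiveDecimal.bigDecimalValue() has lost its scale:
    BigDecimal zeroWithoutScale = new BigDecimal("0");
    System.out.println(zeroWithoutScale.scale()); // 0

    // Re-applying the column's scale restores a value that matches decimal(p, 10).
    // setScale without a rounding mode is safe here: widening the scale never rounds.
    BigDecimal zeroWithScale = zeroWithoutScale.setScale(10);
    System.out.println(zeroWithScale.scale());         // 10
    System.out.println(zeroWithScale.toPlainString()); // 0.0000000000
  }
}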
@@ -19,6 +19,7 @@

package org.apache.iceberg.mr.hive.serde.objectinspector;

+ import java.time.LocalTime;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -49,8 +50,8 @@ public Text getPrimitiveWritableObject(Object o) {
}

@Override
- public Object convert(Object o) {
- return o == null ? null : o.toString();
+ public LocalTime convert(Object o) {
+ return o == null ? null : LocalTime.parse((String) o);
}

@Override
@@ -19,6 +19,7 @@

package org.apache.iceberg.mr.hive.serde.objectinspector;

+ import java.util.UUID;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -49,8 +50,8 @@ public Text getPrimitiveWritableObject(Object o) {
}

@Override
- public String convert(Object o) {
- return o == null ? null : o.toString();
+ public UUID convert(Object o) {
+ return o == null ? null : UUID.fromString(o.toString());
}

@Override
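The Time and UUID inspectors above both parse the string payload into the Java type Iceberg stores internally. A JDK-only sketch of the two round-trips the new convert methods rely on (the literal values are taken from the test data in this commit):

import java.time.LocalTime;
import java.util.UUID;

public class StringBackedConversionSketch {
  public static void main(String[] args) {
    // Time: the ISO-8601 string form round-trips to a LocalTime.
    LocalTime time = LocalTime.parse("11:33");
    System.out.println(time); // 11:33

    // UUID: the canonical string form round-trips to a java.util.UUID.
    UUID uuid = UUID.fromString("73689599-d7fc-4dfb-b94e-106ff20284a5");
    System.out.println(uuid); // 73689599-d7fc-4dfb-b94e-106ff20284a5
  }
}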
@@ -29,12 +29,14 @@
import java.sql.Timestamp;
import java.time.LocalDate;
import java.time.LocalDateTime;
+ import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
+ import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -128,8 +130,8 @@ public static Record getTestRecord() {
record.set(9, new byte[]{0, 1, 2});
record.set(10, ByteBuffer.wrap(new byte[]{0, 1, 2, 3}));
record.set(11, new BigDecimal("0.0000000013"));
- record.set(12, "11:33");
- record.set(13, "73689599-d7fc-4dfb-b94e-106ff20284a5");
+ record.set(12, LocalTime.of(11, 33));
+ record.set(13, UUID.fromString("73689599-d7fc-4dfb-b94e-106ff20284a5"));

return record;
}
@@ -167,8 +169,8 @@ public static List<Object> valuesForTestRecord(Record record) {
new BytesWritable(record.get(9, byte[].class)),
new BytesWritable(ByteBuffers.toByteArray(record.get(10, ByteBuffer.class))),
new HiveDecimalWritable(HiveDecimal.create(record.get(11, BigDecimal.class))),
- new Text(record.get(12, String.class)),
- new Text(record.get(13, String.class))
+ new Text(record.get(12, LocalTime.class).toString()),
+ new Text(record.get(13, UUID.class).toString())
);
}

@@ -20,6 +20,9 @@
package org.apache.iceberg.mr.hive;

import java.io.IOException;
+ import java.sql.Timestamp;
+ import java.time.LocalDateTime;
+ import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@@ -310,6 +313,36 @@ public void testInsert() throws IOException {
HiveIcebergTestUtils.validateData(table, new ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS), 0);
}

+ @Test
+ public void testInsertSupportedTypes() throws IOException {
+ Assume.assumeTrue("Tez write is not implemented yet", executionEngine.equals("mr"));
+ for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
+ Type type = SUPPORTED_TYPES.get(i);
+ // TODO: remove this filter when issue #1881 is resolved
+ if (type == Types.UUIDType.get() && fileFormat == FileFormat.PARQUET) {
+ continue;
+ }
+ // TODO: remove this filter when we figure out how we could test binary types
+ if (type.equals(Types.BinaryType.get()) || type.equals(Types.FixedType.ofLength(5))) {
+ continue;
+ }
+ String tableName = type.typeId().toString().toLowerCase() + "_table_" + i;
+ String columnName = type.typeId().toString().toLowerCase() + "_column";
+
+ Schema schema = new Schema(required(1, "id", Types.LongType.get()), required(2, columnName, type));
+ List<Record> expected = TestHelper.generateRandomRecords(schema, 5, 0L);
+
+ Table table = testTables.createTable(shell, tableName, schema, fileFormat, ImmutableList.of());
+ StringBuilder query = new StringBuilder("INSERT INTO ").append(tableName).append(" VALUES")
+ .append(expected.stream()
+ .map(r -> String.format("(%s,%s)", r.get(0),
+ getStringValueForInsert(r.get(1), type)))
+ .collect(Collectors.joining(",")));
+ shell.executeStatement(query.toString());
+ HiveIcebergTestUtils.validateData(table, expected, 0);
+ }
+ }

/**
* Testing map only inserts.
* @throws IOException If there is an underlying IOException
@@ -579,4 +612,18 @@ private StringBuilder buildComplexTypeInnerQuery(Object field, Type type) {
}
return query;
}

+ private String getStringValueForInsert(Object value, Type type) {
+ String template = "\'%s\'";
+ if (type.equals(Types.TimestampType.withoutZone())) {
+ return String.format(template, Timestamp.valueOf((LocalDateTime) value).toString());
+ } else if (type.equals(Types.TimestampType.withZone())) {
+ return String.format(template, Timestamp.from(((OffsetDateTime) value).toInstant()).toString());
+ } else if (type.equals(Types.BooleanType.get())) {
+ // in Hive 2, boolean values must not be surrounded by apostrophes; otherwise the value is translated to true
+ return value.toString();
+ } else {
+ return String.format(template, value.toString());
+ }
+ }
}
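As a usage illustration of the quoting rules in getStringValueForInsert above, a short sketch with assumed example values (the timestamp branch quotes the java.sql.Timestamp string form, while booleans stay unquoted so Hive 2 does not coerce them to true):

import java.sql.Timestamp;
import java.time.LocalDateTime;

public class InsertLiteralSketch {
  public static void main(String[] args) {
    // What the timestamp-without-zone branch produces for an assumed example value.
    String timestampLiteral = String.format("'%s'",
        Timestamp.valueOf(LocalDateTime.of(2021, 1, 25, 11, 33)).toString());
    System.out.println(timestampLiteral); // '2021-01-25 11:33:00.0'

    // Booleans are emitted as bare literals.
    System.out.println(Boolean.TRUE.toString()); // true
  }
}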
@@ -48,12 +48,13 @@ public void testIcebergTimeObjectInspector() {
Assert.assertNull(oi.getPrimitiveWritableObject(null));
Assert.assertNull(oi.convert(null));

- String time = LocalTime.now().toString();
+ LocalTime localTime = LocalTime.now();
+ String time = localTime.toString();
Text text = new Text(time);

Assert.assertEquals(time, oi.getPrimitiveJavaObject(text));
Assert.assertEquals(text, oi.getPrimitiveWritableObject(time));
- Assert.assertEquals(time, oi.convert(text));
+ Assert.assertEquals(localTime, oi.convert(time));

Text copy = (Text) oi.copyObject(text);

@@ -47,12 +47,13 @@ public void testIcebergUUIDObjectInspector() {
Assert.assertNull(oi.getPrimitiveWritableObject(null));
Assert.assertNull(oi.convert(null));

- String uuid = UUID.randomUUID().toString();
- Text text = new Text(uuid);
+ UUID uuid = UUID.randomUUID();
+ String uuidStr = uuid.toString();
+ Text text = new Text(uuidStr);

- Assert.assertEquals(uuid, oi.getPrimitiveJavaObject(text));
- Assert.assertEquals(text, oi.getPrimitiveWritableObject(uuid));
- Assert.assertEquals(uuid, oi.convert(text));
+ Assert.assertEquals(uuidStr, oi.getPrimitiveJavaObject(text));
+ Assert.assertEquals(text, oi.getPrimitiveWritableObject(uuidStr));
+ Assert.assertEquals(uuid, oi.convert(uuidStr));

Text copy = (Text) oi.copyObject(text);
