Skip to content

Commit

Permalink
HIVE-14412: Add timestamp with time zone (Rui Li reviewed by Xuefu Zh…
Browse files Browse the repository at this point in the history
…ang, Pengcheng Xiong, Carter Shanklin, Ashutosh Chauhan)
  • Loading branch information
lirui-apache committed May 11, 2017
1 parent ee91b8e commit 6b6a00f
Show file tree
Hide file tree
Showing 99 changed files with 2,360 additions and 277 deletions.
197 changes: 197 additions & 0 deletions common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.common.type;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.DateTimeException;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.time.format.TextStyle;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Internal type for TIMESTAMP WITH TIME ZONE.
 *
 * <p>Wraps a {@link ZonedDateTime} and always normalizes it to UTC while retaining the instant.
 * The fully qualified input format is
 * "yyyy-MM-dd HH:mm:ss[.SSS...] zoneid/zoneoffset", where the time and zone parts are optional.
 * If the time part is absent, a default of '00:00:00.0' is used.
 * If the zone part is absent, the system time zone is used.
 * E.g. "2017-04-14 18:00:00 Asia/Shanghai" is converted to
 * "2017-04-14 10:00:00.0 Z".
 *
 * <p>Instances are mutable (see {@link #set(long, int)} and
 * {@link #setZonedDateTime(ZonedDateTime)}); equality, hashing and ordering are all based on the
 * represented instant.
 */
public class TimestampTZ implements Comparable<TimestampTZ> {

  /** Shared formatter used for both parsing input strings and rendering {@link #toString()}. */
  private static final DateTimeFormatter FORMATTER;
  private static final ZoneId UTC = ZoneOffset.UTC;
  private static final ZonedDateTime EPOCH = ZonedDateTime.ofInstant(Instant.EPOCH, UTC);
  /** Time of day used when the input string carries no time part. */
  private static final LocalTime DEFAULT_LOCAL_TIME = LocalTime.of(0, 0);
  /** Matches a zone offset whose hour has a single digit, e.g. "+8:00" or "-3:00". */
  private static final Pattern SINGLE_DIGIT_PATTERN = Pattern.compile("[\\+-]\\d:\\d\\d");
  private static final Logger LOG = LoggerFactory.getLogger(TimestampTZ.class);

  /** Per-thread formatter for non-Timestamp dates, because SimpleDateFormat is not thread-safe. */
  private static final ThreadLocal<DateFormat> CONVERT_FORMATTER =
      ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"));

  static {
    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
    // Date part (mandatory)
    builder.append(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
    // Time part (optional), with an optional fraction of 1 to 9 digits
    builder.optionalStart().appendLiteral(" ").append(DateTimeFormatter.ofPattern("HH:mm:ss")).
        optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 1, 9, true).
        optionalEnd().optionalEnd();

    // Zone part (optional); the separating space is itself optional, so "2017-11-08GMT" is accepted
    builder.optionalStart().appendLiteral(" ").optionalEnd();
    builder.optionalStart().appendZoneText(TextStyle.NARROW).optionalEnd();

    FORMATTER = builder.toFormatter();
  }

  /** The wrapped value; always stored in UTC by {@link #setZonedDateTime(ZonedDateTime)}. */
  private ZonedDateTime zonedDateTime;

  /** Creates an instance representing the epoch (1970-01-01T00:00:00Z). */
  public TimestampTZ() {
    this(EPOCH);
  }

  /**
   * Creates an instance for the given date-time, converted to UTC.
   *
   * @param zonedDateTime the value to wrap; {@code null} is treated as the epoch
   */
  public TimestampTZ(ZonedDateTime zonedDateTime) {
    setZonedDateTime(zonedDateTime);
  }

  /**
   * Creates an instance from an epoch-based instant.
   *
   * @param seconds seconds since the epoch
   * @param nanos nanosecond adjustment to {@code seconds}
   */
  public TimestampTZ(long seconds, int nanos) {
    set(seconds, nanos);
  }

  /**
   * Resets this instance to the given epoch-based instant, expressed in UTC.
   *
   * @param seconds seconds since the epoch
   * @param nanos nanosecond adjustment to {@code seconds}
   */
  public void set(long seconds, int nanos) {
    Instant instant = Instant.ofEpochSecond(seconds, nanos);
    setZonedDateTime(ZonedDateTime.ofInstant(instant, UTC));
  }

  /**
   * @return the wrapped date-time, which is always in UTC
   */
  public ZonedDateTime getZonedDateTime() {
    return zonedDateTime;
  }

  /**
   * Replaces the wrapped value, converting it to UTC while keeping the same instant.
   *
   * @param zonedDateTime the new value; {@code null} resets this instance to the epoch
   */
  public void setZonedDateTime(ZonedDateTime zonedDateTime) {
    this.zonedDateTime = zonedDateTime != null ? zonedDateTime.withZoneSameInstant(UTC) : EPOCH;
  }

  /**
   * Renders the UTC value with the same formatter used for parsing,
   * e.g. "2017-04-14 10:00:00.0 Z".
   */
  @Override
  public String toString() {
    return zonedDateTime.format(FORMATTER);
  }

  /** Hash of the represented instant, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return zonedDateTime.toInstant().hashCode();
  }

  /** Two instances are equal when they represent the same instant. */
  @Override
  public boolean equals(Object other) {
    if (other instanceof TimestampTZ) {
      return compareTo((TimestampTZ) other) == 0;
    }
    return false;
  }

  /** Orders instances chronologically by the represented instant. */
  @Override
  public int compareTo(TimestampTZ o) {
    return zonedDateTime.toInstant().compareTo(o.zonedDateTime.toInstant());
  }

  /**
   * @return seconds since the epoch of the represented instant
   */
  public long getEpochSecond() {
    return zonedDateTime.toInstant().getEpochSecond();
  }

  /**
   * @return the nanosecond-of-second of the represented instant
   */
  public int getNanos() {
    return zonedDateTime.toInstant().getNano();
  }

  /**
   * Parses a string of the form "yyyy-MM-dd[ HH:mm:ss[.S...]][ ][zone]".
   *
   * <p>A missing time part defaults to midnight and a missing zone part defaults to
   * {@link ZoneId#systemDefault()}.
   *
   * @param s the text to parse
   * @return the parsed value, normalized to UTC
   * @throws DateTimeParseException if the text does not match the expected format
   */
  public static TimestampTZ parse(String s) {
    // need to handle offset with single digit hour, see JDK-8066806
    s = handleSingleDigitHourOffset(s);
    ZonedDateTime zonedDateTime;
    try {
      zonedDateTime = ZonedDateTime.parse(s, FORMATTER);
    } catch (DateTimeParseException e) {
      // Try to be more tolerant: the input may merely be incomplete (no time and/or no zone).
      // If the input is invalid rather than incomplete, this parse throws again and propagates.
      TemporalAccessor accessor = FORMATTER.parse(s);
      // LocalDate must be present
      LocalDate localDate = LocalDate.from(accessor);
      LocalTime localTime;
      ZoneId zoneId;
      try {
        localTime = LocalTime.from(accessor);
      } catch (DateTimeException ignored) {
        // no time part in the input: fall back to the default time
        localTime = DEFAULT_LOCAL_TIME;
      }
      try {
        zoneId = ZoneId.from(accessor);
      } catch (DateTimeException ignored) {
        // no zone part in the input: fall back to the system zone
        // TODO: in future this may come from user specified zone (via set time zone command)
        zoneId = ZoneId.systemDefault();
      }
      zonedDateTime = ZonedDateTime.of(localDate, localTime, zoneId);
    }

    return new TimestampTZ(zonedDateTime);
  }

  /**
   * Pads a single-digit offset hour with a leading zero (e.g. "-3:00" becomes "-03:00").
   * Only the first match of {@link #SINGLE_DIGIT_PATTERN} is rewritten; input without a match
   * is returned unchanged.
   */
  private static String handleSingleDigitHourOffset(String s) {
    Matcher matcher = SINGLE_DIGIT_PATTERN.matcher(s);
    if (matcher.find()) {
      // position right after the sign character
      int index = matcher.start() + 1;
      s = s.substring(0, index) + "0" + s.substring(index, s.length());
    }
    return s;
  }

  /**
   * Lenient variant of {@link #parse(String)} that reports failure as {@code null}.
   *
   * @param s the text to parse; may be {@code null}
   * @return the parsed value, or {@code null} if {@code s} is {@code null} or cannot be
   *         converted to a TimestampTZ
   */
  public static TimestampTZ parseOrNull(String s) {
    if (s == null) {
      return null;
    }
    try {
      return parse(s);
    } catch (DateTimeException e) {
      // DateTimeException (the parent of DateTimeParseException) is caught on purpose: besides
      // malformed text, normalizing to UTC can fail when the result exceeds the supported range.
      LOG.debug("Invalid string {} for TIMESTAMP WITH TIME ZONE", s, e);
      return null;
    }
  }

  /**
   * Converts a Date to TimestampTZ. The conversion is done text-wise since
   * Date/Timestamp should be treated as a description of date/time: the date is rendered as
   * text in the current default time zone and that text is then parsed in the same zone.
   *
   * @param date the date to convert; a {@link Timestamp} is rendered via its {@code toString()},
   *             any other date as "yyyy-MM-dd HH:mm:ss"
   * @return the converted value, normalized to UTC
   */
  public static TimestampTZ convert(java.util.Date date) {
    // TODO: in future this may come from user specified zone (via set time zone command)
    java.util.TimeZone systemZone = java.util.TimeZone.getDefault();
    String s;
    if (date instanceof Timestamp) {
      s = date.toString();
    } else {
      DateFormat dateFormat = CONVERT_FORMATTER.get();
      // The cached formatter keeps the default zone from when it was created. Refresh it so the
      // rendered text agrees with the zone id appended below even if the default has changed.
      dateFormat.setTimeZone(systemZone);
      s = dateFormat.format(date);
    }
    return parse(s + " " + systemZone.toZoneId().getId());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package org.apache.hadoop.hive.common.type;

import org.junit.Assert;
import org.junit.Test;

import java.sql.Timestamp;
import java.time.format.DateTimeParseException;
import java.util.TimeZone;

/**
 * Unit tests for {@link TimestampTZ}: UTC normalization, comparison, instant round-trips,
 * tolerated input variations, rejected input, and conversion from {@link Timestamp}.
 */
public class TestTimestampTZ {

  /** A zoned input is rendered in UTC. */
  @Test
  public void testConvertToUTC() {
    TimestampTZ parsed = TimestampTZ.parse("2017-04-14 18:00:00 Asia/Shanghai");
    Assert.assertEquals("2017-04-14 10:00:00.0 Z", parsed.toString());
  }

  /** Values denoting the same instant are equal and hash alike; others order chronologically. */
  @Test
  public void testComparison() {
    TimestampTZ shanghai = TimestampTZ.parse("2017-04-14 18:00:00 Asia/Shanghai");
    TimestampTZ gmt = TimestampTZ.parse("2017-04-14 10:00:00.00 GMT");
    TimestampTZ utcPlusEight = TimestampTZ.parse("2017-04-14 18:00:00 UTC+08:00");
    TimestampTZ london = TimestampTZ.parse("2017-04-14 18:00:00 Europe/London");

    Assert.assertEquals(shanghai, gmt);
    Assert.assertEquals(shanghai, utcPlusEight);
    Assert.assertEquals(shanghai.hashCode(), gmt.hashCode());
    Assert.assertEquals(shanghai.hashCode(), utcPlusEight.hashCode());
    Assert.assertTrue(shanghai.compareTo(london) < 0);
  }

  /**
   * 02:01 and 03:01 on 2005-04-03 in America/Los_Angeles must parse to equal values
   * (presumably because 02:01 falls into the daylight-saving gap of that day).
   */
  @Test
  public void testDST() {
    TimestampTZ inGap = TimestampTZ.parse("2005-04-03 02:01:00 America/Los_Angeles");
    TimestampTZ afterGap = TimestampTZ.parse("2005-04-03 03:01:00 America/Los_Angeles");
    Assert.assertEquals(inGap, afterGap);
  }

  /** Epoch seconds and nanos read from a value can be used to rebuild it and shifted variants. */
  @Test
  public void testFromToInstant() {
    TimestampTZ original = TimestampTZ.parse("2017-04-14 18:00:00 UTC");
    long epochSeconds = original.getEpochSecond();
    int baseNanos = original.getNanos();
    Assert.assertEquals(original, new TimestampTZ(epochSeconds, baseNanos));

    int shiftedNanos = baseNanos + 123000000;
    Assert.assertEquals("2017-04-14 18:00:00.123 Z",
        new TimestampTZ(epochSeconds, shiftedNanos).toString());

    long shiftedSeconds = epochSeconds - 3;
    Assert.assertEquals("2017-04-14 17:59:57.123 Z",
        new TimestampTZ(shiftedSeconds, shiftedNanos).toString());
  }

  /** Omitting the zone or time part is allowed; each input must parse without an exception. */
  @Test
  public void testVariations() {
    String[] acceptedInputs = {
        "2017-01-01 13:33:00",
        "2017-11-08 Europe/London",
        "2017-05-20",
        "2017-11-08GMT",
        "2017-10-11 GMT+8:00",
        "2017-05-08 07:45:00-3:00"
    };
    for (String input : acceptedInputs) {
      TimestampTZ.parse(input);
    }
  }

  /** An unknown zone id and an out-of-range time must both be rejected. */
  @Test
  public void testInvalidStrings() {
    // invalid zone
    expectParseFailure("2017-01-01 13:33:00 foo", "Invalid timezone ID should cause exception");
    // invalid time part
    expectParseFailure("2017-01-01 13:33:61", "Invalid time should cause exception");
  }

  /** Fails the test with {@code failureMessage} unless parsing {@code input} is rejected. */
  private static void expectParseFailure(String input, String failureMessage) {
    try {
      TimestampTZ.parse(input);
      Assert.fail(failureMessage);
    } catch (DateTimeParseException expected) {
      // expected
    }
  }

  /** Converting the same wall-clock timestamp under different default zones yields ordered instants. */
  @Test
  public void testConvertFromTimestamp() {
    TimeZone originalDefault = TimeZone.getDefault();
    try {
      // Use system zone when converting from timestamp to timestamptz
      String wallClock = "2017-06-12 23:12:56.34";
      TimeZone.setDefault(TimeZone.getTimeZone("Europe/London"));
      TimestampTZ inLondon = TimestampTZ.convert(Timestamp.valueOf(wallClock));
      TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"));
      TimestampTZ inLosAngeles = TimestampTZ.convert(Timestamp.valueOf(wallClock));
      Assert.assertTrue(inLondon.compareTo(inLosAngeles) < 0);
    } finally {
      // always restore the JVM default so other tests are unaffected
      TimeZone.setDefault(originalDefault);
    }
  }
}
4 changes: 2 additions & 2 deletions contrib/src/test/queries/clientnegative/serde_regex.q
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ CREATE TABLE serde_regex(
host STRING,
identity STRING,
`user` STRING,
time STRING,
`time` STRING,
request STRING,
status INT,
size INT,
Expand All @@ -25,7 +25,7 @@ CREATE TABLE serde_regex(
host STRING,
identity STRING,
`user` STRING,
time STRING,
`time` STRING,
request STRING,
status INT,
size INT,
Expand Down
6 changes: 3 additions & 3 deletions contrib/src/test/queries/clientpositive/serde_regex.q
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ CREATE TABLE serde_regex(
host STRING,
identity STRING,
`user` STRING,
time STRING,
`time` STRING,
request STRING,
status STRING,
size STRING,
Expand All @@ -23,7 +23,7 @@ CREATE TABLE serde_regex(
host STRING,
identity STRING,
`user` STRING,
time STRING,
`time` STRING,
request STRING,
status STRING,
size STRING,
Expand All @@ -39,4 +39,4 @@ STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "../../data/files/apache.access.log" INTO TABLE serde_regex;
LOAD DATA LOCAL INPATH "../../data/files/apache.access.2.log" INTO TABLE serde_regex;

SELECT * FROM serde_regex ORDER BY time;
SELECT * FROM serde_regex ORDER BY `time`;
6 changes: 3 additions & 3 deletions contrib/src/test/results/clientnegative/serde_regex.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CREATE TABLE serde_regex(
host STRING,
identity STRING,
`user` STRING,
time STRING,
`time` STRING,
request STRING,
status INT,
size INT,
Expand All @@ -27,7 +27,7 @@ CREATE TABLE serde_regex(
host STRING,
identity STRING,
`user` STRING,
time STRING,
`time` STRING,
request STRING,
status INT,
size INT,
Expand Down Expand Up @@ -60,7 +60,7 @@ PREHOOK: query: CREATE TABLE serde_regex(
host STRING,
identity STRING,
`user` STRING,
time STRING,
`time` STRING,
request STRING,
status INT,
size INT,
Expand Down
Loading

0 comments on commit 6b6a00f

Please sign in to comment.