Skip to content

Commit

Permalink
IMPALA-4962: Fix SHOW COLUMN STATS for HS2
Browse files Browse the repository at this point in the history
Impala incorrectly returned NULLs in the "Max Size" column of the SHOW
COLUMN STATS result when executed through the HS2 interface. The issue
was that the column was declared as type INT in the result schema,
but the values that we actually inserted into it were of type "long".
This is not an issue in the Impala shell because, for beeswax results,
we stringify the contents without inspecting the metadata.

The issue was fixed by changing the type from INT to BIGINT.

Change-Id: I419657744635dfdc2e1562fe60a597617fff446e
Reviewed-on: http://gerrit.cloudera.org:8080/6109
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Impala Public Jenkins
  • Loading branch information
tbobrovytsky authored and jenkins committed Feb 22, 2017
1 parent 0bf6207 commit 1266350
Show file tree
Hide file tree
Showing 12 changed files with 49 additions and 48 deletions.
2 changes: 1 addition & 1 deletion fe/src/main/java/org/apache/impala/service/Frontend.java
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ public TResultSet getColumnStats(String dbName, String tableName)
resultSchema.addToColumns(
new TColumn("#Distinct Values", Type.BIGINT.toThrift()));
resultSchema.addToColumns(new TColumn("#Nulls", Type.BIGINT.toThrift()));
resultSchema.addToColumns(new TColumn("Max Size", Type.INT.toThrift()));
resultSchema.addToColumns(new TColumn("Max Size", Type.BIGINT.toThrift()));
resultSchema.addToColumns(new TColumn("Avg Size", Type.DOUBLE.toThrift()));

for (Column c: table.getColumnsInHiveOrder()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Make sure compute stats still works.
Expand Down Expand Up @@ -66,7 +66,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Also alter a few 'numRows' parameters to make sure manually setting all stats works.
Expand Down Expand Up @@ -119,7 +119,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',-1,-1,4,4
'month','INT',-1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Reset the column stats to an unknown state by setting the values to -1
Expand Down Expand Up @@ -147,5 +147,5 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',-1,-1,4,4
'month','INT',-1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ show column stats $DATABASE2.mv2
'x','INT',2,-1,4,4
'y','STRING',2,-1,1,1
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
drop table $DATABASE2.mv2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'd5','DECIMAL(10,5)',5,-1,8,8
'd6','DECIMAL(9,0)',1,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# test compute stats on a mixed-type parquet table
Expand Down Expand Up @@ -61,5 +61,5 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'a','INT',2,-1,4,4
'b','DECIMAL(10,0)',2,-1,8,8
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
drop incremental stats alltypes_incremental partition(year=2010, month=12)
Expand Down Expand Up @@ -155,7 +155,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
create table incremental_empty_partitioned (i int) partitioned by (j int);
Expand Down Expand Up @@ -256,7 +256,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Confirm that dropping stats drops incremental stats as well
Expand Down Expand Up @@ -495,7 +495,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','CHAR(5)',1,0,5,5
'day','VARCHAR(13)',3,1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Populate a new partition to verify the incremental stats update
Expand Down Expand Up @@ -525,7 +525,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','CHAR(5)',2,0,5,5
'day','VARCHAR(13)',4,1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-4854: Tests incremental computation in the presence of complex-typed columns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Adding a column shouldn't cause the stats to be dropped.
Expand All @@ -91,7 +91,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'month','INT',12,0,4,4
'new_col','INT',-1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Changing a column shouldn't cause the stats of other columns to be dropped.
Expand Down Expand Up @@ -120,7 +120,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'month','INT',12,0,4,4
'new_col2','INT',-1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Removing a column shouldn't cause the stats to be dropped.
Expand All @@ -146,7 +146,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# drop stats from this table
Expand Down Expand Up @@ -205,7 +205,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Add partitions with NULL values and check for stats.
Expand All @@ -231,7 +231,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',3,1,4,4
'month','INT',13,1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
alter table alltypes add partition (year=2011, month=NULL)
Expand All @@ -256,7 +256,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',4,1,4,4
'month','INT',13,2,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Drop the partitions with NULL values and check for stats.
Expand All @@ -283,7 +283,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',3,0,4,4
'month','INT',13,1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
alter table alltypes drop partition (year=2011, month=NULL)
Expand All @@ -309,7 +309,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# drop stats from this table a second time, should not throw an error.
Expand Down Expand Up @@ -356,7 +356,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'string_col','STRING',10,-1,1,1
'timestamp_col','TIMESTAMP',101,-1,16,16
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-4767: Test that ALTER TABLE commands preserve table stats.
Expand Down Expand Up @@ -437,7 +437,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-4767: Test that ALTER TABLE commands preserve table stats.
Expand Down Expand Up @@ -515,7 +515,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-867: Test computing stats on Avro tables created by Hive with
Expand Down Expand Up @@ -568,7 +568,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Avro table with an extra column definition.
Expand Down Expand Up @@ -607,7 +607,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Avro table with missing two column definitions.
Expand Down Expand Up @@ -643,7 +643,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Avro table with one column definition having a different
Expand Down Expand Up @@ -682,7 +682,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Avro table without an Avro schema created by Hive.
Expand Down Expand Up @@ -721,7 +721,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Test Avro table created without any column definitions.
Expand Down Expand Up @@ -766,7 +766,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
Expand Down Expand Up @@ -815,7 +815,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-1104: Test computing stats on Avro tables created by Impala
Expand Down Expand Up @@ -864,7 +864,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMPALA-883: Compute table stats for an empty partition.
Expand Down Expand Up @@ -1009,7 +1009,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','CHAR(5)',1,0,5,5
'day','VARCHAR(13)',3,1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Test that compute stats on a Hive-created Avro table without column defs
Expand All @@ -1036,7 +1036,7 @@ show column stats alltypes_no_coldef
'string_col','STRING',0,-1,0,0
'timestamp_col','STRING',0,-1,0,0
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Test that compute stats works on wide tables.
Expand Down Expand Up @@ -2052,5 +2052,5 @@ show column stats widetable_1000_cols
'double_col125','DOUBLE',5,-1,8,8
'string_col125','STRING',5,-1,1,1
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,5 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'tinyint_col','TINYINT',10,-1,1,1
'year','INT',1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'tinyint_col','TINYINT',10,-1,1,1
'year','INT',1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# test computing stats on a binary HBase table
Expand Down Expand Up @@ -85,7 +85,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'tinyint_col','TINYINT',10,-1,1,1
'year','INT',1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# IMP-1227: Test computing stats on an HBase table that has a
Expand Down Expand Up @@ -129,5 +129,5 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'struct_map_col','MAP<STRING,STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
'year','INT',0,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'tinyint_col','TINYINT',10,-1,1,1
'year','INT',1,-1,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',2,0,4,4
'month','INT',12,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
---- QUERY
# Column stats for a table with complex types.
Expand All @@ -169,5 +169,5 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
'year','INT',0,0,4,4
'month','INT',0,0,4,4
---- TYPES
STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
====
Loading

0 comments on commit 1266350

Please sign in to comment.