forked from apache/hive
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HIVE-15519: BitSet not computed properly for ColumnBuffer subset (Rui Li reviewed by Thejas M Nair)
- Loading branch information
1 parent
370620f
commit f764155
Showing
4 changed files
with
301 additions
and
150 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
271 changes: 271 additions & 0 deletions
271
serde/src/test/org/apache/hadoop/hive/serde2/thrift/TestColumnBuffer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,271 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* <p> | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* <p> | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.hadoop.hive.serde2.thrift; | ||
|
||
import com.google.code.tempusfugit.concurrency.RepeatingRule; | ||
import com.google.code.tempusfugit.concurrency.annotations.Repeating; | ||
import org.junit.Assert; | ||
import org.junit.Rule; | ||
import org.junit.Test; | ||
import org.junit.runner.RunWith; | ||
import org.junit.runners.Parameterized; | ||
|
||
import java.nio.ByteBuffer; | ||
import java.util.Arrays; | ||
import java.util.BitSet; | ||
import java.util.Collection; | ||
import java.util.HashSet; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Random; | ||
import java.util.Set; | ||
import java.util.concurrent.ThreadLocalRandom; | ||
|
||
import static org.junit.Assert.assertArrayEquals; | ||
import static org.junit.Assert.assertEquals; | ||
|
||
@RunWith(Parameterized.class) | ||
public class TestColumnBuffer { | ||
@Rule | ||
public RepeatingRule repeatingRule = new RepeatingRule(); | ||
|
||
private static final int NUM_VARS = 100; | ||
private static final int NUM_NULLS = 30; | ||
private static final Set<Integer> nullIndices = new HashSet<>(); | ||
|
||
private final Type type; | ||
private final Object vars; | ||
|
||
@Parameterized.Parameters | ||
public static Collection<Object[]> types() { | ||
return Arrays.asList(new Object[][]{ | ||
{Type.BOOLEAN_TYPE}, | ||
{Type.TINYINT_TYPE}, | ||
{Type.SMALLINT_TYPE}, | ||
{Type.INT_TYPE}, | ||
{Type.BIGINT_TYPE}, | ||
{Type.DOUBLE_TYPE}, | ||
{Type.FLOAT_TYPE}, | ||
{Type.BINARY_TYPE}, | ||
{Type.STRING_TYPE} | ||
} | ||
); | ||
} | ||
|
||
public TestColumnBuffer(Type type) { | ||
this.type = type; | ||
switch (type) { | ||
case BOOLEAN_TYPE: | ||
vars = new boolean[NUM_VARS]; | ||
break; | ||
case TINYINT_TYPE: | ||
vars = new byte[NUM_VARS]; | ||
break; | ||
case SMALLINT_TYPE: | ||
vars = new short[NUM_VARS]; | ||
break; | ||
case INT_TYPE: | ||
vars = new int[NUM_VARS]; | ||
break; | ||
case BIGINT_TYPE: | ||
vars = new long[NUM_VARS]; | ||
break; | ||
case DOUBLE_TYPE: | ||
case FLOAT_TYPE: | ||
vars = new double[NUM_VARS]; | ||
break; | ||
case BINARY_TYPE: | ||
vars = Arrays.asList(new ByteBuffer[NUM_VARS]); | ||
break; | ||
case STRING_TYPE: | ||
vars = Arrays.asList(new String[NUM_VARS]); | ||
break; | ||
default: | ||
throw new IllegalArgumentException("Invalid type " + type); | ||
} | ||
} | ||
|
||
private static void prepareNullIndices() { | ||
nullIndices.clear(); | ||
Random random = ThreadLocalRandom.current(); | ||
while (nullIndices.size() != NUM_NULLS) { | ||
nullIndices.add(random.nextInt(NUM_VARS)); | ||
} | ||
} | ||
|
||
/** | ||
* Test if the nulls BitSet is maintained properly when we extract subset from ColumnBuffer. | ||
* E.g. suppose we have a ColumnBuffer with nulls [0, 0, 1, 0]. When we split it evenly into | ||
* two subsets, the subsets should have nulls [0, 0] and [1, 0] respectively. | ||
*/ | ||
@Test | ||
@Repeating(repetition = 10) | ||
public void testNullsInSubset() { | ||
prepareNullIndices(); | ||
BitSet nulls = new BitSet(NUM_VARS); | ||
for (int index : nullIndices) { | ||
nulls.set(index); | ||
} | ||
|
||
ColumnBuffer columnBuffer = new ColumnBuffer(type, nulls, vars); | ||
Random random = ThreadLocalRandom.current(); | ||
|
||
int remaining = NUM_VARS; | ||
while (remaining > 0) { | ||
int toExtract = random.nextInt(remaining) + 1; | ||
ColumnBuffer subset = columnBuffer.extractSubset(toExtract); | ||
verifyNulls(subset, NUM_VARS - remaining); | ||
remaining -= toExtract; | ||
} | ||
} | ||
|
||
private static void verifyNulls(ColumnBuffer buffer, int shift) { | ||
BitSet nulls = buffer.getNulls(); | ||
for (int i = 0; i < buffer.size(); i++) { | ||
Assert.assertEquals("BitSet in parent and subset not the same.", | ||
nullIndices.contains(i + shift), nulls.get(i)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testAddValues() { | ||
switch (type) { | ||
case BOOLEAN_TYPE: | ||
testBooleanValues(); | ||
break; | ||
case TINYINT_TYPE: | ||
case SMALLINT_TYPE: | ||
case INT_TYPE: | ||
case BIGINT_TYPE: | ||
testAllIntegerTypeValues(); | ||
break; | ||
case DOUBLE_TYPE: | ||
case FLOAT_TYPE: | ||
testFloatAndDoubleValues(); | ||
break; | ||
case BINARY_TYPE: | ||
testBinaryValues(); | ||
break; | ||
case STRING_TYPE: | ||
testStringValues(); | ||
break; | ||
default: | ||
throw new IllegalArgumentException("Invalid type " + type); | ||
} | ||
} | ||
|
||
|
||
private void testAllIntegerTypeValues() { | ||
Map<Type, List<Object>> integerTypesAndValues = new LinkedHashMap<Type, List<Object>>(); | ||
|
||
// Add TINYINT values | ||
integerTypesAndValues.put(Type.TINYINT_TYPE, Arrays.<Object>asList( | ||
Byte.MIN_VALUE, Byte.MAX_VALUE | ||
)); | ||
|
||
// Add SMALLINT values | ||
integerTypesAndValues.put(Type.SMALLINT_TYPE, Arrays.<Object>asList( | ||
Short.MIN_VALUE, Short.MIN_VALUE | ||
)); | ||
|
||
// Add INT values | ||
integerTypesAndValues.put(Type.INT_TYPE, Arrays.<Object>asList( | ||
Integer.MIN_VALUE, Integer.MAX_VALUE | ||
)); | ||
|
||
// Add BIGINT values | ||
integerTypesAndValues.put(Type.BIGINT_TYPE, Arrays.<Object>asList( | ||
Long.MIN_VALUE, Long.MAX_VALUE | ||
)); | ||
|
||
// Validate all integer type values are stored correctly | ||
for (Map.Entry entry : integerTypesAndValues.entrySet()) { | ||
Type type = (Type) entry.getKey(); | ||
List<Object> values = (List) entry.getValue(); | ||
|
||
ColumnBuffer c = new ColumnBuffer(type); | ||
for (Object v : values) { | ||
c.addValue(type, v); | ||
} | ||
|
||
assertEquals(type, c.getType()); | ||
assertEquals(values.size(), c.size()); | ||
|
||
for (int i = 0; i < c.size(); i++) { | ||
assertEquals(values.get(i), c.get(i)); | ||
} | ||
} | ||
} | ||
|
||
|
||
private void testFloatAndDoubleValues() { | ||
ColumnBuffer floatColumn = new ColumnBuffer(Type.FLOAT_TYPE); | ||
floatColumn.addValue(Type.FLOAT_TYPE, 1.1f); | ||
floatColumn.addValue(Type.FLOAT_TYPE, 2.033f); | ||
|
||
// FLOAT_TYPE is treated as DOUBLE_TYPE | ||
assertEquals(Type.FLOAT_TYPE, floatColumn.getType()); | ||
assertEquals(2, floatColumn.size()); | ||
assertEquals(1.1, floatColumn.get(0)); | ||
assertEquals(2.033, floatColumn.get(1)); | ||
|
||
ColumnBuffer doubleColumn = new ColumnBuffer(Type.DOUBLE_TYPE); | ||
doubleColumn.addValue(Type.DOUBLE_TYPE, 1.1); | ||
doubleColumn.addValue(Type.DOUBLE_TYPE, 2.033); | ||
|
||
assertEquals(Type.DOUBLE_TYPE, doubleColumn.getType()); | ||
assertEquals(2, doubleColumn.size()); | ||
assertEquals(1.1, doubleColumn.get(0)); | ||
assertEquals(2.033, doubleColumn.get(1)); | ||
} | ||
|
||
|
||
private void testBooleanValues() { | ||
ColumnBuffer boolColumn = new ColumnBuffer(Type.BOOLEAN_TYPE); | ||
boolColumn.addValue(Type.BOOLEAN_TYPE, true); | ||
boolColumn.addValue(Type.BOOLEAN_TYPE, false); | ||
|
||
assertEquals(Type.BOOLEAN_TYPE, boolColumn.getType()); | ||
assertEquals(2, boolColumn.size()); | ||
assertEquals(true, boolColumn.get(0)); | ||
assertEquals(false, boolColumn.get(1)); | ||
} | ||
|
||
|
||
private void testStringValues() { | ||
ColumnBuffer stringColumn = new ColumnBuffer(Type.STRING_TYPE); | ||
stringColumn.addValue(Type.STRING_TYPE, "12abc456"); | ||
stringColumn.addValue(Type.STRING_TYPE, "~special$&string"); | ||
|
||
assertEquals(Type.STRING_TYPE, stringColumn.getType()); | ||
assertEquals(2, stringColumn.size()); | ||
assertEquals("12abc456", stringColumn.get(0)); | ||
assertEquals("~special$&string", stringColumn.get(1)); | ||
} | ||
|
||
|
||
private void testBinaryValues() { | ||
ColumnBuffer binaryColumn = new ColumnBuffer(Type.BINARY_TYPE); | ||
binaryColumn.addValue(Type.BINARY_TYPE, new byte[]{-1, 0, 3, 4}); | ||
|
||
assertEquals(Type.BINARY_TYPE, binaryColumn.getType()); | ||
assertEquals(1, binaryColumn.size()); | ||
assertArrayEquals(new byte[]{-1, 0, 3, 4}, (byte[]) binaryColumn.get(0)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.