Skip to content

Commit

Permalink
HIVE-7144 : Use Text writables directly in ORC dictionaries to avoid …
Browse files Browse the repository at this point in the history
…String allocations (Gopal V, reviewed by Gunther Hagleitner)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1608618 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
t3rmin4t0r committed Jul 7, 2014
1 parent 05a2aff commit 6072e3a
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

class ColumnStatisticsImpl implements ColumnStatistics {

Expand Down Expand Up @@ -335,8 +336,8 @@ public String toString() {

protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
implements StringColumnStatistics {
private String minimum = null;
private String maximum = null;
private Text minimum = null;
private Text maximum = null;
private long sum = 0;

StringStatisticsImpl() {
Expand All @@ -346,10 +347,10 @@ protected static final class StringStatisticsImpl extends ColumnStatisticsImpl
super(stats);
OrcProto.StringStatistics str = stats.getStringStatistics();
if (str.hasMaximum()) {
maximum = str.getMaximum();
maximum = new Text(str.getMaximum());
}
if (str.hasMinimum()) {
minimum = str.getMinimum();
minimum = new Text(str.getMinimum());
}
if(str.hasSum()) {
sum = str.getSum();
Expand All @@ -365,30 +366,34 @@ void reset() {
}

@Override
void updateString(String value) {
void updateString(Text value) {
if (minimum == null) {
minimum = value;
maximum = value;
maximum = minimum = new Text(value.copyBytes());
} else if (minimum.compareTo(value) > 0) {
minimum = value;
minimum = new Text(value.copyBytes());
} else if (maximum.compareTo(value) < 0) {
maximum = value;
maximum = new Text(value.copyBytes());
}
sum += value.length();
sum += value.getLength();
}

@Override
void merge(ColumnStatisticsImpl other) {
super.merge(other);
StringStatisticsImpl str = (StringStatisticsImpl) other;
if (minimum == null) {
minimum = str.minimum;
maximum = str.maximum;
if(str.minimum != null) {
maximum = new Text(str.getMaximum());
minimum = new Text(str.getMinimum());
} else {
/* both are empty */
maximum = minimum = null;
}
} else if (str.minimum != null) {
if (minimum.compareTo(str.minimum) > 0) {
minimum = str.minimum;
minimum = new Text(str.getMinimum());
} else if (maximum.compareTo(str.maximum) < 0) {
maximum = str.maximum;
maximum = new Text(str.getMaximum());
}
}
sum += str.sum;
Expand All @@ -400,8 +405,8 @@ OrcProto.ColumnStatistics.Builder serialize() {
OrcProto.StringStatistics.Builder str =
OrcProto.StringStatistics.newBuilder();
if (getNumberOfValues() != 0) {
str.setMinimum(minimum);
str.setMaximum(maximum);
str.setMinimum(getMinimum());
str.setMaximum(getMaximum());
str.setSum(sum);
}
result.setStringStatistics(str);
Expand All @@ -410,12 +415,12 @@ OrcProto.ColumnStatistics.Builder serialize() {

@Override
public String getMinimum() {
return minimum;
return minimum == null ? null : minimum.toString();
}

@Override
public String getMaximum() {
return maximum;
return maximum == null ? null : maximum.toString();
}

@Override
Expand All @@ -428,9 +433,9 @@ public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
buf.append(" min: ");
buf.append(minimum);
buf.append(getMinimum());
buf.append(" max: ");
buf.append(maximum);
buf.append(getMaximum());
buf.append(" sum: ");
buf.append(sum);
}
Expand Down Expand Up @@ -733,7 +738,7 @@ void updateDouble(double value) {
throw new UnsupportedOperationException("Can't update double");
}

void updateString(String value) {
void updateString(Text value) {
throw new UnsupportedOperationException("Can't update string");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,23 @@ public StringRedBlackTree(int initialCapacity) {

/**
 * Adds a string key to the tree.
 *
 * @param value the key to add; it is copied into the shared newKey field
 * @return the result of addNewKey() for this key
 */
public int add(String value) {
newKey.set(value);
// addNewKey() stores the key's bytes and their offset only if the key is new
return addNewKey();
}

/**
 * Inserts the key currently held in newKey.
 *
 * When add() reports that the key was not already present, the key's bytes
 * are appended to byteArray and the value returned by that append is
 * recorded in keyOffsets.
 *
 * @return lastAdd, read after the insert attempt
 */
private int addNewKey() {
  if (!add()) {
    // key already known: nothing to store
    return lastAdd;
  }
  // only the first getLength() bytes of the buffer are passed on
  final int keyLength = newKey.getLength();
  keyOffsets.add(byteArray.add(newKey.getBytes(), 0, keyLength));
  return lastAdd;
}

/**
 * Adds a key supplied as a Text value to the tree.
 *
 * @param value the key to add; its contents are copied into the shared
 *        newKey field via newKey.set(value)
 * @return the result of addNewKey() for this key
 */
public int add(Text value) {
newKey.set(value);
return addNewKey();
}

@Override
protected int compareValue(int position) {
int start = keyOffsets.get(position);
Expand Down
18 changes: 9 additions & 9 deletions ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -1032,20 +1032,20 @@ private static class StringTreeWriter extends TreeWriter {
}

/**
* Method to retrieve string values from the value object, which can be overridden
* Method to retrieve text values from the value object, which can be overridden
* by subclasses.
* @param obj value
* @return String value from obj
* @return Text value from obj
*/
String getStringValue(Object obj) {
return ((StringObjectInspector) inspector).getPrimitiveJavaObject(obj);
Text getTextValue(Object obj) {
return ((StringObjectInspector) inspector).getPrimitiveWritableObject(obj);
}

@Override
void write(Object obj) throws IOException {
super.write(obj);
if (obj != null) {
String val = getStringValue(obj);
Text val = getTextValue(obj);
rows.add(dictionary.add(val));
indexStatistics.updateString(val);
}
Expand Down Expand Up @@ -1194,9 +1194,9 @@ private static class CharTreeWriter extends StringTreeWriter {
* Override base class implementation to support char values.
*/
@Override
String getStringValue(Object obj) {
Text getTextValue(Object obj) {
return (((HiveCharObjectInspector) inspector)
.getPrimitiveJavaObject(obj)).getValue();
.getPrimitiveWritableObject(obj)).getTextValue();
}
}

Expand All @@ -1216,9 +1216,9 @@ private static class VarcharTreeWriter extends StringTreeWriter {
* Override base class implementation to support varchar values.
*/
@Override
String getStringValue(Object obj) {
Text getTextValue(Object obj) {
return (((HiveVarcharObjectInspector) inspector)
.getPrimitiveJavaObject(obj)).getValue();
.getPrimitiveWritableObject(obj)).getTextValue();
}
}

Expand Down

0 comments on commit 6072e3a

Please sign in to comment.