From b8c02524b555536ee0842c54a1e86e37fbfda99f Mon Sep 17 00:00:00 2001 From: Brock Noland Date: Tue, 9 Sep 2014 15:18:29 +0000 Subject: [PATCH] HIVE-6147 - Support avro data stored in HBase columns (Swarnim Kulkarni via Brock) git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1623845 13f79535-47bb-0310-9956-ffa450edef68 --- hbase-handler/if/test/avro_test.avpr | 144 ++++ hbase-handler/pom.xml | 67 ++ .../hadoop/hive/hbase/ColumnMappings.java | 60 +- .../hadoop/hive/hbase/HBaseCompositeKey.java | 12 + .../hive/hbase/HBaseLazyObjectFactory.java | 19 +- .../hadoop/hive/hbase/HBaseRowSerializer.java | 12 +- .../apache/hadoop/hive/hbase/HBaseSerDe.java | 26 +- .../hadoop/hive/hbase/HBaseSerDeHelper.java | 557 +++++++++++++++ .../hive/hbase/HBaseSerDeParameters.java | 266 ++++++- .../hadoop/hive/hbase/LazyHBaseCellMap.java | 12 +- .../hbase/struct/AvroHBaseValueFactory.java | 161 +++++ .../struct/DefaultHBaseValueFactory.java | 64 ++ .../hive/hbase/struct/HBaseValueFactory.java | 64 ++ .../hbase/HBaseTestAvroSchemaRetriever.java | 66 ++ .../hadoop/hive/hbase/TestHBaseSerDe.java | 676 +++++++++++++++++- .../hadoop/hive/hbase/avro/Address.java | 465 ++++++++++++ .../hadoop/hive/hbase/avro/ContactInfo.java | 250 +++++++ .../hadoop/hive/hbase/avro/Employee.java | 356 +++++++++ .../hadoop/hive/hbase/avro/EmployeeAvro.java | 17 + .../apache/hadoop/hive/hbase/avro/Gender.java | 13 + .../hadoop/hive/hbase/avro/HomePhone.java | 194 +++++ .../hadoop/hive/hbase/avro/OfficePhone.java | 194 +++++ .../hadoop/hive/serde/serdeConstants.java | 2 + .../avro/AvroGenericRecordWritable.java | 25 +- .../serde2/avro/AvroLazyObjectInspector.java | 506 +++++++++++++ .../avro/AvroObjectInspectorException.java | 25 + .../avro/AvroObjectInspectorGenerator.java | 2 +- .../hive/serde2/avro/AvroSchemaRetriever.java | 61 ++ .../hive/serde2/avro/AvroSerdeUtils.java | 1 + .../hadoop/hive/serde2/lazy/LazyFactory.java | 61 +- .../hadoop/hive/serde2/lazy/LazyStruct.java | 11 +- .../hadoop/hive/serde2/lazy/LazyUnion.java | 51 +- .../LazyObjectInspectorFactory.java | 47 +- .../LazySimpleStructObjectInspector.java | 28 +- .../ObjectInspectorFactory.java | 2 +- 35 files changed, 4406 insertions(+), 111 deletions(-) create mode 100644 hbase-handler/if/test/avro_test.avpr create mode 100644 hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java create mode 100644 hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java create mode 100644 hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java create mode 100644 hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java create mode 100644 hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java create mode 100644 hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Address.java create mode 100644 hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/ContactInfo.java create mode 100644 hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Employee.java create mode 100644 hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/EmployeeAvro.java create mode 100644 hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Gender.java create mode 100644 hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/HomePhone.java create mode 100644 hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java create mode 100644 serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java create mode 100644 
serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java create mode 100644 serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java diff --git a/hbase-handler/if/test/avro_test.avpr b/hbase-handler/if/test/avro_test.avpr new file mode 100644 index 000000000000..86f7fceef5b9 --- /dev/null +++ b/hbase-handler/if/test/avro_test.avpr @@ -0,0 +1,144 @@ +{ +"protocol": "EmployeeAvro", +"namespace": "org.apache.hadoop.hive.hbase.avro", +"types": [ +{ +"type": "enum", +"name": "Gender", +"symbols": [ +"MALE", +"FEMALE" +] +}, +{ +"type": "record", +"name": "HomePhone", +"fields": [ +{ +"name": "areaCode", +"type": "long" +}, +{ +"name": "number", +"type": "long" +} +] +}, +{ +"type": "record", +"name": "OfficePhone", +"fields": [ +{ +"name": "areaCode", +"type": "long" +}, +{ +"name": "number", +"type": "long" +} +] +}, +{ +"type": "record", +"name": "Address", +"fields": [ +{ +"name": "address1", +"type": "string" +}, +{ +"name": "address2", +"type": "string" +}, +{ +"name": "city", +"type": "string" +}, +{ +"name": "zipcode", +"type": "long" +}, +{ +"name": "county", +"type": [ +"HomePhone", +"OfficePhone", +"string", +"null" +] +}, +{ +"name": "aliases", +"type": [ +{ +"type": "array", +"items": "string" +}, +"null" +] +}, +{ +"name": "metadata", +"type": [ +"null", +{ +"type": "map", +"values": "string" +} +] +} +] +}, +{ +"type": "record", +"name": "ContactInfo", +"fields": [ +{ +"name": "address", +"type": [ +{ +"type": "array", +"items": "Address" +}, +"null" +] +}, +{ +"name": "homePhone", +"type": "HomePhone" +}, +{ +"name": "officePhone", +"type": "OfficePhone" +} +] +}, +{ +"type": "record", +"name": "Employee", +"fields": [ +{ +"name": "employeeName", +"type": "string" +}, +{ +"name": "employeeID", +"type": "long" +}, +{ +"name": "age", +"type": "long" +}, +{ +"name": "gender", +"type": "Gender" +}, +{ +"name": "contactInfo", +"type": "ContactInfo" +} +] +} +], +"messages": { } +} diff --git a/hbase-handler/pom.xml b/hbase-handler/pom.xml index 707e52c99025..d9f07630895a 100644 --- a/hbase-handler/pom.xml +++ b/hbase-handler/pom.xml @@ -57,6 +57,11 @@ ${junit.version} test + + org.apache.avro + avro + 1.7.6 + @@ -100,6 +105,12 @@ hbase-hadoop-compat ${hbase.hadoop1.version} + + org.apache.hadoop + hadoop-test + ${hadoop-20S.version} + test + org.apache.hbase hbase-common @@ -132,12 +143,26 @@ ${hadoop-23.version} true + + org.apache.hadoop + hadoop-common + ${hadoop-23.version} + tests + test + org.apache.hadoop hadoop-mapreduce-client-core ${hadoop-23.version} true + + org.apache.hadoop + hadoop-hdfs + ${hadoop-23.version} + tests + test + org.apache.hbase hbase-hadoop2-compat @@ -190,6 +215,12 @@ test-jar test + + com.sun.jersey + jersey-servlet + ${jersey.version} + test + @@ -209,6 +240,42 @@ + + org.apache.avro + avro-maven-plugin + 1.7.6 + + + generate-test-sources + + protocol + + + ${project.basedir}/if/test + ${project.basedir}/src/test + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 1.7 + + + add-test-sources + generate-test-sources + + add-test-source + + + + ${project.basedir}/src/gen/avro/gen-java + + + + + diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java index 8735fbcdd3d8..fbd130853fb1 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java @@ -23,16 +23,21 @@ package org.apache.hadoop.hive.hbase; 
-import com.google.common.collect.Iterators;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import com.google.common.collect.Iterators;
 
 public class ColumnMappings implements Iterable<ColumnMapping> {
 
@@ -53,24 +58,41 @@ public int size() {
     return columnsMapping.length;
   }
 
-  String toTypesString() {
+  String toNamesString(Properties tbl, String autogenerate) {
+    if (autogenerate != null && autogenerate.equals("true")) {
+      StringBuilder sb = new StringBuilder();
+      HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), sb);
+      return sb.toString();
+    }
+
+    return StringUtils.EMPTY; // no autogeneration requested; return an empty string
+  }
+
+  String toTypesString(Properties tbl, Configuration conf, String autogenerate)
+      throws SerDeException {
     StringBuilder sb = new StringBuilder();
-    for (ColumnMapping colMap : columnsMapping) {
-      if (sb.length() > 0) {
-        sb.append(":");
-      }
-      if (colMap.hbaseRowKey) {
-        // the row key column becomes a STRING
-        sb.append(serdeConstants.STRING_TYPE_NAME);
-      } else if (colMap.qualifierName == null) {
-        // a column family become a MAP
-        sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
-            + serdeConstants.STRING_TYPE_NAME + ">");
-      } else {
-        // an individual column becomes a STRING
-        sb.append(serdeConstants.STRING_TYPE_NAME);
+
+    if (autogenerate != null && autogenerate.equals("true")) {
+      HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
+    } else {
+      for (ColumnMapping colMap : columnsMapping) {
+        if (sb.length() > 0) {
+          sb.append(":");
+        }
+        if (colMap.hbaseRowKey) {
+          // the row key column becomes a STRING
+          sb.append(serdeConstants.STRING_TYPE_NAME);
+        } else if (colMap.qualifierName == null) {
+          // a column family becomes a MAP
+          sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+              + serdeConstants.STRING_TYPE_NAME + ">");
+        } else {
+          // an individual column becomes a STRING
+          sb.append(serdeConstants.STRING_TYPE_NAME);
+        }
       }
     }
+
     return sb.toString();
   }
 
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
index d18421618b3c..936e22d5ac5a 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
@@ -19,7 +19,9 @@ package org.apache.hadoop.hive.hbase;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
@@ -94,4 +96,14 @@ public LazyObject toLazyObject(int fieldID, byte[] by
     return lazyObject;
   }
+
+  /**
+   * Return the different parts of the key. By default, this returns an empty map. Consumers can
+   * choose to override this to provide their own names and types for the parts of the key.
+   *
+   * @return map of part names to their types
+   * */
+  public Map<String, String> getParts() {
+    return Collections.emptyMap();
+  }
 }
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
index 5c2645696e89..78f23cbbcb0b 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
@@ -18,32 +18,31 @@ package org.apache.hadoop.hive.hbase;
 
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
-import java.util.ArrayList;
-import java.util.List;
-
 // Does same thing with LazyFactory#createLazyObjectInspector except that this replaces
 // original keyOI with OI which is create by HBaseKeyFactory provided by serde property for hbase
 public class HBaseLazyObjectFactory {
   public static ObjectInspector createLazyHBaseStructInspector(
-      SerDeParameters serdeParams, int index, HBaseKeyFactory factory) throws SerDeException {
+      SerDeParameters serdeParams, int index, HBaseKeyFactory keyFactory, List<HBaseValueFactory> valueFactories) throws SerDeException {
     List<TypeInfo> columnTypes = serdeParams.getColumnTypes();
     ArrayList<ObjectInspector> columnObjectInspectors = new ArrayList<ObjectInspector>(
         columnTypes.size());
     for (int i = 0; i < columnTypes.size(); i++) {
       if (i == index) {
-        columnObjectInspectors.add(factory.createKeyObjectInspector(columnTypes.get(i)));
+        columnObjectInspectors.add(keyFactory.createKeyObjectInspector(columnTypes.get(i)));
       } else {
-        columnObjectInspectors.add(LazyFactory.createLazyObjectInspector(
-            columnTypes.get(i), serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
-            serdeParams.isEscaped(), serdeParams.getEscapeChar()));
+        columnObjectInspectors.add(valueFactories.get(i).createValueObjectInspector(
+            columnTypes.get(i)));
       }
     }
     return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
@@ -51,4 +50,4 @@ public static ObjectInspector createLazyHBaseStructInspector(
         serdeParams.getNullSequence(), serdeParams.isLastColumnTakesRest(),
         serdeParams.isEscaped(), serdeParams.getEscapeChar());
   }
-}
+}
\ No newline at end of file
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
index fe6081e94529..c6c42b479672 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
@@ -18,6 +18,10 @@ package org.apache.hadoop.hive.hbase;
 
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
 import org.apache.hadoop.hive.serde2.ByteStream;
@@ -34,10 +38,6 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.Writable;
 
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
 public class HBaseRowSerializer {
 
   private
final HBaseKeyFactory keyFactory; @@ -279,6 +279,10 @@ private boolean serialize( } } return true; + case UNION: { + // union type currently not totally supported. See HIVE-2390 + return false; + } default: throw new RuntimeException("Unknown category type: " + objInspector.getCategory()); } diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java index 6c1ce5c1c725..ca2f40e11abf 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hive.hbase; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -25,6 +29,7 @@ import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -33,10 +38,6 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - /** * HBaseSerDe can be used to serialize object into an HBase table and * deserialize objects from an HBase table. @@ -50,15 +51,21 @@ public class HBaseSerDe extends AbstractSerDe { public static final String HBASE_KEY_COL = ":key"; public static final String HBASE_PUT_TIMESTAMP = "hbase.put.timestamp"; public static final String HBASE_COMPOSITE_KEY_CLASS = "hbase.composite.key.class"; + public static final String HBASE_COMPOSITE_KEY_TYPES = "hbase.composite.key.types"; public static final String HBASE_COMPOSITE_KEY_FACTORY = "hbase.composite.key.factory"; public static final String HBASE_SCAN_CACHE = "hbase.scan.cache"; public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock"; public static final String HBASE_SCAN_BATCH = "hbase.scan.batch"; + public static final String HBASE_AUTOGENERATE_STRUCT = "hbase.struct.autogenerate"; /** - * Determines whether a regex matching should be done on the columns or not. Defaults to true. - * WARNING: Note that currently this only supports the suffix wildcard .* + * Determines whether a regex matching should be done on the columns or not. Defaults to true. + * WARNING: Note that currently this only supports the suffix wildcard .* */ public static final String HBASE_COLUMNS_REGEX_MATCHING = "hbase.columns.mapping.regex.matching"; + /** + * Defines the type for a column. 
+ **/ + public static final String SERIALIZATION_TYPE = "serialization.type"; private ObjectInspector cachedObjectInspector; private LazyHBaseRow cachedHBaseRow; @@ -83,8 +90,11 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException { serdeParams = new HBaseSerDeParameters(conf, tbl, getClass().getName()); - cachedObjectInspector = HBaseLazyObjectFactory.createLazyHBaseStructInspector( - serdeParams.getSerdeParams(), serdeParams.getKeyIndex(), serdeParams.getKeyFactory()); + cachedObjectInspector = + HBaseLazyObjectFactory + .createLazyHBaseStructInspector(serdeParams.getSerdeParams(), + serdeParams.getKeyIndex(), serdeParams.getKeyFactory(), + serdeParams.getValueFactories()); cachedHBaseRow = new LazyHBaseRow( (LazySimpleStructObjectInspector) cachedObjectInspector, diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java new file mode 100644 index 000000000000..25a9cfc37d97 --- /dev/null +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java @@ -0,0 +1,557 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.hbase;
+
+import static org.apache.hadoop.hive.hbase.HBaseSerDeParameters.AVRO_SERIALIZATION_TYPE;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Helper class for {@link HBaseSerDe}
+ * */
+public class HBaseSerDeHelper {
+
+  /**
+   * Logger
+   * */
+  public static final Log LOG = LogFactory.getLog(HBaseSerDeHelper.class);
+
+  /**
+   * Autogenerates the column names from the given column mapping
+   *
+   * @param tbl the hive table properties
+   * @param columnsMapping the hbase columns mapping determining hbase column families and
+   *          qualifiers
+   * @param sb StringBuilder to form the list of columns
+   * @throws IllegalArgumentException if any of the given arguments was null
+   * */
+  public static void generateColumns(Properties tbl, List<ColumnMapping> columnsMapping,
+      StringBuilder sb) {
+    // Generate the columns according to the column mapping provided.
+    // Note: The generated column names are the same as family_name.qualifier_name. If the
+    // qualifier name is null, each column is familyname_col[i], where i is the index of the
+    // column, ranging from 0 to n-1 where n is the size of the column mapping. The filter
+    // function removes any special characters other than alphabets and numbers from the column
+    // family and qualifier name, as the only special character allowed in a column name is "_",
+    // which is used as a separator between the column family and qualifier name.
+
+    if (columnsMapping == null) {
+      throw new IllegalArgumentException("columnsMapping cannot be null");
+    }
+
+    if (sb == null) {
+      throw new IllegalArgumentException("StringBuilder cannot be null");
+    }
+
+    for (int i = 0; i < columnsMapping.size(); i++) {
+      ColumnMapping colMap = columnsMapping.get(i);
+
+      if (colMap.hbaseRowKey) {
+        sb.append("key").append(StringUtils.COMMA_STR);
+      } else if (colMap.qualifierName == null) {
+        // this corresponds to a map
+
+        if (colMap.qualifierPrefix != null) {
+          sb.append(filter(colMap.familyName)).append("_")
+              .append(filter(colMap.qualifierPrefix) + i).append(StringUtils.COMMA_STR);
+        } else {
+          sb.append(filter(colMap.familyName)).append("_").append("col" + i)
+              .append(StringUtils.COMMA_STR);
+        }
+      } else {
+        // just an individual column
+        sb.append(filter(colMap.familyName)).append("_").append(filter(colMap.qualifierName))
+            .append(StringUtils.COMMA_STR);
+      }
+    }
+
+    // trim off the ending ",", if any
+    trim(sb);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Generated columns: [" + sb.toString() + "]");
+    }
+  }
+
+  /**
+   * Autogenerates the column types from the given serialization class
+   *
+   * @param tbl the hive table properties
+   * @param columnsMapping the hbase columns mapping determining hbase column families and
+   *          qualifiers
+   * @param sb StringBuilder to form the list of column types
+   * @param conf configuration
+   * @throws IllegalArgumentException if any of the given arguments was null
+   * @throws SerDeException if there was an error generating the column types
+   * */
+  public static void generateColumnTypes(Properties tbl, List<ColumnMapping> columnsMapping,
+      StringBuilder sb, Configuration conf) throws SerDeException {
+
+    if (tbl == null) {
+      throw new IllegalArgumentException("tbl cannot be null");
+    }
+
+    if (columnsMapping == null) {
+      throw new IllegalArgumentException("columnsMapping cannot be null");
+    }
+
+    if (sb == null) {
+      throw new IllegalArgumentException("StringBuilder cannot be null");
+    }
+
+    // Generate the columns according to the column mapping provided
+    for (int i = 0; i < columnsMapping.size(); i++) {
+      if (sb.length() > 0) {
+        sb.append(":");
+      }
+
+      ColumnMapping colMap = columnsMapping.get(i);
+
+      if (colMap.hbaseRowKey) {
+
+        Map<String, String> compositeKeyParts = getCompositeKeyParts(tbl);
+        StringBuilder keyStruct = new StringBuilder();
+
+        if (compositeKeyParts == null || compositeKeyParts.isEmpty()) {
+          String compKeyClass = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+          String compKeyTypes = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_TYPES);
+
+          if (compKeyTypes == null) {
+
+            if (compKeyClass != null) {
+              // a composite key class was provided, but neither the types property was set
+              // nor was the getParts() method of HBaseCompositeKey overridden in the
+              // implementation. Flag an exception.
+              throw new SerDeException(
+                  "Either the hbase.composite.key.types property should be set or the getParts method must be overridden in "
+                      + compKeyClass);
+            }
+
+            // the row key column becomes a STRING
+            sb.append(serdeConstants.STRING_TYPE_NAME);
+          } else {
+            generateKeyStruct(compKeyTypes, keyStruct);
+          }
+        } else {
+          generateKeyStruct(compositeKeyParts, keyStruct);
+        }
+        sb.append(keyStruct);
+      } else if (colMap.qualifierName == null) {
+
+        String serClassName = null;
+        String serType = null;
+        String schemaLiteral = null;
+        String schemaUrl = null;
+
+        if (colMap.qualifierPrefix != null) {
+
+          serType =
+              tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+ + HBaseSerDe.SERIALIZATION_TYPE); + + if (serType == null) { + throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE + + " property not provided for column family [" + colMap.familyName + + "] and prefix [" + colMap.qualifierPrefix + "]"); + } + + // we are provided with a prefix + serClassName = + tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + + serdeConstants.SERIALIZATION_CLASS); + + if (serClassName == null) { + if (serType.equalsIgnoreCase(HBaseSerDeParameters.AVRO_SERIALIZATION_TYPE)) { + // for avro type, the serialization class parameter is optional + schemaLiteral = + tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + + AvroSerdeUtils.SCHEMA_LITERAL); + schemaUrl = + tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + + AvroSerdeUtils.SCHEMA_URL); + + if (schemaLiteral == null && schemaUrl == null) { + // either schema literal, schema url or serialization class must + // be provided + throw new SerDeException("For an avro schema, either " + + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL + " or " + + serdeConstants.SERIALIZATION_CLASS + " property must be set."); + } + + if (schemaUrl != null) { + schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString(); + } + + } else { + throw new SerDeException(serdeConstants.SERIALIZATION_CLASS + + " property not provided for column family [" + colMap.familyName + + "] and prefix [" + colMap.qualifierPrefix + "]"); + } + } + } else { + serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE); + + if (serType == null) { + throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE + + " property not provided for column family [" + colMap.familyName + "]"); + } + + serClassName = + tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS); + + if (serClassName == null) { + + if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) { + // for avro type, the serialization class parameter is optional + schemaLiteral = + tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_LITERAL); + schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_URL); + + if (schemaLiteral == null && schemaUrl == null) { + // either schema literal or serialization class must be provided + throw new SerDeException("For an avro schema, either " + + AvroSerdeUtils.SCHEMA_LITERAL + " property or " + + serdeConstants.SERIALIZATION_CLASS + " property must be set."); + } + + if (schemaUrl != null) { + schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString(); + } + } else { + throw new SerDeException(serdeConstants.SERIALIZATION_CLASS + + " property not provided for column family [" + colMap.familyName + "]"); + } + } + } + + StringBuilder generatedStruct = new StringBuilder(); + + // generate struct for each of the given prefixes + generateColumnStruct(serType, serClassName, schemaLiteral, colMap, generatedStruct); + + // a column family becomes a MAP + sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + "," + + generatedStruct + ">"); + + } else { + + String qualifierName = colMap.qualifierName; + + if (colMap.qualifierName.endsWith("*")) { + // we are provided with a prefix + qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1); + } + + String serType = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." 
+ + HBaseSerDe.SERIALIZATION_TYPE); + + if (serType == null) { + throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE + + " property not provided for column family [" + colMap.familyName + + "] and qualifier [" + qualifierName + "]"); + } + + String serClassName = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." + + serdeConstants.SERIALIZATION_CLASS); + + String schemaLiteral = null; + String schemaUrl = null; + + if (serClassName == null) { + + if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) { + // for avro type, the serialization class parameter is optional + schemaLiteral = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." + + AvroSerdeUtils.SCHEMA_LITERAL); + schemaUrl = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." + + AvroSerdeUtils.SCHEMA_URL); + + if (schemaLiteral == null && schemaUrl == null) { + // either schema literal, schema url or serialization class must + // be provided + throw new SerDeException("For an avro schema, either " + + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL + " or " + + serdeConstants.SERIALIZATION_CLASS + " property must be set."); + } + + if (schemaUrl != null) { + schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString(); + } + } else { + throw new SerDeException(serdeConstants.SERIALIZATION_CLASS + + " property not provided for column family [" + colMap.familyName + + "] and qualifier [" + qualifierName + "]"); + } + } + + StringBuilder generatedStruct = new StringBuilder(); + + generateColumnStruct(serType, serClassName, schemaLiteral, colMap, generatedStruct); + + sb.append(generatedStruct); + } + } + + // trim off ending ",", if any + trim(sb); + + if (LOG.isDebugEnabled()) { + LOG.debug("Generated column types: [" + sb.toString() + "]"); + } + } + + /** + * Read the schema from the given hdfs url for the schema + * */ + public static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf) + throws SerDeException { + FSDataInputStream in = null; + FileSystem fs = null; + try { + fs = FileSystem.get(new URI(schemaFSUrl), conf); + in = fs.open(new Path(schemaFSUrl)); + Schema s = Schema.parse(in); + return s; + } catch (URISyntaxException e) { + throw new SerDeException("Failure reading schema from filesystem", e); + } catch (IOException e) { + throw new SerDeException("Failure reading schema from filesystem", e); + } finally { + IOUtils.closeQuietly(in); + } + } + + /** + * Auto-generates the key struct for composite keys + * + * @param compositeKeyParts map of composite key part name to its type. 
Usually this would be + * provided by the custom implementation of {@link HBaseCompositeKey composite key} + * @param sb StringBuilder object to construct the struct + * */ + private static void generateKeyStruct(Map compositeKeyParts, StringBuilder sb) { + sb.append("struct<"); + + for (Entry entry : compositeKeyParts.entrySet()) { + sb.append(entry.getKey()).append(":").append(entry.getValue()).append(","); + } + + // trim the trailing "," + trim(sb); + sb.append(">"); + } + + /** + * Auto-generates the key struct for composite keys + * + * @param compositeKeyTypes comma separated list of composite key types in order + * @param sb StringBuilder object to construct the struct + * */ + private static void generateKeyStruct(String compositeKeyTypes, StringBuilder sb) { + sb.append("struct<"); + + // composite key types is a comma separated list of different parts of the + // composite keys in + // order in which they appear in the key + String[] keyTypes = compositeKeyTypes.split(","); + + for (int i = 0; i < keyTypes.length; i++) { + sb.append("col" + i).append(":").append(keyTypes[i]).append(StringUtils.COMMA_STR); + } + + // trim the trailing "," + trim(sb); + sb.append(">"); + } + + /** + * Auto-generates the column struct + * + * @param serType serialization type + * @param serClassName serialization class name + * @param schemaLiteral schema string + * @param colMap hbase column mapping + * @param sb StringBuilder to hold the generated struct + * @throws SerDeException if something goes wrong while generating the struct + * */ + private static void generateColumnStruct(String serType, String serClassName, + String schemaLiteral, ColumnMapping colMap, StringBuilder sb) throws SerDeException { + + if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) { + + if (serClassName != null) { + generateAvroStructFromClass(serClassName, sb); + } else { + generateAvroStructFromSchema(schemaLiteral, sb); + } + } else { + throw new SerDeException("Unknown " + HBaseSerDe.SERIALIZATION_TYPE + + " found for column family [" + colMap.familyName + "]"); + } + } + + /** + * Auto-generate the avro struct from class + * + * @param serClassName serialization class for avro struct + * @param sb StringBuilder to hold the generated struct + * @throws SerDeException if something goes wrong while generating the struct + * */ + private static void generateAvroStructFromClass(String serClassName, StringBuilder sb) + throws SerDeException { + Class serClass; + try { + serClass = Class.forName(serClassName); + } catch (ClassNotFoundException e) { + throw new SerDeException("Error obtaining descriptor for " + serClassName, e); + } + + Schema schema = ReflectData.get().getSchema(serClass); + + generateAvroStructFromSchema(schema, sb); + } + + /** + * Auto-generate the avro struct from schema + * + * @param schemaLiteral schema for the avro struct as string + * @param sb StringBuilder to hold the generated struct + * @throws SerDeException if something goes wrong while generating the struct + * */ + private static void generateAvroStructFromSchema(String schemaLiteral, StringBuilder sb) + throws SerDeException { + Schema schema = Schema.parse(schemaLiteral); + + generateAvroStructFromSchema(schema, sb); + } + + /** + * Auto-generate the avro struct from schema + * + * @param schema schema for the avro struct + * @param sb StringBuilder to hold the generated struct + * @throws SerDeException if something goes wrong while generating the struct + * */ + private static void generateAvroStructFromSchema(Schema schema, 
StringBuilder sb)
+      throws SerDeException {
+    AvroObjectInspectorGenerator avig = new AvroObjectInspectorGenerator(schema);
+
+    sb.append("struct<");
+
+    // Get the column names and their corresponding types
+    List<String> columnNames = avig.getColumnNames();
+    List<TypeInfo> columnTypes = avig.getColumnTypes();
+
+    if (columnNames.size() != columnTypes.size()) {
+      throw new AssertionError("The number of column names should be the same as the number of column types");
+    }
+
+    for (int i = 0; i < columnNames.size(); i++) {
+      sb.append(columnNames.get(i));
+      sb.append(":");
+      sb.append(columnTypes.get(i).getTypeName());
+      sb.append(",");
+    }
+
+    trim(sb).append(">");
+  }
+
+  /**
+   * Trims by removing the trailing "," if any
+   *
+   * @param sb StringBuilder to trim
+   * @return the trimmed StringBuilder
+   * */
+  private static StringBuilder trim(StringBuilder sb) {
+    if (sb.charAt(sb.length() - 1) == StringUtils.COMMA) {
+      return sb.deleteCharAt(sb.length() - 1);
+    }
+
+    return sb;
+  }
+
+  /**
+   * Filters the given name by removing any special characters and converting to lowercase
+   * */
+  private static String filter(String name) {
+    return name.replaceAll("[^a-zA-Z0-9]+", "").toLowerCase();
+  }
+
+  /**
+   * Return the parts of the composite key
+   *
+   * @param tbl Properties for the table
+   * @return a map of composite key part names to their types
+   * @throws SerDeException if something goes wrong while getting the composite key parts
+   * */
+  @SuppressWarnings("unchecked")
+  private static Map<String, String> getCompositeKeyParts(Properties tbl) throws SerDeException {
+    String compKeyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+
+    if (compKeyClassName == null) {
+      // no custom composite key class provided. return null
+      return null;
+    }
+
+    CompositeHBaseKeyFactory keyFactory = null;
+
+    Class<?> keyClass;
+    try {
+      keyClass = Class.forName(compKeyClassName);
+      keyFactory = new CompositeHBaseKeyFactory(keyClass);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+
+    HBaseCompositeKey compKey = keyFactory.createKey(null);
+    return compKey.getParts();
+  }
+}
\ No newline at end of file
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
index 8ea4408b05fe..8878eb59a8f5 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
@@ -18,13 +18,20 @@ package org.apache.hadoop.hive.hbase;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Properties;
 
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.hbase.struct.AvroHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -37,10 +44,12 @@
  */
 public class HBaseSerDeParameters {
 
+  public static final String AVRO_SERIALIZATION_TYPE = "avro";
+  public static final String STRUCT_SERIALIZATION_TYPE =
"struct"; + private final SerDeParameters serdeParams; private final Configuration job; - private final Properties tbl; private final String columnMappingString; private final ColumnMappings columnMappings; @@ -48,57 +57,50 @@ public class HBaseSerDeParameters { private final long putTimestamp; private final HBaseKeyFactory keyFactory; + private final List valueFactories; HBaseSerDeParameters(Configuration job, Properties tbl, String serdeName) throws SerDeException { this.job = job; - this.tbl = tbl; - this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName); - this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1")); // Read configuration parameters columnMappingString = tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); - doColumnRegexMatching = Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true")); + doColumnRegexMatching = + Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true")); // Parse and initialize the HBase columns mapping columnMappings = HBaseSerDe.parseColumnsMapping(columnMappingString, doColumnRegexMatching); - columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(), serdeParams.getColumnTypes()); - - // Precondition: make sure this is done after the rest of the SerDe initialization is done. - String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE); - columnMappings.parseColumnStorageTypes(hbaseTableStorageType); // Build the type property string if not supplied String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - if (columnTypeProperty == null) { - tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnMappings.toTypesString()); - } + String autogenerate = tbl.getProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT); - this.keyFactory = initKeyFactory(job, tbl); - } + if (columnTypeProperty == null || columnTypeProperty.isEmpty()) { + String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); + if (columnNameProperty == null || columnNameProperty.isEmpty()) { + if (autogenerate == null || autogenerate.isEmpty()) { + throw new IllegalArgumentException("Either the columns must be specified or the " + + HBaseSerDe.HBASE_AUTOGENERATE_STRUCT + " property must be set to true."); + } - private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException { - try { - HBaseKeyFactory keyFactory = createKeyFactory(conf, tbl); - if (keyFactory != null) { - keyFactory.init(this, tbl); + tbl.setProperty(serdeConstants.LIST_COLUMNS, + columnMappings.toNamesString(tbl, autogenerate)); } - return keyFactory; - } catch (Exception e) { - throw new SerDeException(e); - } - } - private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl) throws Exception { - String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY); - if (factoryClassName != null) { - Class factoryClazz = Class.forName(factoryClassName); - return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job); + tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, + columnMappings.toTypesString(tbl, job, autogenerate)); } - String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS); - if (keyClassName != null) { - Class keyClass = Class.forName(keyClassName); - return new CompositeHBaseKeyFactory(keyClass); - } - return new DefaultHBaseKeyFactory(); + + this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName); + this.putTimestamp 
= Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1")); + + columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(), + serdeParams.getColumnTypes()); + + // Precondition: make sure this is done after the rest of the SerDe initialization is done. + String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE); + columnMappings.parseColumnStorageTypes(hbaseTableStorageType); + + this.keyFactory = initKeyFactory(job, tbl); + this.valueFactories = initValueFactories(job, tbl); } public List getColumnNames() { @@ -133,6 +135,10 @@ public HBaseKeyFactory getKeyFactory() { return keyFactory; } + public List getValueFactories() { + return valueFactories; + } + public Configuration getBaseConfiguration() { return job; } @@ -151,4 +157,190 @@ public TypeInfo getTypeForName(String columnName) { public String toString() { return "[" + columnMappingString + ":" + getColumnNames() + ":" + getColumnTypes() + "]"; } -} + + private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException { + try { + HBaseKeyFactory keyFactory = createKeyFactory(conf, tbl); + if (keyFactory != null) { + keyFactory.init(this, tbl); + } + return keyFactory; + } catch (Exception e) { + throw new SerDeException(e); + } + } + + private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl) + throws Exception { + String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY); + if (factoryClassName != null) { + Class factoryClazz = Class.forName(factoryClassName); + return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job); + } + String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS); + if (keyClassName != null) { + Class keyClass = Class.forName(keyClassName); + return new CompositeHBaseKeyFactory(keyClass); + } + return new DefaultHBaseKeyFactory(); + } + + private List initValueFactories(Configuration conf, Properties tbl) + throws SerDeException { + List valueFactories = createValueFactories(conf, tbl); + + for (HBaseValueFactory valueFactory : valueFactories) { + valueFactory.init(this, conf, tbl); + } + + return valueFactories; + } + + private List createValueFactories(Configuration conf, Properties tbl) + throws SerDeException { + List valueFactories = new ArrayList(); + + try { + for (int i = 0; i < columnMappings.size(); i++) { + String serType = getSerializationType(conf, tbl, columnMappings.getColumnsMapping()[i]); + + if (serType != null && serType.equals(AVRO_SERIALIZATION_TYPE)) { + Schema schema = getSchema(conf, tbl, columnMappings.getColumnsMapping()[i]); + valueFactories.add(new AvroHBaseValueFactory(schema)); + } else { + valueFactories.add(new DefaultHBaseValueFactory()); + } + } + } catch (Exception e) { + throw new SerDeException(e); + } + + return valueFactories; + } + + /** + * Get the type for the given {@link ColumnMapping colMap} + * */ + private String getSerializationType(Configuration conf, Properties tbl, + ColumnMapping colMap) throws Exception { + String serType = null; + + if (colMap.qualifierName == null) { + // only a column family + + if (colMap.qualifierPrefix != null) { + serType = tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + + HBaseSerDe.SERIALIZATION_TYPE); + } else { + serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE); + } + } else if (!colMap.hbaseRowKey) { + // not an hbase row key. 
This should either be a prefix or an individual qualifier + String qualifierName = colMap.qualifierName; + + if (colMap.qualifierName.endsWith("*")) { + qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1); + } + + serType = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." + + HBaseSerDe.SERIALIZATION_TYPE); + } + + return serType; + } + + private Schema getSchema(Configuration conf, Properties tbl, ColumnMapping colMap) + throws Exception { + String serType = null; + String serClassName = null; + String schemaLiteral = null; + String schemaUrl = null; + + if (colMap.qualifierName == null) { + // only a column family + + if (colMap.qualifierPrefix != null) { + serType = + tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + + HBaseSerDe.SERIALIZATION_TYPE); + + serClassName = + tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + + serdeConstants.SERIALIZATION_CLASS); + + schemaLiteral = + tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + + AvroSerdeUtils.SCHEMA_LITERAL); + + schemaUrl = + tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "." + + AvroSerdeUtils.SCHEMA_URL); + } else { + serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE); + + serClassName = + tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS); + + schemaLiteral = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_LITERAL); + + schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_URL); + } + } else if (!colMap.hbaseRowKey) { + // not an hbase row key. This should either be a prefix or an individual qualifier + String qualifierName = colMap.qualifierName; + + if (colMap.qualifierName.endsWith("*")) { + qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1); + } + + serType = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." + + HBaseSerDe.SERIALIZATION_TYPE); + + serClassName = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." + + serdeConstants.SERIALIZATION_CLASS); + + schemaLiteral = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." + + AvroSerdeUtils.SCHEMA_LITERAL); + + schemaUrl = + tbl.getProperty(colMap.familyName + "." + qualifierName + "." 
+ AvroSerdeUtils.SCHEMA_URL); + } + + String avroSchemaRetClass = tbl.getProperty(AvroSerdeUtils.SCHEMA_RETRIEVER); + + if (schemaLiteral == null && serClassName == null && schemaUrl == null + && avroSchemaRetClass == null) { + throw new IllegalArgumentException("serialization.type was set to [" + serType + + "] but neither " + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL + + ", serialization.class or " + AvroSerdeUtils.SCHEMA_RETRIEVER + " property was set"); + } + + Class deserializerClass = null; + + if (serClassName != null) { + deserializerClass = conf.getClassByName(serClassName); + } + + Schema schema = null; + + // only worry about getting schema if we are dealing with Avro + if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) { + if (avroSchemaRetClass == null) { + // bother about generating a schema only if a schema retriever class wasn't provided + if (schemaLiteral != null) { + schema = Schema.parse(schemaLiteral); + } else if (schemaUrl != null) { + schema = HBaseSerDeHelper.getSchemaFromFS(schemaUrl, conf); + } else if (deserializerClass != null) { + schema = ReflectData.get().getSchema(deserializerClass); + } + } + } + + return schema; + } +} \ No newline at end of file diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java index cedef102eb84..2727b364f39a 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java @@ -149,8 +149,16 @@ public Object getMapValueElement(Object key) { } if (keyI.equals(key)) { // Got a match, return the value - LazyObject v = (LazyObject) entry.getValue(); - return v == null ? v : v.getObject(); + Object _value = entry.getValue(); + + // If the given value is a type of LazyObject, then only try and convert it to that form. + // Else return it as it is. + if (_value instanceof LazyObject) { + LazyObject v = (LazyObject) entry.getValue(); + return v == null ? v : v.getObject(); + } else { + return _value; + } } } diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java new file mode 100644 index 000000000000..c341c0afdcae --- /dev/null +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.hbase.struct; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.avro.Schema; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.hbase.HBaseSerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector; +import org.apache.hadoop.hive.serde2.avro.AvroSchemaRetriever; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * Avro specific implementation of the {@link HBaseValueFactory} + * */ +public class AvroHBaseValueFactory extends DefaultHBaseValueFactory { + + private AvroSchemaRetriever avroSchemaRetriever; + private Schema schema; + + /** + * Constructor + * + * @param schema the associated {@link Schema schema} + * */ + public AvroHBaseValueFactory(Schema schema) { + this.schema = schema; + } + + @Override + public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties) + throws SerDeException { + super.init(hbaseParams, conf, properties); + String avroSchemaRetClass = properties.getProperty(AvroSerdeUtils.SCHEMA_RETRIEVER); + + if (avroSchemaRetClass != null) { + Class avroSchemaRetrieverClass = null; + try { + avroSchemaRetrieverClass = conf.getClassByName(avroSchemaRetClass); + } catch (ClassNotFoundException e) { + throw new SerDeException(e); + } + + initAvroSchemaRetriever(avroSchemaRetrieverClass, conf, properties); + } + } + + @Override + public ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException { + ObjectInspector oi = + LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), 1, + serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), + ObjectInspectorOptions.AVRO); + + // initialize the object inspectors + initInternalObjectInspectors(oi); + + return oi; + } + + @Override + public byte[] serializeValue(Object object, StructField field) throws IOException { + // Explicit avro serialization not supported yet. Revert to default + return super.serializeValue(object, field); + } + + /** + * Initialize the instance for {@link AvroSchemaRetriever} + * + * @throws SerDeException + * */ + private void initAvroSchemaRetriever(Class avroSchemaRetrieverClass, Configuration conf, + Properties tbl) throws SerDeException { + + try { + avroSchemaRetriever = (AvroSchemaRetriever) avroSchemaRetrieverClass.getDeclaredConstructor( + Configuration.class, Properties.class).newInstance( + conf, tbl); + } catch (NoSuchMethodException e) { + // the constructor wasn't defined in the implementation class. 
Flag error
+      throw new SerDeException("Constructor not defined in schema retriever class [" + avroSchemaRetrieverClass.getName() + "]", e);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+  /**
+   * Initialize the internal object inspectors
+   * */
+  private void initInternalObjectInspectors(ObjectInspector oi) {
+    if (oi instanceof AvroLazyObjectInspector) {
+      initAvroObjectInspector(oi);
+    } else if (oi instanceof MapObjectInspector) {
+      // we found a map objectinspector. Grab the objectinspector for the value and initialize
+      // it appropriately
+      ObjectInspector valueOI = ((MapObjectInspector) oi).getMapValueObjectInspector();
+
+      if (valueOI instanceof AvroLazyObjectInspector) {
+        initAvroObjectInspector(valueOI);
+      }
+    }
+  }
+
+  /**
+   * Recursively initialize the {@link AvroLazyObjectInspector} and all its nested object
+   * inspectors
+   *
+   * @param oi ObjectInspector to be recursively initialized
+   * */
+  private void initAvroObjectInspector(ObjectInspector oi) {
+    // Check for a list. If found, recursively init its members
+    if (oi instanceof ListObjectInspector) {
+      ListObjectInspector loi = (ListObjectInspector) oi;
+
+      initAvroObjectInspector(loi.getListElementObjectInspector());
+      return;
+    }
+
+    // Check for a nested message. If found, set the schema, else return.
+    if (!(oi instanceof AvroLazyObjectInspector)) {
+      return;
+    }
+
+    AvroLazyObjectInspector aoi = (AvroLazyObjectInspector) oi;
+
+    aoi.setSchemaRetriever(avroSchemaRetriever);
+    aoi.setReaderSchema(schema);
+
+    // call the method recursively over all the internal fields of the given avro
+    // objectinspector
+    for (StructField field : aoi.getAllStructFieldRefs()) {
+      initAvroObjectInspector(field.getFieldObjectInspector());
+    }
+  }
+}
\ No newline at end of file
diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
new file mode 100644
index 000000000000..ac2cb5709e92
--- /dev/null
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.hbase.struct; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.hbase.HBaseSerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * Default implementation of the {@link HBaseValueFactory} + * */ +public class DefaultHBaseValueFactory implements HBaseValueFactory{ + + protected LazySimpleSerDe.SerDeParameters serdeParams; + protected HBaseSerDeParameters hbaseParams; + protected Properties properties; + protected Configuration conf; + + @Override + public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties) + throws SerDeException { + this.hbaseParams = hbaseParams; + this.serdeParams = hbaseParams.getSerdeParams(); + this.properties = properties; + this.conf = conf; + } + + @Override + public ObjectInspector createValueObjectInspector(TypeInfo type) + throws SerDeException { + return LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), + 1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar()); + } + + @Override + public byte[] serializeValue(Object object, StructField field) + throws IOException { + // TODO Add support for serialization of values here + return null; + } +} \ No newline at end of file diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java new file mode 100644 index 000000000000..8722af08856c --- /dev/null +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.hbase.struct; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.hbase.HBaseSerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * Provides capability to plugin custom implementations for querying of data stored in HBase. 
+ * */
+public interface HBaseValueFactory {
+
+  /**
+   * Initialize the factory with properties
+   *
+   * @param hbaseParam the {@link HBaseSerDeParameters HBase serde parameters}
+   * @param conf the hadoop {@link Configuration configuration}
+   * @param properties the custom {@link Properties}
+   * @throws SerDeException if there was an issue initializing the factory
+   */
+  void init(HBaseSerDeParameters hbaseParam, Configuration conf, Properties properties)
+      throws SerDeException;
+
+  /**
+   * Create a custom object inspector for the value
+   *
+   * @param type type information
+   * @return the {@link ObjectInspector object inspector} for the value
+   * @throws SerDeException if there was an issue creating the {@link ObjectInspector object inspector}
+   */
+  ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException;
+
+  /**
+   * Serialize the given Hive object
+   *
+   * @param object the object to be serialized
+   * @param field the {@link StructField}
+   * @return the serialized value
+   * @throws IOException if there was an issue serializing the value
+   */
+  byte[] serializeValue(Object object, StructField field) throws IOException;
+}
\ No newline at end of file
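The Avro support added by this patch is one implementation of this interface; other value encodings can plug in the same way. A rough sketch of a custom factory, assuming values that can simply be persisted in their string form (the class name and the serialization behavior are illustrative, not part of this patch):

  import java.io.IOException;

  import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory;
  import org.apache.hadoop.hive.serde2.objectinspector.StructField;

  public class StringValueFactory extends DefaultHBaseValueFactory {

    @Override
    public byte[] serializeValue(Object object, StructField field) throws IOException {
      // Illustrative only: the default factory above leaves serialization as a TODO and
      // returns null; this variant at least writes out the value's string form.
      return object == null ? null : object.toString().getBytes("UTF-8");
    }
  }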
diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
new file mode 100644
index 000000000000..de519e511c94
--- /dev/null
+++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase;
+
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.serde2.avro.AvroSchemaRetriever;
+
+/**
+ * Mock schema retriever implementation used for testing
+ * */
+public class HBaseTestAvroSchemaRetriever extends AvroSchemaRetriever {
+
+  private static final byte[] TEST_BYTE_ARRAY = Bytes.toBytes("test");
+
+  public HBaseTestAvroSchemaRetriever(Configuration conf, Properties tbl) {
+  }
+
+  @Override
+  public Schema retrieveWriterSchema(Object source) {
+    Class<?> clazz;
+    try {
+      clazz = Class.forName("org.apache.hadoop.hive.hbase.avro.Employee");
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException(e);
+    }
+
+    return ReflectData.get().getSchema(clazz);
+  }
+
+  @Override
+  public Schema retrieveReaderSchema(Object source) {
+    Class<?> clazz;
+    try {
+      clazz = Class.forName("org.apache.hadoop.hive.hbase.avro.Employee");
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException(e);
+    }
+
+    return ReflectData.get().getSchema(clazz);
+  }
+
+  @Override
+  public int getOffset() {
+    return TEST_BYTE_ARRAY.length;
+  }
+}
\ No newline at end of file
diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
index e61e2788ac68..241818a0e8a0 100644
--- a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
+++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
@@ -22,23 +22,45 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Properties;
import junit.framework.TestCase;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.io.Encoder;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.hbase.avro.Address;
+import org.apache.hadoop.hive.hbase.avro.ContactInfo;
+import org.apache.hadoop.hive.hbase.avro.Employee;
+import org.apache.hadoop.hive.hbase.avro.Gender;
+import org.apache.hadoop.hive.hbase.avro.HomePhone;
+import org.apache.hadoop.hive.hbase.avro.OfficePhone;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.BooleanWritable; @@ -53,6 +75,66 @@ */ public class TestHBaseSerDe extends TestCase { + static final byte[] TEST_BYTE_ARRAY = Bytes.toBytes("test"); + + private static final String RECORD_SCHEMA = "{\n" + + " \"namespace\": \"testing.test.mctesty\",\n" + + " \"name\": \"oneRecord\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aRecord\",\n" + + " \"type\":{\"type\":\"record\",\n" + + " \"name\":\"recordWithinARecord\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"int1\",\n" + + " \"type\":\"int\"\n" + + " },\n" + + " {\n" + + " \"name\":\"boolean1\",\n" + + " \"type\":\"boolean\"\n" + + " },\n" + + " {\n" + + " \"name\":\"long1\",\n" + + " \"type\":\"long\"\n" + + " }\n" + + " ]}\n" + + " }\n" + + " ]\n" + + "}"; + + private static final String RECORD_SCHEMA_EVOLVED = "{\n" + + " \"namespace\": \"testing.test.mctesty\",\n" + + " \"name\": \"oneRecord\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aRecord\",\n" + + " \"type\":{\"type\":\"record\",\n" + + " \"name\":\"recordWithinARecord\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"int1\",\n" + + " \"type\":\"int\"\n" + + " },\n" + + " {\n" + + " \"name\":\"string1\",\n" + + " \"type\":\"string\", \"default\": \"test\"\n" + + " },\n" + + " {\n" + + " \"name\":\"boolean1\",\n" + + " \"type\":\"boolean\"\n" + + " },\n" + + " {\n" + + " \"name\":\"long1\",\n" + + " \"type\":\"long\"\n" + + " }\n" + + " ]}\n" + + " }\n" + + " ]\n" + + "}"; + /** * Test the default behavior of the Lazy family of objects and object inspectors. */ @@ -551,7 +633,7 @@ private Properties createPropertiesForHiveMapHBaseColumnFamily() { "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool"); tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string:map:map:map:map:" - + "map:map:map"); + + "map:map:map"); tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key#-,cf-int:#b:b,cf-byte:#b:b,cf-short:#b:b,cf-long:#b:b,cf-float:#b:b,cf-double:#b:b," + "cf-bool:#b:b"); @@ -565,7 +647,7 @@ private Properties createPropertiesForHiveMapHBaseColumnFamilyII() { "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool"); tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string:map:map:map:map:" - + "map:map:map"); + + "map:map:map"); tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key#-,cf-int:#-:-,cf-byte:#-:-,cf-short:#-:-,cf-long:#-:-,cf-float:#-:-,cf-double:#-:-," + "cf-bool:#-:-"); @@ -636,7 +718,7 @@ private Properties createPropertiesForHiveMapHBaseColumnFamilyII_I() { "key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool"); tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string:map:map:map:map:" - + "map:map:map:map"); + + "map:map:map:map"); tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key#s,cf-byte:#-:s,cf-short:#s:-,cf-int:#s:s,cf-long:#-:-,cf-float:#s:-,cf-double:#-:s," + "cf-string:#s:s,cf-bool:#-:-"); @@ -650,7 +732,7 @@ private Properties createPropertiesForHiveMapHBaseColumnFamilyII_II() { "key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool"); tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string:map:map:map:map:" - + "map:map:map:map"); + + "map:map:map:map"); tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key#s,cf-byte:#s:s,cf-short:#s:s,cf-int:#s:s,cf-long:#s:s,cf-float:#s:s,cf-double:#s:s," + "cf-string:#s:s,cf-bool:#s:s"); @@ -934,6 +1016,592 @@ private void deserializeAndSerializeHBaseCompositeKey(HBaseSerDe serDe, Result r assertEquals("Serialized 
put:", p.toString(), put.toString()); } + public void testHBaseSerDeWithAvroSchemaInline() throws SerDeException, IOException { + byte[] cfa = "cola".getBytes(); + + byte[] qualAvro = "avro".getBytes(); + + byte[] rowKey = Bytes.toBytes("test-row1"); + + // Data + List kvs = new ArrayList(); + + byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA); + + kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); + + Result r = new Result(kvs); + + Put p = new Put(rowKey); + + // Post serialization, separators are automatically inserted between different fields in the + // struct. Currently there is not way to disable that. So the work around here is to pad the + // data with the separator bytes before creating a "Put" object + p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); + + Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")}; + + // Create, initialize, and test the SerDe + HBaseSerDe serDe = new HBaseSerDe(); + Configuration conf = new Configuration(); + Properties tbl = createPropertiesForHiveAvroSchemaInline(); + serDe.initialize(conf, tbl); + + deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData); + } + + private Properties createPropertiesForHiveAvroSchemaInline() { + Properties tbl = new Properties(); + tbl.setProperty("cola.avro.serialization.type", "avro"); + tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA); + tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro"); + tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true"); + + return tbl; + } + + public void testHBaseSerDeWithForwardEvolvedSchema() throws SerDeException, IOException { + byte[] cfa = "cola".getBytes(); + + byte[] qualAvro = "avro".getBytes(); + + byte[] rowKey = Bytes.toBytes("test-row1"); + + // Data + List kvs = new ArrayList(); + + byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA); + + kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); + + Result r = new Result(kvs); + + Put p = new Put(rowKey); + + // Post serialization, separators are automatically inserted between different fields in the + // struct. Currently there is not way to disable that. So the work around here is to pad the + // data with the separator bytes before creating a "Put" object + p.add(new KeyValue(rowKey, cfa, qualAvro, avroData)); + + Object[] expectedFieldsData = {new String("test-row1"), + new String("[[42, test, true, 42432234234]]")}; + + // Create, initialize, and test the SerDe + HBaseSerDe serDe = new HBaseSerDe(); + Configuration conf = new Configuration(); + Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema(); + serDe.initialize(conf, tbl); + + deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData); + } + + private Properties createPropertiesForHiveAvroForwardEvolvedSchema() { + Properties tbl = new Properties(); + tbl.setProperty("cola.avro.serialization.type", "avro"); + tbl.setProperty("cola.avro." 
+
+  public void testHBaseSerDeWithForwardEvolvedSchema() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {new String("test-row1"),
+        new String("[[42, test, true, 42432234234]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroForwardEvolvedSchema() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA_EVOLVED);
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
+
+  public void testHBaseSerDeWithBackwardEvolvedSchema() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA_EVOLVED);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroBackwardEvolvedSchema();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroBackwardEvolvedSchema() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA);
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
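+
+  // The two evolution tests above exercise standard Avro schema resolution: in the forward
+  // case the bytes are written with RECORD_SCHEMA and read with RECORD_SCHEMA_EVOLVED, so the
+  // added "string1" field is populated from its declared default ("test"); in the backward
+  // case the bytes carry the evolved record but are read with the original schema, so the
+  // "string1" value is simply dropped.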
+
+  public void testHBaseSerDeWithAvroSerClass() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromClass1(1);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {
+        new String("test-row1"),
+        new String(
+            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], "
+                + "[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+                + "[999, 1234567890], [999, 1234455555]]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroSerClass();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroSerClass() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + serdeConstants.SERIALIZATION_CLASS,
+        "org.apache.hadoop.hive.hbase.avro.Employee");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
+
+  public void testHBaseSerDeWithAvroSchemaUrl() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};
+
+    MiniDFSCluster miniDfs = null;
+
+    try {
+      // MiniDFSCluster litters files and folders all over the place.
+      miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);
+
+      miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
+      FSDataOutputStream out = miniDfs.getFileSystem().create(
+          new Path("/path/to/schema/schema.avsc"));
+      out.writeBytes(RECORD_SCHEMA);
+      out.close();
+      String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";
+
+      // Create, initialize, and test the SerDe
+      HBaseSerDe serDe = new HBaseSerDe();
+      Configuration conf = new Configuration();
+      Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS);
+      serDe.initialize(conf, tbl);
+
+      deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+    } finally {
+      // Teardown the cluster
+      if (miniDfs != null) {
+        miniDfs.shutdown();
+      }
+    }
+  }
+
+  private Properties createPropertiesForHiveAvroSchemaUrl(String schemaUrl) {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_URL, schemaUrl);
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
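+
+  // With avro.schema.url, the schema is not inlined in the table properties; it is read from
+  // the filesystem behind the URL when the SerDe is initialized. The MiniDFSCluster above
+  // exists only so the test can stage schema.avsc behind an HDFS-style URI.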
+
+  public void testHBaseSerDeWithAvroExternalSchema() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromClass2(1);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {
+        new String("test-row1"),
+        new String(
+            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+                + "[999, 1234567890], [999, 1234455555]]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+
+    Properties tbl = createPropertiesForHiveAvroExternalSchema();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroExternalSchema() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty(AvroSerdeUtils.SCHEMA_RETRIEVER,
+        "org.apache.hadoop.hive.hbase.HBaseTestAvroSchemaRetriever");
+    tbl.setProperty("cola.avro." + serdeConstants.SERIALIZATION_CLASS,
+        "org.apache.hadoop.hive.hbase.avro.Employee");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
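+
+  // Here neither a schema literal nor a schema URL is supplied: HBaseTestAvroSchemaRetriever
+  // resolves the reader and writer schemas reflectively from the generated Employee class,
+  // and its getOffset() returns the length of the "test" header that getTestAvroBytesFromClass2
+  // prepends, telling the deserializer where the actual Avro payload begins.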
+
+  public void testHBaseSerDeWithHiveMapToHBaseAvroColumnFamily() throws Exception {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvroA = "prefixA_avro1".getBytes();
+    byte[] qualAvroB = "prefixB_avro2".getBytes();
+    byte[] qualAvroC = "prefixB_avro3".getBytes();
+
+    List<Object> qualifiers = new ArrayList<Object>();
+    qualifiers.add(new Text("prefixA_avro1"));
+    qualifiers.add(new Text("prefixB_avro2"));
+    qualifiers.add(new Text("prefixB_avro3"));
+
+    List<Object> expectedQualifiers = new ArrayList<Object>();
+    expectedQualifiers.add(new Text("prefixB_avro2"));
+    expectedQualifiers.add(new Text("prefixB_avro3"));
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroDataA = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+    byte[] avroDataB = getTestAvroBytesFromClass1(1);
+    byte[] avroDataC = getTestAvroBytesFromClass1(2);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvroA, avroDataA));
+    kvs.add(new KeyValue(rowKey, cfa, qualAvroB, avroDataB));
+    kvs.add(new KeyValue(rowKey, cfa, qualAvroC, avroDataC));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvroB, Bytes.padTail(avroDataB, 11)));
+    p.add(new KeyValue(rowKey, cfa, qualAvroC, Bytes.padTail(avroDataC, 11)));
+
+    Object[] expectedFieldsData = {
+        new Text("test-row1"),
+        new String(
+            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+                + "[999, 1234567890], [999, 1234455555]]]"),
+        new String(
+            "[Avro Employee2, 11111, 25, FEMALE, [[[Avro First Address2, Avro Second Address2, Avro City2, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address2, Avro Second Address2, Avro City2, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+                + "[999, 1234567890], [999, 1234455555]]]")};
+
+    int[] expectedMapSize = new int[] {2};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroColumnFamilyMap();
+    serDe.initialize(conf, tbl);
+
+    Object notPresentKey = new Text("prefixA_avro1");
+
+    deserializeAndSerializeHiveStructColumnFamily(serDe, r, p, expectedFieldsData, expectedMapSize,
+        expectedQualifiers,
+        notPresentKey);
+  }
+
+  private Properties createPropertiesForHiveAvroColumnFamilyMap() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.prefixB_.serialization.type", "avro");
+    tbl.setProperty("cola.prefixB_." + serdeConstants.SERIALIZATION_CLASS,
+        "org.apache.hadoop.hive.hbase.avro.Employee");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, "cola:prefixB_.*");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+    tbl.setProperty(LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, "true");
+
+    return tbl;
+  }
+
+  private void deserializeAndSerializeHiveAvro(HBaseSerDe serDe, Result r, Put p,
+      Object[] expectedFieldsData)
+      throws SerDeException, IOException {
+    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
+
+    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
+
+    Object row = serDe.deserialize(new ResultWritable(r));
+
+    for (int j = 0; j < fieldRefs.size(); j++) {
+      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
+      assertNotNull(fieldData);
+      assertEquals(expectedFieldsData[j], fieldData.toString().trim());
+    }
+
+    // Now serialize
+    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
+
+    assertNotNull(put);
+    assertEquals(p.getFamilyCellMap(), put.getFamilyCellMap());
+  }
+
+  private void deserializeAndSerializeHiveStructColumnFamily(HBaseSerDe serDe, Result r, Put p,
+      Object[] expectedFieldsData,
+      int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey)
+      throws SerDeException, IOException {
+    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
+
+    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
+
+    Object row = serDe.deserialize(new ResultWritable(r));
+
+    int k = 0;
+
+    for (int i = 0; i < fieldRefs.size(); i++) {
+      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
+      assertNotNull(fieldData);
+
+      if (fieldData instanceof LazyPrimitive) {
+        assertEquals(expectedFieldsData[i], ((LazyPrimitive) fieldData).getWritableObject());
+      } else if (fieldData instanceof LazyHBaseCellMap) {
+
+        for (int j = 0; j < ((LazyHBaseCellMap)
fieldData).getMapSize(); j++) { + assertEquals(expectedFieldsData[k + 1], + ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedQualifiers.get(k)) + .toString().trim()); + k++; + } + + assertEquals(expectedMapSize[i - 1], ((LazyHBaseCellMap) fieldData).getMapSize()); + + // Make sure that the unwanted key is not present in the map + assertNull(((LazyHBaseCellMap) fieldData).getMapValueElement(notPresentKey)); + + } else { + fail("Error: field data not an instance of LazyPrimitive or LazyHBaseCellMap"); + } + } + + SerDeUtils.getJSONString(row, soi); + + // Now serialize + Put put = ((PutWritable) serDe.serialize(row, soi)).getPut(); + + assertNotNull(put); + } + + private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException { + Schema s = Schema.parse(schemaToUse); + GenericData.Record record = new GenericData.Record(s); + GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema()); + innerRecord.put("int1", 42); + innerRecord.put("boolean1", true); + innerRecord.put("long1", 42432234234l); + + if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) { + innerRecord.put("string1", "new value"); + } + + record.put("aRecord", innerRecord); + + DatumWriter datumWriter = new GenericDatumWriter(s); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + + DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); + dataFileWriter.create(s, out); + dataFileWriter.append(record); + dataFileWriter.close(); + + byte[] data = out.toByteArray(); + + out.close(); + return data; + } + + private byte[] getTestAvroBytesFromClass1(int i) throws IOException { + Employee employee = new Employee(); + + employee.setEmployeeName("Avro Employee" + i); + employee.setEmployeeID(11111L); + employee.setGender(Gender.FEMALE); + employee.setAge(25L); + + Address address = new Address(); + + address.setAddress1("Avro First Address" + i); + address.setAddress2("Avro Second Address" + i); + address.setCity("Avro City" + i); + address.setZipcode(123456L); + + Map metadata = new HashMap(); + + metadata.put("testkey", "testvalue"); + + address.setMetadata(metadata); + + HomePhone hPhone = new HomePhone(); + + hPhone.setAreaCode(999L); + hPhone.setNumber(1234567890L); + + OfficePhone oPhone = new OfficePhone(); + + oPhone.setAreaCode(999L); + oPhone.setNumber(1234455555L); + + ContactInfo contact = new ContactInfo(); + + List
<Address> addresses = new ArrayList<Address>
(); + address.setCounty(hPhone); // set value for the union type + addresses.add(address); + addresses.add(address); + + contact.setAddress(addresses); + + contact.setHomePhone(hPhone); + contact.setOfficePhone(oPhone); + + employee.setContactInfo(contact); + + DatumWriter datumWriter = new SpecificDatumWriter(Employee.class); + DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + + dataFileWriter.create(employee.getSchema(), out); + dataFileWriter.append(employee); + dataFileWriter.close(); + + return out.toByteArray(); + } + + private byte[] getTestAvroBytesFromClass2(int i) throws IOException { + Employee employee = new Employee(); + + employee.setEmployeeName("Avro Employee" + i); + employee.setEmployeeID(11111L); + employee.setGender(Gender.FEMALE); + employee.setAge(25L); + + Address address = new Address(); + + address.setAddress1("Avro First Address" + i); + address.setAddress2("Avro Second Address" + i); + address.setCity("Avro City" + i); + address.setZipcode(123456L); + + Map metadata = new HashMap(); + + metadata.put("testkey", "testvalue"); + + address.setMetadata(metadata); + + HomePhone hPhone = new HomePhone(); + + hPhone.setAreaCode(999L); + hPhone.setNumber(1234567890L); + + OfficePhone oPhone = new OfficePhone(); + + oPhone.setAreaCode(999L); + oPhone.setNumber(1234455555L); + + ContactInfo contact = new ContactInfo(); + + List
<Address> addresses = new ArrayList<Address>
(); + address.setCounty(hPhone); // set value for the union type + addresses.add(address); + addresses.add(address); + + contact.setAddress(addresses); + + contact.setHomePhone(hPhone); + contact.setOfficePhone(oPhone); + + employee.setContactInfo(contact); + + DatumWriter employeeWriter = new SpecificDatumWriter(Employee.class); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + + Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); + + // write out a header for the payload + out.write(TEST_BYTE_ARRAY); + + employeeWriter.write(employee, encoder); + + encoder.flush(); + + return out.toByteArray(); + } + class TestStruct { String f1; String f2; diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Address.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Address.java new file mode 100644 index 000000000000..e9145c74ebb0 --- /dev/null +++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Address.java @@ -0,0 +1,465 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.hadoop.hive.hbase.avro; +@SuppressWarnings("all") +@org.apache.avro.specific.AvroGenerated +public class Address extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Address\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"fields\":[{\"name\":\"address1\",\"type\":\"string\"},{\"name\":\"address2\",\"type\":\"string\"},{\"name\":\"city\",\"type\":\"string\"},{\"name\":\"zipcode\",\"type\":\"long\"},{\"name\":\"county\",\"type\":[{\"type\":\"record\",\"name\":\"HomePhone\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]},{\"type\":\"record\",\"name\":\"OfficePhone\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]},\"string\",\"null\"]},{\"name\":\"aliases\",\"type\":[{\"type\":\"array\",\"items\":\"string\"},\"null\"]},{\"name\":\"metadata\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}]}]}"); + public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } + @Deprecated public java.lang.CharSequence address1; + @Deprecated public java.lang.CharSequence address2; + @Deprecated public java.lang.CharSequence city; + @Deprecated public long zipcode; + @Deprecated public java.lang.Object county; + @Deprecated public java.util.List aliases; + @Deprecated public java.util.Map metadata; + + /** + * Default constructor. Note that this does not initialize fields + * to their default values from the schema. If that is desired then + * one should use newBuilder(). + */ + public Address() {} + + /** + * All-args constructor. + */ + public Address(java.lang.CharSequence address1, java.lang.CharSequence address2, java.lang.CharSequence city, java.lang.Long zipcode, java.lang.Object county, java.util.List aliases, java.util.Map metadata) { + this.address1 = address1; + this.address2 = address2; + this.city = city; + this.zipcode = zipcode; + this.county = county; + this.aliases = aliases; + this.metadata = metadata; + } + + public org.apache.avro.Schema getSchema() { return SCHEMA$; } + // Used by DatumWriter. Applications should not call. 
+ public java.lang.Object get(int field$) { + switch (field$) { + case 0: return address1; + case 1: return address2; + case 2: return city; + case 3: return zipcode; + case 4: return county; + case 5: return aliases; + case 6: return metadata; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + // Used by DatumReader. Applications should not call. + @SuppressWarnings(value="unchecked") + public void put(int field$, java.lang.Object value$) { + switch (field$) { + case 0: address1 = (java.lang.CharSequence)value$; break; + case 1: address2 = (java.lang.CharSequence)value$; break; + case 2: city = (java.lang.CharSequence)value$; break; + case 3: zipcode = (java.lang.Long)value$; break; + case 4: county = (java.lang.Object)value$; break; + case 5: aliases = (java.util.List)value$; break; + case 6: metadata = (java.util.Map)value$; break; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + + /** + * Gets the value of the 'address1' field. + */ + public java.lang.CharSequence getAddress1() { + return address1; + } + + /** + * Sets the value of the 'address1' field. + * @param value the value to set. + */ + public void setAddress1(java.lang.CharSequence value) { + this.address1 = value; + } + + /** + * Gets the value of the 'address2' field. + */ + public java.lang.CharSequence getAddress2() { + return address2; + } + + /** + * Sets the value of the 'address2' field. + * @param value the value to set. + */ + public void setAddress2(java.lang.CharSequence value) { + this.address2 = value; + } + + /** + * Gets the value of the 'city' field. + */ + public java.lang.CharSequence getCity() { + return city; + } + + /** + * Sets the value of the 'city' field. + * @param value the value to set. + */ + public void setCity(java.lang.CharSequence value) { + this.city = value; + } + + /** + * Gets the value of the 'zipcode' field. + */ + public java.lang.Long getZipcode() { + return zipcode; + } + + /** + * Sets the value of the 'zipcode' field. + * @param value the value to set. + */ + public void setZipcode(java.lang.Long value) { + this.zipcode = value; + } + + /** + * Gets the value of the 'county' field. + */ + public java.lang.Object getCounty() { + return county; + } + + /** + * Sets the value of the 'county' field. + * @param value the value to set. + */ + public void setCounty(java.lang.Object value) { + this.county = value; + } + + /** + * Gets the value of the 'aliases' field. + */ + public java.util.List getAliases() { + return aliases; + } + + /** + * Sets the value of the 'aliases' field. + * @param value the value to set. + */ + public void setAliases(java.util.List value) { + this.aliases = value; + } + + /** + * Gets the value of the 'metadata' field. + */ + public java.util.Map getMetadata() { + return metadata; + } + + /** + * Sets the value of the 'metadata' field. + * @param value the value to set. 
+ */ + public void setMetadata(java.util.Map value) { + this.metadata = value; + } + + /** Creates a new Address RecordBuilder */ + public static org.apache.hadoop.hive.hbase.avro.Address.Builder newBuilder() { + return new org.apache.hadoop.hive.hbase.avro.Address.Builder(); + } + + /** Creates a new Address RecordBuilder by copying an existing Builder */ + public static org.apache.hadoop.hive.hbase.avro.Address.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.Address.Builder other) { + return new org.apache.hadoop.hive.hbase.avro.Address.Builder(other); + } + + /** Creates a new Address RecordBuilder by copying an existing Address instance */ + public static org.apache.hadoop.hive.hbase.avro.Address.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.Address other) { + return new org.apache.hadoop.hive.hbase.avro.Address.Builder(other); + } + + /** + * RecordBuilder for Address instances. + */ + public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
<Address> + implements org.apache.avro.data.RecordBuilder<Address>
{ + + private java.lang.CharSequence address1; + private java.lang.CharSequence address2; + private java.lang.CharSequence city; + private long zipcode; + private java.lang.Object county; + private java.util.List aliases; + private java.util.Map metadata; + + /** Creates a new Builder */ + private Builder() { + super(org.apache.hadoop.hive.hbase.avro.Address.SCHEMA$); + } + + /** Creates a Builder by copying an existing Builder */ + private Builder(org.apache.hadoop.hive.hbase.avro.Address.Builder other) { + super(other); + if (isValidValue(fields()[0], other.address1)) { + this.address1 = data().deepCopy(fields()[0].schema(), other.address1); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.address2)) { + this.address2 = data().deepCopy(fields()[1].schema(), other.address2); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.city)) { + this.city = data().deepCopy(fields()[2].schema(), other.city); + fieldSetFlags()[2] = true; + } + if (isValidValue(fields()[3], other.zipcode)) { + this.zipcode = data().deepCopy(fields()[3].schema(), other.zipcode); + fieldSetFlags()[3] = true; + } + if (isValidValue(fields()[4], other.county)) { + this.county = data().deepCopy(fields()[4].schema(), other.county); + fieldSetFlags()[4] = true; + } + if (isValidValue(fields()[5], other.aliases)) { + this.aliases = data().deepCopy(fields()[5].schema(), other.aliases); + fieldSetFlags()[5] = true; + } + if (isValidValue(fields()[6], other.metadata)) { + this.metadata = data().deepCopy(fields()[6].schema(), other.metadata); + fieldSetFlags()[6] = true; + } + } + + /** Creates a Builder by copying an existing Address instance */ + private Builder(org.apache.hadoop.hive.hbase.avro.Address other) { + super(org.apache.hadoop.hive.hbase.avro.Address.SCHEMA$); + if (isValidValue(fields()[0], other.address1)) { + this.address1 = data().deepCopy(fields()[0].schema(), other.address1); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.address2)) { + this.address2 = data().deepCopy(fields()[1].schema(), other.address2); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.city)) { + this.city = data().deepCopy(fields()[2].schema(), other.city); + fieldSetFlags()[2] = true; + } + if (isValidValue(fields()[3], other.zipcode)) { + this.zipcode = data().deepCopy(fields()[3].schema(), other.zipcode); + fieldSetFlags()[3] = true; + } + if (isValidValue(fields()[4], other.county)) { + this.county = data().deepCopy(fields()[4].schema(), other.county); + fieldSetFlags()[4] = true; + } + if (isValidValue(fields()[5], other.aliases)) { + this.aliases = data().deepCopy(fields()[5].schema(), other.aliases); + fieldSetFlags()[5] = true; + } + if (isValidValue(fields()[6], other.metadata)) { + this.metadata = data().deepCopy(fields()[6].schema(), other.metadata); + fieldSetFlags()[6] = true; + } + } + + /** Gets the value of the 'address1' field */ + public java.lang.CharSequence getAddress1() { + return address1; + } + + /** Sets the value of the 'address1' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder setAddress1(java.lang.CharSequence value) { + validate(fields()[0], value); + this.address1 = value; + fieldSetFlags()[0] = true; + return this; + } + + /** Checks whether the 'address1' field has been set */ + public boolean hasAddress1() { + return fieldSetFlags()[0]; + } + + /** Clears the value of the 'address1' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder clearAddress1() { + address1 = null; + 
fieldSetFlags()[0] = false; + return this; + } + + /** Gets the value of the 'address2' field */ + public java.lang.CharSequence getAddress2() { + return address2; + } + + /** Sets the value of the 'address2' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder setAddress2(java.lang.CharSequence value) { + validate(fields()[1], value); + this.address2 = value; + fieldSetFlags()[1] = true; + return this; + } + + /** Checks whether the 'address2' field has been set */ + public boolean hasAddress2() { + return fieldSetFlags()[1]; + } + + /** Clears the value of the 'address2' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder clearAddress2() { + address2 = null; + fieldSetFlags()[1] = false; + return this; + } + + /** Gets the value of the 'city' field */ + public java.lang.CharSequence getCity() { + return city; + } + + /** Sets the value of the 'city' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder setCity(java.lang.CharSequence value) { + validate(fields()[2], value); + this.city = value; + fieldSetFlags()[2] = true; + return this; + } + + /** Checks whether the 'city' field has been set */ + public boolean hasCity() { + return fieldSetFlags()[2]; + } + + /** Clears the value of the 'city' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder clearCity() { + city = null; + fieldSetFlags()[2] = false; + return this; + } + + /** Gets the value of the 'zipcode' field */ + public java.lang.Long getZipcode() { + return zipcode; + } + + /** Sets the value of the 'zipcode' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder setZipcode(long value) { + validate(fields()[3], value); + this.zipcode = value; + fieldSetFlags()[3] = true; + return this; + } + + /** Checks whether the 'zipcode' field has been set */ + public boolean hasZipcode() { + return fieldSetFlags()[3]; + } + + /** Clears the value of the 'zipcode' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder clearZipcode() { + fieldSetFlags()[3] = false; + return this; + } + + /** Gets the value of the 'county' field */ + public java.lang.Object getCounty() { + return county; + } + + /** Sets the value of the 'county' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder setCounty(java.lang.Object value) { + validate(fields()[4], value); + this.county = value; + fieldSetFlags()[4] = true; + return this; + } + + /** Checks whether the 'county' field has been set */ + public boolean hasCounty() { + return fieldSetFlags()[4]; + } + + /** Clears the value of the 'county' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder clearCounty() { + county = null; + fieldSetFlags()[4] = false; + return this; + } + + /** Gets the value of the 'aliases' field */ + public java.util.List getAliases() { + return aliases; + } + + /** Sets the value of the 'aliases' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder setAliases(java.util.List value) { + validate(fields()[5], value); + this.aliases = value; + fieldSetFlags()[5] = true; + return this; + } + + /** Checks whether the 'aliases' field has been set */ + public boolean hasAliases() { + return fieldSetFlags()[5]; + } + + /** Clears the value of the 'aliases' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder clearAliases() { + aliases = null; + fieldSetFlags()[5] = false; + return this; + } + + /** Gets the value of the 'metadata' field */ + public java.util.Map getMetadata() { + return metadata; + } + + /** Sets the value of the 'metadata' 
field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder setMetadata(java.util.Map value) { + validate(fields()[6], value); + this.metadata = value; + fieldSetFlags()[6] = true; + return this; + } + + /** Checks whether the 'metadata' field has been set */ + public boolean hasMetadata() { + return fieldSetFlags()[6]; + } + + /** Clears the value of the 'metadata' field */ + public org.apache.hadoop.hive.hbase.avro.Address.Builder clearMetadata() { + metadata = null; + fieldSetFlags()[6] = false; + return this; + } + + @Override + public Address build() { + try { + Address record = new Address(); + record.address1 = fieldSetFlags()[0] ? this.address1 : (java.lang.CharSequence) defaultValue(fields()[0]); + record.address2 = fieldSetFlags()[1] ? this.address2 : (java.lang.CharSequence) defaultValue(fields()[1]); + record.city = fieldSetFlags()[2] ? this.city : (java.lang.CharSequence) defaultValue(fields()[2]); + record.zipcode = fieldSetFlags()[3] ? this.zipcode : (java.lang.Long) defaultValue(fields()[3]); + record.county = fieldSetFlags()[4] ? this.county : (java.lang.Object) defaultValue(fields()[4]); + record.aliases = fieldSetFlags()[5] ? this.aliases : (java.util.List) defaultValue(fields()[5]); + record.metadata = fieldSetFlags()[6] ? this.metadata : (java.util.Map) defaultValue(fields()[6]); + return record; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } +} diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/ContactInfo.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/ContactInfo.java new file mode 100644 index 000000000000..a19a93720787 --- /dev/null +++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/ContactInfo.java @@ -0,0 +1,250 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.hadoop.hive.hbase.avro; +@SuppressWarnings("all") +@org.apache.avro.specific.AvroGenerated +public class ContactInfo extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ContactInfo\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"fields\":[{\"name\":\"address\",\"type\":[{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Address\",\"fields\":[{\"name\":\"address1\",\"type\":\"string\"},{\"name\":\"address2\",\"type\":\"string\"},{\"name\":\"city\",\"type\":\"string\"},{\"name\":\"zipcode\",\"type\":\"long\"},{\"name\":\"county\",\"type\":[{\"type\":\"record\",\"name\":\"HomePhone\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]},{\"type\":\"record\",\"name\":\"OfficePhone\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]},\"string\",\"null\"]},{\"name\":\"aliases\",\"type\":[{\"type\":\"array\",\"items\":\"string\"},\"null\"]},{\"name\":\"metadata\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}]}]}},\"null\"]},{\"name\":\"homePhone\",\"type\":\"HomePhone\"},{\"name\":\"officePhone\",\"type\":\"OfficePhone\"}]}"); + public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } + @Deprecated public java.util.List address; + @Deprecated public org.apache.hadoop.hive.hbase.avro.HomePhone homePhone; + @Deprecated public org.apache.hadoop.hive.hbase.avro.OfficePhone officePhone; + + /** + * Default constructor. 
Note that this does not initialize fields + * to their default values from the schema. If that is desired then + * one should use newBuilder(). + */ + public ContactInfo() {} + + /** + * All-args constructor. + */ + public ContactInfo(java.util.List address, org.apache.hadoop.hive.hbase.avro.HomePhone homePhone, org.apache.hadoop.hive.hbase.avro.OfficePhone officePhone) { + this.address = address; + this.homePhone = homePhone; + this.officePhone = officePhone; + } + + public org.apache.avro.Schema getSchema() { return SCHEMA$; } + // Used by DatumWriter. Applications should not call. + public java.lang.Object get(int field$) { + switch (field$) { + case 0: return address; + case 1: return homePhone; + case 2: return officePhone; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + // Used by DatumReader. Applications should not call. + @SuppressWarnings(value="unchecked") + public void put(int field$, java.lang.Object value$) { + switch (field$) { + case 0: address = (java.util.List)value$; break; + case 1: homePhone = (org.apache.hadoop.hive.hbase.avro.HomePhone)value$; break; + case 2: officePhone = (org.apache.hadoop.hive.hbase.avro.OfficePhone)value$; break; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + + /** + * Gets the value of the 'address' field. + */ + public java.util.List getAddress() { + return address; + } + + /** + * Sets the value of the 'address' field. + * @param value the value to set. + */ + public void setAddress(java.util.List value) { + this.address = value; + } + + /** + * Gets the value of the 'homePhone' field. + */ + public org.apache.hadoop.hive.hbase.avro.HomePhone getHomePhone() { + return homePhone; + } + + /** + * Sets the value of the 'homePhone' field. + * @param value the value to set. + */ + public void setHomePhone(org.apache.hadoop.hive.hbase.avro.HomePhone value) { + this.homePhone = value; + } + + /** + * Gets the value of the 'officePhone' field. + */ + public org.apache.hadoop.hive.hbase.avro.OfficePhone getOfficePhone() { + return officePhone; + } + + /** + * Sets the value of the 'officePhone' field. + * @param value the value to set. + */ + public void setOfficePhone(org.apache.hadoop.hive.hbase.avro.OfficePhone value) { + this.officePhone = value; + } + + /** Creates a new ContactInfo RecordBuilder */ + public static org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder newBuilder() { + return new org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder(); + } + + /** Creates a new ContactInfo RecordBuilder by copying an existing Builder */ + public static org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder other) { + return new org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder(other); + } + + /** Creates a new ContactInfo RecordBuilder by copying an existing ContactInfo instance */ + public static org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.ContactInfo other) { + return new org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder(other); + } + + /** + * RecordBuilder for ContactInfo instances. 
+ */ + public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + private java.util.List address; + private org.apache.hadoop.hive.hbase.avro.HomePhone homePhone; + private org.apache.hadoop.hive.hbase.avro.OfficePhone officePhone; + + /** Creates a new Builder */ + private Builder() { + super(org.apache.hadoop.hive.hbase.avro.ContactInfo.SCHEMA$); + } + + /** Creates a Builder by copying an existing Builder */ + private Builder(org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder other) { + super(other); + if (isValidValue(fields()[0], other.address)) { + this.address = data().deepCopy(fields()[0].schema(), other.address); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.homePhone)) { + this.homePhone = data().deepCopy(fields()[1].schema(), other.homePhone); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.officePhone)) { + this.officePhone = data().deepCopy(fields()[2].schema(), other.officePhone); + fieldSetFlags()[2] = true; + } + } + + /** Creates a Builder by copying an existing ContactInfo instance */ + private Builder(org.apache.hadoop.hive.hbase.avro.ContactInfo other) { + super(org.apache.hadoop.hive.hbase.avro.ContactInfo.SCHEMA$); + if (isValidValue(fields()[0], other.address)) { + this.address = data().deepCopy(fields()[0].schema(), other.address); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.homePhone)) { + this.homePhone = data().deepCopy(fields()[1].schema(), other.homePhone); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.officePhone)) { + this.officePhone = data().deepCopy(fields()[2].schema(), other.officePhone); + fieldSetFlags()[2] = true; + } + } + + /** Gets the value of the 'address' field */ + public java.util.List getAddress() { + return address; + } + + /** Sets the value of the 'address' field */ + public org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder setAddress(java.util.List value) { + validate(fields()[0], value); + this.address = value; + fieldSetFlags()[0] = true; + return this; + } + + /** Checks whether the 'address' field has been set */ + public boolean hasAddress() { + return fieldSetFlags()[0]; + } + + /** Clears the value of the 'address' field */ + public org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder clearAddress() { + address = null; + fieldSetFlags()[0] = false; + return this; + } + + /** Gets the value of the 'homePhone' field */ + public org.apache.hadoop.hive.hbase.avro.HomePhone getHomePhone() { + return homePhone; + } + + /** Sets the value of the 'homePhone' field */ + public org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder setHomePhone(org.apache.hadoop.hive.hbase.avro.HomePhone value) { + validate(fields()[1], value); + this.homePhone = value; + fieldSetFlags()[1] = true; + return this; + } + + /** Checks whether the 'homePhone' field has been set */ + public boolean hasHomePhone() { + return fieldSetFlags()[1]; + } + + /** Clears the value of the 'homePhone' field */ + public org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder clearHomePhone() { + homePhone = null; + fieldSetFlags()[1] = false; + return this; + } + + /** Gets the value of the 'officePhone' field */ + public org.apache.hadoop.hive.hbase.avro.OfficePhone getOfficePhone() { + return officePhone; + } + + /** Sets the value of the 'officePhone' field */ + public org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder setOfficePhone(org.apache.hadoop.hive.hbase.avro.OfficePhone value) 
{ + validate(fields()[2], value); + this.officePhone = value; + fieldSetFlags()[2] = true; + return this; + } + + /** Checks whether the 'officePhone' field has been set */ + public boolean hasOfficePhone() { + return fieldSetFlags()[2]; + } + + /** Clears the value of the 'officePhone' field */ + public org.apache.hadoop.hive.hbase.avro.ContactInfo.Builder clearOfficePhone() { + officePhone = null; + fieldSetFlags()[2] = false; + return this; + } + + @Override + public ContactInfo build() { + try { + ContactInfo record = new ContactInfo(); + record.address = fieldSetFlags()[0] ? this.address : (java.util.List) defaultValue(fields()[0]); + record.homePhone = fieldSetFlags()[1] ? this.homePhone : (org.apache.hadoop.hive.hbase.avro.HomePhone) defaultValue(fields()[1]); + record.officePhone = fieldSetFlags()[2] ? this.officePhone : (org.apache.hadoop.hive.hbase.avro.OfficePhone) defaultValue(fields()[2]); + return record; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } +} diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Employee.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Employee.java new file mode 100644 index 000000000000..d1d5cb24af8c --- /dev/null +++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Employee.java @@ -0,0 +1,356 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.hadoop.hive.hbase.avro; +@SuppressWarnings("all") +@org.apache.avro.specific.AvroGenerated +public class Employee extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Employee\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"fields\":[{\"name\":\"employeeName\",\"type\":\"string\"},{\"name\":\"employeeID\",\"type\":\"long\"},{\"name\":\"age\",\"type\":\"long\"},{\"name\":\"gender\",\"type\":{\"type\":\"enum\",\"name\":\"Gender\",\"symbols\":[\"MALE\",\"FEMALE\"]}},{\"name\":\"contactInfo\",\"type\":{\"type\":\"record\",\"name\":\"ContactInfo\",\"fields\":[{\"name\":\"address\",\"type\":[{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Address\",\"fields\":[{\"name\":\"address1\",\"type\":\"string\"},{\"name\":\"address2\",\"type\":\"string\"},{\"name\":\"city\",\"type\":\"string\"},{\"name\":\"zipcode\",\"type\":\"long\"},{\"name\":\"county\",\"type\":[{\"type\":\"record\",\"name\":\"HomePhone\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]},{\"type\":\"record\",\"name\":\"OfficePhone\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]},\"string\",\"null\"]},{\"name\":\"aliases\",\"type\":[{\"type\":\"array\",\"items\":\"string\"},\"null\"]},{\"name\":\"metadata\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}]}]}},\"null\"]},{\"name\":\"homePhone\",\"type\":\"HomePhone\"},{\"name\":\"officePhone\",\"type\":\"OfficePhone\"}]}}]}"); + public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } + @Deprecated public java.lang.CharSequence employeeName; + @Deprecated public long employeeID; + @Deprecated public long age; + @Deprecated public org.apache.hadoop.hive.hbase.avro.Gender gender; + @Deprecated public org.apache.hadoop.hive.hbase.avro.ContactInfo contactInfo; + + /** + * Default constructor. 
Note that this does not initialize fields + * to their default values from the schema. If that is desired then + * one should use newBuilder(). + */ + public Employee() {} + + /** + * All-args constructor. + */ + public Employee(java.lang.CharSequence employeeName, java.lang.Long employeeID, java.lang.Long age, org.apache.hadoop.hive.hbase.avro.Gender gender, org.apache.hadoop.hive.hbase.avro.ContactInfo contactInfo) { + this.employeeName = employeeName; + this.employeeID = employeeID; + this.age = age; + this.gender = gender; + this.contactInfo = contactInfo; + } + + public org.apache.avro.Schema getSchema() { return SCHEMA$; } + // Used by DatumWriter. Applications should not call. + public java.lang.Object get(int field$) { + switch (field$) { + case 0: return employeeName; + case 1: return employeeID; + case 2: return age; + case 3: return gender; + case 4: return contactInfo; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + // Used by DatumReader. Applications should not call. + @SuppressWarnings(value="unchecked") + public void put(int field$, java.lang.Object value$) { + switch (field$) { + case 0: employeeName = (java.lang.CharSequence)value$; break; + case 1: employeeID = (java.lang.Long)value$; break; + case 2: age = (java.lang.Long)value$; break; + case 3: gender = (org.apache.hadoop.hive.hbase.avro.Gender)value$; break; + case 4: contactInfo = (org.apache.hadoop.hive.hbase.avro.ContactInfo)value$; break; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + + /** + * Gets the value of the 'employeeName' field. + */ + public java.lang.CharSequence getEmployeeName() { + return employeeName; + } + + /** + * Sets the value of the 'employeeName' field. + * @param value the value to set. + */ + public void setEmployeeName(java.lang.CharSequence value) { + this.employeeName = value; + } + + /** + * Gets the value of the 'employeeID' field. + */ + public java.lang.Long getEmployeeID() { + return employeeID; + } + + /** + * Sets the value of the 'employeeID' field. + * @param value the value to set. + */ + public void setEmployeeID(java.lang.Long value) { + this.employeeID = value; + } + + /** + * Gets the value of the 'age' field. + */ + public java.lang.Long getAge() { + return age; + } + + /** + * Sets the value of the 'age' field. + * @param value the value to set. + */ + public void setAge(java.lang.Long value) { + this.age = value; + } + + /** + * Gets the value of the 'gender' field. + */ + public org.apache.hadoop.hive.hbase.avro.Gender getGender() { + return gender; + } + + /** + * Sets the value of the 'gender' field. + * @param value the value to set. + */ + public void setGender(org.apache.hadoop.hive.hbase.avro.Gender value) { + this.gender = value; + } + + /** + * Gets the value of the 'contactInfo' field. + */ + public org.apache.hadoop.hive.hbase.avro.ContactInfo getContactInfo() { + return contactInfo; + } + + /** + * Sets the value of the 'contactInfo' field. + * @param value the value to set. 
+ */ + public void setContactInfo(org.apache.hadoop.hive.hbase.avro.ContactInfo value) { + this.contactInfo = value; + } + + /** Creates a new Employee RecordBuilder */ + public static org.apache.hadoop.hive.hbase.avro.Employee.Builder newBuilder() { + return new org.apache.hadoop.hive.hbase.avro.Employee.Builder(); + } + + /** Creates a new Employee RecordBuilder by copying an existing Builder */ + public static org.apache.hadoop.hive.hbase.avro.Employee.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.Employee.Builder other) { + return new org.apache.hadoop.hive.hbase.avro.Employee.Builder(other); + } + + /** Creates a new Employee RecordBuilder by copying an existing Employee instance */ + public static org.apache.hadoop.hive.hbase.avro.Employee.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.Employee other) { + return new org.apache.hadoop.hive.hbase.avro.Employee.Builder(other); + } + + /** + * RecordBuilder for Employee instances. + */ + public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + private java.lang.CharSequence employeeName; + private long employeeID; + private long age; + private org.apache.hadoop.hive.hbase.avro.Gender gender; + private org.apache.hadoop.hive.hbase.avro.ContactInfo contactInfo; + + /** Creates a new Builder */ + private Builder() { + super(org.apache.hadoop.hive.hbase.avro.Employee.SCHEMA$); + } + + /** Creates a Builder by copying an existing Builder */ + private Builder(org.apache.hadoop.hive.hbase.avro.Employee.Builder other) { + super(other); + if (isValidValue(fields()[0], other.employeeName)) { + this.employeeName = data().deepCopy(fields()[0].schema(), other.employeeName); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.employeeID)) { + this.employeeID = data().deepCopy(fields()[1].schema(), other.employeeID); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.age)) { + this.age = data().deepCopy(fields()[2].schema(), other.age); + fieldSetFlags()[2] = true; + } + if (isValidValue(fields()[3], other.gender)) { + this.gender = data().deepCopy(fields()[3].schema(), other.gender); + fieldSetFlags()[3] = true; + } + if (isValidValue(fields()[4], other.contactInfo)) { + this.contactInfo = data().deepCopy(fields()[4].schema(), other.contactInfo); + fieldSetFlags()[4] = true; + } + } + + /** Creates a Builder by copying an existing Employee instance */ + private Builder(org.apache.hadoop.hive.hbase.avro.Employee other) { + super(org.apache.hadoop.hive.hbase.avro.Employee.SCHEMA$); + if (isValidValue(fields()[0], other.employeeName)) { + this.employeeName = data().deepCopy(fields()[0].schema(), other.employeeName); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.employeeID)) { + this.employeeID = data().deepCopy(fields()[1].schema(), other.employeeID); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.age)) { + this.age = data().deepCopy(fields()[2].schema(), other.age); + fieldSetFlags()[2] = true; + } + if (isValidValue(fields()[3], other.gender)) { + this.gender = data().deepCopy(fields()[3].schema(), other.gender); + fieldSetFlags()[3] = true; + } + if (isValidValue(fields()[4], other.contactInfo)) { + this.contactInfo = data().deepCopy(fields()[4].schema(), other.contactInfo); + fieldSetFlags()[4] = true; + } + } + + /** Gets the value of the 'employeeName' field */ + public java.lang.CharSequence getEmployeeName() { + return employeeName; + } + + /** Sets the value of 
the 'employeeName' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder setEmployeeName(java.lang.CharSequence value) { + validate(fields()[0], value); + this.employeeName = value; + fieldSetFlags()[0] = true; + return this; + } + + /** Checks whether the 'employeeName' field has been set */ + public boolean hasEmployeeName() { + return fieldSetFlags()[0]; + } + + /** Clears the value of the 'employeeName' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder clearEmployeeName() { + employeeName = null; + fieldSetFlags()[0] = false; + return this; + } + + /** Gets the value of the 'employeeID' field */ + public java.lang.Long getEmployeeID() { + return employeeID; + } + + /** Sets the value of the 'employeeID' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder setEmployeeID(long value) { + validate(fields()[1], value); + this.employeeID = value; + fieldSetFlags()[1] = true; + return this; + } + + /** Checks whether the 'employeeID' field has been set */ + public boolean hasEmployeeID() { + return fieldSetFlags()[1]; + } + + /** Clears the value of the 'employeeID' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder clearEmployeeID() { + fieldSetFlags()[1] = false; + return this; + } + + /** Gets the value of the 'age' field */ + public java.lang.Long getAge() { + return age; + } + + /** Sets the value of the 'age' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder setAge(long value) { + validate(fields()[2], value); + this.age = value; + fieldSetFlags()[2] = true; + return this; + } + + /** Checks whether the 'age' field has been set */ + public boolean hasAge() { + return fieldSetFlags()[2]; + } + + /** Clears the value of the 'age' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder clearAge() { + fieldSetFlags()[2] = false; + return this; + } + + /** Gets the value of the 'gender' field */ + public org.apache.hadoop.hive.hbase.avro.Gender getGender() { + return gender; + } + + /** Sets the value of the 'gender' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder setGender(org.apache.hadoop.hive.hbase.avro.Gender value) { + validate(fields()[3], value); + this.gender = value; + fieldSetFlags()[3] = true; + return this; + } + + /** Checks whether the 'gender' field has been set */ + public boolean hasGender() { + return fieldSetFlags()[3]; + } + + /** Clears the value of the 'gender' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder clearGender() { + gender = null; + fieldSetFlags()[3] = false; + return this; + } + + /** Gets the value of the 'contactInfo' field */ + public org.apache.hadoop.hive.hbase.avro.ContactInfo getContactInfo() { + return contactInfo; + } + + /** Sets the value of the 'contactInfo' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder setContactInfo(org.apache.hadoop.hive.hbase.avro.ContactInfo value) { + validate(fields()[4], value); + this.contactInfo = value; + fieldSetFlags()[4] = true; + return this; + } + + /** Checks whether the 'contactInfo' field has been set */ + public boolean hasContactInfo() { + return fieldSetFlags()[4]; + } + + /** Clears the value of the 'contactInfo' field */ + public org.apache.hadoop.hive.hbase.avro.Employee.Builder clearContactInfo() { + contactInfo = null; + fieldSetFlags()[4] = false; + return this; + } + + @Override + public Employee build() { + try { + Employee record = new Employee(); + record.employeeName = fieldSetFlags()[0] ? 
this.employeeName : (java.lang.CharSequence) defaultValue(fields()[0]); + record.employeeID = fieldSetFlags()[1] ? this.employeeID : (java.lang.Long) defaultValue(fields()[1]); + record.age = fieldSetFlags()[2] ? this.age : (java.lang.Long) defaultValue(fields()[2]); + record.gender = fieldSetFlags()[3] ? this.gender : (org.apache.hadoop.hive.hbase.avro.Gender) defaultValue(fields()[3]); + record.contactInfo = fieldSetFlags()[4] ? this.contactInfo : (org.apache.hadoop.hive.hbase.avro.ContactInfo) defaultValue(fields()[4]); + return record; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } +} diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/EmployeeAvro.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/EmployeeAvro.java new file mode 100644 index 000000000000..045f497093ac --- /dev/null +++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/EmployeeAvro.java @@ -0,0 +1,17 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.hadoop.hive.hbase.avro; + +@SuppressWarnings("all") +@org.apache.avro.specific.AvroGenerated +public interface EmployeeAvro { + public static final org.apache.avro.Protocol PROTOCOL = org.apache.avro.Protocol.parse("{\"protocol\":\"EmployeeAvro\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"types\":[{\"type\":\"enum\",\"name\":\"Gender\",\"symbols\":[\"MALE\",\"FEMALE\"]},{\"type\":\"record\",\"name\":\"HomePhone\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]},{\"type\":\"record\",\"name\":\"OfficePhone\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]},{\"type\":\"record\",\"name\":\"Address\",\"fields\":[{\"name\":\"address1\",\"type\":\"string\"},{\"name\":\"address2\",\"type\":\"string\"},{\"name\":\"city\",\"type\":\"string\"},{\"name\":\"zipcode\",\"type\":\"long\"},{\"name\":\"county\",\"type\":[\"HomePhone\",\"OfficePhone\",\"string\",\"null\"]},{\"name\":\"aliases\",\"type\":[{\"type\":\"array\",\"items\":\"string\"},\"null\"]},{\"name\":\"metadata\",\"type\":[\"null\",{\"type\":\"map\",\"values\":\"string\"}]}]},{\"type\":\"record\",\"name\":\"ContactInfo\",\"fields\":[{\"name\":\"address\",\"type\":[{\"type\":\"array\",\"items\":\"Address\"},\"null\"]},{\"name\":\"homePhone\",\"type\":\"HomePhone\"},{\"name\":\"officePhone\",\"type\":\"OfficePhone\"}]},{\"type\":\"record\",\"name\":\"Employee\",\"fields\":[{\"name\":\"employeeName\",\"type\":\"string\"},{\"name\":\"employeeID\",\"type\":\"long\"},{\"name\":\"age\",\"type\":\"long\"},{\"name\":\"gender\",\"type\":\"Gender\"},{\"name\":\"contactInfo\",\"type\":\"ContactInfo\"}]}],\"messages\":{}}"); + + @SuppressWarnings("all") + public interface Callback extends EmployeeAvro { + public static final org.apache.avro.Protocol PROTOCOL = org.apache.hadoop.hive.hbase.avro.EmployeeAvro.PROTOCOL; + } +} \ No newline at end of file diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Gender.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Gender.java new file mode 100644 index 000000000000..160b47d9a7db --- /dev/null +++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Gender.java @@ -0,0 +1,13 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.hadoop.hive.hbase.avro; +@SuppressWarnings("all") +@org.apache.avro.specific.AvroGenerated +public enum Gender { + MALE, FEMALE ; + public static final org.apache.avro.Schema 
SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"enum\",\"name\":\"Gender\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"symbols\":[\"MALE\",\"FEMALE\"]}"); + public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } +} diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/HomePhone.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/HomePhone.java new file mode 100644 index 000000000000..d922bd724ccd --- /dev/null +++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/HomePhone.java @@ -0,0 +1,194 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.hadoop.hive.hbase.avro; +@SuppressWarnings("all") +@org.apache.avro.specific.AvroGenerated +public class HomePhone extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"HomePhone\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]}"); + public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } + @Deprecated public long areaCode; + @Deprecated public long number; + + /** + * Default constructor. Note that this does not initialize fields + * to their default values from the schema. If that is desired then + * one should use newBuilder(). + */ + public HomePhone() {} + + /** + * All-args constructor. + */ + public HomePhone(java.lang.Long areaCode, java.lang.Long number) { + this.areaCode = areaCode; + this.number = number; + } + + public org.apache.avro.Schema getSchema() { return SCHEMA$; } + // Used by DatumWriter. Applications should not call. + public java.lang.Object get(int field$) { + switch (field$) { + case 0: return areaCode; + case 1: return number; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + // Used by DatumReader. Applications should not call. + @SuppressWarnings(value="unchecked") + public void put(int field$, java.lang.Object value$) { + switch (field$) { + case 0: areaCode = (java.lang.Long)value$; break; + case 1: number = (java.lang.Long)value$; break; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + + /** + * Gets the value of the 'areaCode' field. + */ + public java.lang.Long getAreaCode() { + return areaCode; + } + + /** + * Sets the value of the 'areaCode' field. + * @param value the value to set. + */ + public void setAreaCode(java.lang.Long value) { + this.areaCode = value; + } + + /** + * Gets the value of the 'number' field. + */ + public java.lang.Long getNumber() { + return number; + } + + /** + * Sets the value of the 'number' field. + * @param value the value to set. 
+ */ + public void setNumber(java.lang.Long value) { + this.number = value; + } + + /** Creates a new HomePhone RecordBuilder */ + public static org.apache.hadoop.hive.hbase.avro.HomePhone.Builder newBuilder() { + return new org.apache.hadoop.hive.hbase.avro.HomePhone.Builder(); + } + + /** Creates a new HomePhone RecordBuilder by copying an existing Builder */ + public static org.apache.hadoop.hive.hbase.avro.HomePhone.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.HomePhone.Builder other) { + return new org.apache.hadoop.hive.hbase.avro.HomePhone.Builder(other); + } + + /** Creates a new HomePhone RecordBuilder by copying an existing HomePhone instance */ + public static org.apache.hadoop.hive.hbase.avro.HomePhone.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.HomePhone other) { + return new org.apache.hadoop.hive.hbase.avro.HomePhone.Builder(other); + } + + /** + * RecordBuilder for HomePhone instances. + */ + public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + private long areaCode; + private long number; + + /** Creates a new Builder */ + private Builder() { + super(org.apache.hadoop.hive.hbase.avro.HomePhone.SCHEMA$); + } + + /** Creates a Builder by copying an existing Builder */ + private Builder(org.apache.hadoop.hive.hbase.avro.HomePhone.Builder other) { + super(other); + if (isValidValue(fields()[0], other.areaCode)) { + this.areaCode = data().deepCopy(fields()[0].schema(), other.areaCode); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.number)) { + this.number = data().deepCopy(fields()[1].schema(), other.number); + fieldSetFlags()[1] = true; + } + } + + /** Creates a Builder by copying an existing HomePhone instance */ + private Builder(org.apache.hadoop.hive.hbase.avro.HomePhone other) { + super(org.apache.hadoop.hive.hbase.avro.HomePhone.SCHEMA$); + if (isValidValue(fields()[0], other.areaCode)) { + this.areaCode = data().deepCopy(fields()[0].schema(), other.areaCode); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.number)) { + this.number = data().deepCopy(fields()[1].schema(), other.number); + fieldSetFlags()[1] = true; + } + } + + /** Gets the value of the 'areaCode' field */ + public java.lang.Long getAreaCode() { + return areaCode; + } + + /** Sets the value of the 'areaCode' field */ + public org.apache.hadoop.hive.hbase.avro.HomePhone.Builder setAreaCode(long value) { + validate(fields()[0], value); + this.areaCode = value; + fieldSetFlags()[0] = true; + return this; + } + + /** Checks whether the 'areaCode' field has been set */ + public boolean hasAreaCode() { + return fieldSetFlags()[0]; + } + + /** Clears the value of the 'areaCode' field */ + public org.apache.hadoop.hive.hbase.avro.HomePhone.Builder clearAreaCode() { + fieldSetFlags()[0] = false; + return this; + } + + /** Gets the value of the 'number' field */ + public java.lang.Long getNumber() { + return number; + } + + /** Sets the value of the 'number' field */ + public org.apache.hadoop.hive.hbase.avro.HomePhone.Builder setNumber(long value) { + validate(fields()[1], value); + this.number = value; + fieldSetFlags()[1] = true; + return this; + } + + /** Checks whether the 'number' field has been set */ + public boolean hasNumber() { + return fieldSetFlags()[1]; + } + + /** Clears the value of the 'number' field */ + public org.apache.hadoop.hive.hbase.avro.HomePhone.Builder clearNumber() { + fieldSetFlags()[1] = false; + return this; + } + + @Override + public 
HomePhone build() { + try { + HomePhone record = new HomePhone(); + record.areaCode = fieldSetFlags()[0] ? this.areaCode : (java.lang.Long) defaultValue(fields()[0]); + record.number = fieldSetFlags()[1] ? this.number : (java.lang.Long) defaultValue(fields()[1]); + return record; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } +} diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java new file mode 100644 index 000000000000..26c9df057a3c --- /dev/null +++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java @@ -0,0 +1,194 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.hadoop.hive.hbase.avro; +@SuppressWarnings("all") +@org.apache.avro.specific.AvroGenerated +public class OfficePhone extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"OfficePhone\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]}"); + public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } + @Deprecated public long areaCode; + @Deprecated public long number; + + /** + * Default constructor. Note that this does not initialize fields + * to their default values from the schema. If that is desired then + * one should use newBuilder(). + */ + public OfficePhone() {} + + /** + * All-args constructor. + */ + public OfficePhone(java.lang.Long areaCode, java.lang.Long number) { + this.areaCode = areaCode; + this.number = number; + } + + public org.apache.avro.Schema getSchema() { return SCHEMA$; } + // Used by DatumWriter. Applications should not call. + public java.lang.Object get(int field$) { + switch (field$) { + case 0: return areaCode; + case 1: return number; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + // Used by DatumReader. Applications should not call. + @SuppressWarnings(value="unchecked") + public void put(int field$, java.lang.Object value$) { + switch (field$) { + case 0: areaCode = (java.lang.Long)value$; break; + case 1: number = (java.lang.Long)value$; break; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + + /** + * Gets the value of the 'areaCode' field. + */ + public java.lang.Long getAreaCode() { + return areaCode; + } + + /** + * Sets the value of the 'areaCode' field. + * @param value the value to set. + */ + public void setAreaCode(java.lang.Long value) { + this.areaCode = value; + } + + /** + * Gets the value of the 'number' field. + */ + public java.lang.Long getNumber() { + return number; + } + + /** + * Sets the value of the 'number' field. + * @param value the value to set. 
+ */ + public void setNumber(java.lang.Long value) { + this.number = value; + } + + /** Creates a new OfficePhone RecordBuilder */ + public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder() { + return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder(); + } + + /** Creates a new OfficePhone RecordBuilder by copying an existing Builder */ + public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder other) { + return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder(other); + } + + /** Creates a new OfficePhone RecordBuilder by copying an existing OfficePhone instance */ + public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.OfficePhone other) { + return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder(other); + } + + /** + * RecordBuilder for OfficePhone instances. + */ + public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + private long areaCode; + private long number; + + /** Creates a new Builder */ + private Builder() { + super(org.apache.hadoop.hive.hbase.avro.OfficePhone.SCHEMA$); + } + + /** Creates a Builder by copying an existing Builder */ + private Builder(org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder other) { + super(other); + if (isValidValue(fields()[0], other.areaCode)) { + this.areaCode = data().deepCopy(fields()[0].schema(), other.areaCode); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.number)) { + this.number = data().deepCopy(fields()[1].schema(), other.number); + fieldSetFlags()[1] = true; + } + } + + /** Creates a Builder by copying an existing OfficePhone instance */ + private Builder(org.apache.hadoop.hive.hbase.avro.OfficePhone other) { + super(org.apache.hadoop.hive.hbase.avro.OfficePhone.SCHEMA$); + if (isValidValue(fields()[0], other.areaCode)) { + this.areaCode = data().deepCopy(fields()[0].schema(), other.areaCode); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.number)) { + this.number = data().deepCopy(fields()[1].schema(), other.number); + fieldSetFlags()[1] = true; + } + } + + /** Gets the value of the 'areaCode' field */ + public java.lang.Long getAreaCode() { + return areaCode; + } + + /** Sets the value of the 'areaCode' field */ + public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder setAreaCode(long value) { + validate(fields()[0], value); + this.areaCode = value; + fieldSetFlags()[0] = true; + return this; + } + + /** Checks whether the 'areaCode' field has been set */ + public boolean hasAreaCode() { + return fieldSetFlags()[0]; + } + + /** Clears the value of the 'areaCode' field */ + public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder clearAreaCode() { + fieldSetFlags()[0] = false; + return this; + } + + /** Gets the value of the 'number' field */ + public java.lang.Long getNumber() { + return number; + } + + /** Sets the value of the 'number' field */ + public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder setNumber(long value) { + validate(fields()[1], value); + this.number = value; + fieldSetFlags()[1] = true; + return this; + } + + /** Checks whether the 'number' field has been set */ + public boolean hasNumber() { + return fieldSetFlags()[1]; + } + + /** Clears the value of the 'number' field */ + public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder clearNumber() { + fieldSetFlags()[1] = 
false; + return this; + } + + @Override + public OfficePhone build() { + try { + OfficePhone record = new OfficePhone(); + record.areaCode = fieldSetFlags()[0] ? this.areaCode : (java.lang.Long) defaultValue(fields()[0]); + record.number = fieldSetFlags()[1] ? this.number : (java.lang.Long) defaultValue(fields()[1]); + return record; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } +} diff --git a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index 8d3595bff09e..5e089aac3676 100644 --- a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -37,6 +37,8 @@ public class serdeConstants { public static final String SERIALIZATION_CLASS = "serialization.class"; + public static final String SERIALIZATION_TYPE = "serialization.type"; + public static final String SERIALIZATION_FORMAT = "serialization.format"; public static final String SERIALIZATION_DDL = "serialization.ddl"; diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java index 402a4ac93aca..2f0ba10669e7 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.serde2.avro; +import java.io.ByteArrayInputStream; import java.io.DataInput; import java.io.DataOutput; import java.io.DataOutputStream; @@ -25,6 +26,7 @@ import java.rmi.server.UID; import org.apache.avro.Schema; +import org.apache.avro.file.DataFileStream; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; @@ -101,6 +103,28 @@ record = new GenericData.Record(schema); GenericDatumReader gdr = new GenericDatumReader(schema); record = gdr.read(record, binaryDecoder); } + + public void readFields(byte[] bytes, int offset, int length, Schema writerSchema, Schema readerSchema) throws IOException { + fileSchema = writerSchema; + record = new GenericData.Record(writerSchema); + binaryDecoder = + DecoderFactory.get().binaryDecoder(bytes, offset, length - offset, + binaryDecoder); + GenericDatumReader gdr = + new GenericDatumReader(writerSchema, readerSchema); + record = gdr.read(null, binaryDecoder); + } + + public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException { + fileSchema = writerSchema; + record = new GenericData.Record(writerSchema); + GenericDatumReader gdr = new GenericDatumReader(); + gdr.setExpected(readerSchema); + ByteArrayInputStream is = new ByteArrayInputStream(bytes); + DataFileStream dfr = new DataFileStream(is, gdr); + record = dfr.next(record); + dfr.close(); + } public UID getRecordReaderID() { return recordReaderID; @@ -117,5 +141,4 @@ public Schema getFileSchema() { public void setFileSchema(Schema originalSchema) { this.fileSchema = originalSchema; } - } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java new file mode 100644 index 000000000000..a6e8c2cf4a21 --- /dev/null +++ 
b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java @@ -0,0 +1,506 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.commons.lang.ClassUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyArray;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyMap;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
+import org.apache.hadoop.hive.serde2.lazy.LazyUnion;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyListObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Lazy ObjectInspector for Avro-serialized data
+ * */
+public class AvroLazyObjectInspector extends LazySimpleStructObjectInspector {
+
+  /**
+   * Reader {@link Schema} for the avro data
+   * */
+  private Schema readerSchema;
+
+  /**
+   * {@link AvroSchemaRetriever} to retrieve the avro schema
+   * */
+  private AvroSchemaRetriever schemaRetriever;
+
+  /**
+   * LOGGER
+   * */
+  public static final Log LOG = LogFactory.getLog(AvroLazyObjectInspector.class);
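This inspector leans on the two readFields overloads added to AvroGenericRecordWritable earlier in this patch. A minimal round-trip sketch of the raw-binary path, using a toy schema that is not part of this patch:

import java.io.ByteArrayOutputStream;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable;

public class ReadFieldsSketch {
  public static void main(String[] args) throws Exception {
    // Toy writer schema; any record schema behaves the same way.
    Schema writer = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Phone\",\"fields\":["
            + "{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]}");

    GenericRecord phone = new GenericData.Record(writer);
    phone.put("areaCode", 651L);
    phone.put("number", 1234567L);

    // Encode as raw Avro binary (no container header), the layout the
    // 5-argument readFields expects.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(writer).write(phone, encoder);
    encoder.flush();

    byte[] bytes = out.toByteArray();
    AvroGenericRecordWritable writable = new AvroGenericRecordWritable();
    // Reader schema equals writer schema here; a compatible evolved reader schema also works.
    writable.readFields(bytes, 0, bytes.length, writer, writer);
    System.out.println(writable.getRecord());
  }
}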
+
+  /**
+   * Constructor
+   *
+   * @param structFieldNames fields within the given avro object
+   * @param structFieldObjectInspectors object inspectors for the fields
+   * @param structFieldComments comments for the given fields
+   * @param separator separator between different fields
+   * @param lastColumnTakesRest whether the last column of the struct should take the rest of the
+   *          row if there are extra fields
+   * @param nullSequence sequence to represent null value
+   * @param escaped whether the data is escaped or not
+   * @param escapeChar if escaped is true, the escape character
+   * */
+  public AvroLazyObjectInspector(List structFieldNames,
+      List structFieldObjectInspectors, List structFieldComments,
+      byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
+      byte escapeChar) {
+    super(structFieldNames, structFieldObjectInspectors, structFieldComments, separator,
+        nullSequence, lastColumnTakesRest, escaped, escapeChar);
+  }
+
+  /**
+   * Set the reader schema for the {@link AvroLazyObjectInspector} to the given schema
+   * */
+  public void setReaderSchema(Schema readerSchema) {
+    this.readerSchema = readerSchema;
+  }
+
+  /**
+   * Set the {@link AvroSchemaRetriever} for the {@link AvroLazyObjectInspector} to the given
+   * instance
+   *
+   * @param schemaRetriever the schema retriever to be set
+   * */
+  public void setSchemaRetriever(AvroSchemaRetriever schemaRetriever) {
+    this.schemaRetriever = schemaRetriever;
+  }
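A sketch of how these two setters are meant to be combined; typeInfo, separators, nullSequence, readerSchema and myRetriever are placeholders, and the cast assumes a struct type created through LazyFactory with the AVRO member this patch adds to ObjectInspectorOptions (see the LazyFactory and LazyObjectInspectorFactory hunks below):

// Hypothetical wiring; typeInfo, separators, nullSequence, readerSchema and
// myRetriever are placeholders, and ObjectInspectorOptions.AVRO is the enum
// member assumed to be introduced by this patch.
AvroLazyObjectInspector inspector =
    (AvroLazyObjectInspector) LazyFactory.createLazyObjectInspector(
        typeInfo, separators, 1, nullSequence, false, (byte) 0,
        ObjectInspectorOptions.AVRO);

inspector.setReaderSchema(readerSchema);     // either pin a reader schema up front...
inspector.setSchemaRetriever(myRetriever);   // ...or resolve schemas per payload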
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public Object getStructFieldData(Object data, StructField fieldRef) {
+    if (data == null) {
+      return null;
+    }
+
+    if (!(fieldRef instanceof MyField)) {
+      throw new IllegalArgumentException("fieldRef has to be of type MyField");
+    }
+
+    MyField f = (MyField) fieldRef;
+    int fieldID = f.getFieldID();
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Getting struct field data for field: [" + f.getFieldName() + "] on data ["
+          + data.getClass() + "]");
+    }
+
+    if (data instanceof LazyStruct) {
+      LazyStruct row = (LazyStruct) data;
+
+      // get the field out of the struct
+      Object rowField = row.getField(fieldID);
+
+      if (rowField instanceof LazyStruct) {
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Deserializing struct [" + rowField.getClass() + "]");
+        }
+
+        return deserializeStruct(rowField, f.getFieldName());
+
+      } else if (rowField instanceof LazyMap) {
+        // We have found a map. Systematically deserialize the values of the map and return the map
+        LazyMap lazyMap = (LazyMap) rowField;
+
+        for (Entry entry : lazyMap.getMap().entrySet()) {
+          Object _key = entry.getKey();
+          Object _value = entry.getValue();
+
+          if (_value instanceof LazyStruct) {
+            lazyMap.getMap().put(_key, deserializeStruct(_value, f.getFieldName()));
+          }
+        }
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Returning a lazy map for field [" + f.getFieldName() + "]");
+        }
+
+        return lazyMap;
+
+      } else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Returning [" + rowField.toString() + "] for field [" + f.getFieldName() + "]");
+        }
+
+        // Just return the object. We need no further operation on it
+        return rowField;
+      }
+    } else {
+
+      // The Avro deserializer deserializes our object and returns a list of objects that
+      // hive can operate on. Here we should be getting the same object back.
+      if (!(data instanceof List)) {
+        throw new IllegalArgumentException("data should be an instance of list");
+      }
+
+      if (!(fieldID < ((List) data).size())) {
+        return null;
+      }
+
+      // look up the field corresponding to the given field ID and return it
+      Object field = ((List) data).get(fieldID);
+
+      if (field == null) {
+        return null;
+      }
+
+      // convert to a lazy object and return
+      return toLazyObject(field, fieldRef.getFieldObjectInspector());
+    }
+  }
+
+  @Override
+  public List getStructFieldsDataAsList(Object data) {
+    if (data == null) {
+      return null;
+    }
+
+    List result = new ArrayList(fields.size());
+
+    for (int i = 0; i < fields.size(); i++) {
+      result.add(getStructFieldData(data, fields.get(i)));
+    }
+
+    return result;
+  }
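From the caller's perspective the laziness is invisible; access goes through the standard StructObjectInspector contract. A sketch, where inspector and row are placeholders and "contactinfo" is an illustrative column name:

// Placeholders: 'inspector' is an AvroLazyObjectInspector, 'row' one lazy row object.
StructField contactInfo = inspector.getStructFieldRef("contactinfo");
Object value = inspector.getStructFieldData(row, contactInfo); // deserialized on first access
List<Object> allFields = inspector.getStructFieldsDataAsList(row);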
+
+  /**
+   * Deserialize the given struct object
+   *
+   * @param struct the object to deserialize
+   * @param fieldName name of the field on which we are currently operating
+   * @return a deserialized object that hive can further operate on
+   * @throws AvroObjectInspectorException if something goes wrong during deserialization
+   * */
+  private Object deserializeStruct(Object struct, String fieldName) {
+    byte[] data = ((LazyStruct) struct).getBytes();
+    AvroDeserializer deserializer = new AvroDeserializer();
+
+    if (data == null) {
+      return null;
+    }
+
+    if (readerSchema == null && schemaRetriever == null) {
+      throw new IllegalArgumentException("reader schema or schemaRetriever must be set for field ["
+          + fieldName + "]");
+    }
+
+    Schema ws = null;
+    Schema rs = null;
+    int offset = 0;
+
+    AvroGenericRecordWritable avroWritable = new AvroGenericRecordWritable();
+
+    if (readerSchema == null) {
+
+      rs = schemaRetriever.retrieveReaderSchema(data);
+
+      if (rs == null) {
+        // still nothing; raise an exception
+        throw new IllegalStateException(
+            "A valid reader schema could not be retrieved either directly or from the schema retriever for field ["
+                + fieldName + "]");
+      }
+
+      ws = schemaRetriever.retrieveWriterSchema(data);
+
+      if (ws == null) {
+        throw new IllegalStateException(
+            "Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
+      }
+
+      // adjust the data bytes according to any possible offset that was provided
+      offset = schemaRetriever.getOffset();
+
+      if (data.length < offset) {
+        throw new IllegalArgumentException("Data size cannot be less than [" + offset
+            + "]. Found [" + data.length + "]");
+      }
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Retrieved writer Schema: " + ws.toString());
+        LOG.debug("Retrieved reader Schema: " + rs.toString());
+      }
+
+      try {
+        avroWritable.readFields(data, offset, data.length, ws, rs);
+      } catch (IOException ioe) {
+        throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
+      }
+    } else {
+      // a reader schema was provided
+      if (schemaRetriever != null) {
+        // a schema retriever has been provided as well. Attempt to read the writer schema from the
+        // retriever
+        ws = schemaRetriever.retrieveWriterSchema(data);
+
+        if (ws == null) {
+          throw new IllegalStateException(
+              "Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
+        }
+      } else {
+        // attempt to retrieve the schema from the data itself
+        ws = retrieveSchemaFromBytes(data);
+      }
+
+      rs = readerSchema;
+
+      try {
+        avroWritable.readFields(data, ws, rs);
+      } catch (IOException ioe) {
+        throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
+      }
+    }
+
+    AvroObjectInspectorGenerator oiGenerator = null;
+    Object deserializedObject = null;
+
+    try {
+      oiGenerator = new AvroObjectInspectorGenerator(rs);
+      deserializedObject =
+          deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(),
+              avroWritable, rs);
+    } catch (SerDeException se) {
+      throw new AvroObjectInspectorException("Error deserializing avro payload", se);
+    }
+
+    return deserializedObject;
+  }
+
+  /**
+   * Retrieve the schema from the given bytes
+   *
+   * @param data serialized avro payload, expected to be an avro container stream
+   * @return the retrieved {@link Schema schema}
+   * */
+  private Schema retrieveSchemaFromBytes(byte[] data) {
+    ByteArrayInputStream bais = new ByteArrayInputStream(data);
+    DatumReader reader = new GenericDatumReader();
+
+    Schema schema = null;
+
+    try {
+      // dfs is AutoCloseable
+      @SuppressWarnings("resource")
+      DataFileStream dfs = new DataFileStream(bais, reader);
+      schema = dfs.getSchema();
+    } catch (IOException ioe) {
+      throw new AvroObjectInspectorException("An error occurred retrieving schema from bytes", ioe);
+    }
+
+    return schema;
+  }
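The no-retriever fallback above only works when the cell value is a full Avro container stream, i.e. when the writer schema travels in the payload's header. A sketch of producing and re-reading such a payload with plain Avro APIs (the helper name is ours, not the patch's):

// Sketch: embed the writer schema in the payload, then recover it the way
// retrieveSchemaFromBytes does.
static Schema embeddedSchemaOf(Schema schema, GenericRecord record) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema));
  writer.create(schema, out);   // writes the container header, including the schema
  writer.append(record);
  writer.close();

  DataFileStream<GenericRecord> in = new DataFileStream<GenericRecord>(
      new ByteArrayInputStream(out.toByteArray()), new GenericDatumReader<GenericRecord>());
  try {
    return in.getSchema();      // the writer schema, recovered from the bytes alone
  } finally {
    in.close();
  }
}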
+
+  /**
+   * Convert the given field to a lazy object
+   *
+   * @param field field to be converted to a lazy object
+   * @param fieldOI {@link ObjectInspector} for the given field
+   * @return the converted lazy object
+   * */
+  private Object toLazyObject(Object field, ObjectInspector fieldOI) {
+    if (isPrimitive(field.getClass())) {
+      return toLazyPrimitiveObject(field, fieldOI);
+    } else if (fieldOI instanceof LazyListObjectInspector) {
+      return toLazyListObject(field, fieldOI);
+    } else if (field instanceof StandardUnion) {
+      return toLazyUnionObject(field, fieldOI);
+    } else if (fieldOI instanceof LazyMapObjectInspector) {
+      return toLazyMapObject(field, fieldOI);
+    } else {
+      return field;
+    }
+  }
+
+  /**
+   * Convert the given object to a lazy primitive object using the given {@link ObjectInspector}
+   *
+   * @param obj Object to be converted to a {@link LazyObject}
+   * @param oi ObjectInspector used for the conversion
+   * @return the created {@link LazyObject lazy object}
+   * */
+  private LazyObject toLazyPrimitiveObject(Object obj, ObjectInspector oi) {
+    if (obj == null) {
+      return null;
+    }
+
+    LazyObject lazyObject = LazyFactory.createLazyObject(oi);
+    ByteArrayRef ref = new ByteArrayRef();
+
+    String objAsString = obj.toString().trim();
+
+    ref.setData(objAsString.getBytes());
+
+    // initialize the lazy object
+    lazyObject.init(ref, 0, ref.getData().length);
+
+    return lazyObject;
+  }
+
+  /**
+   * Convert the given object to a lazy list object using the given {@link ObjectInspector}
+   *
+   * @param obj Object to be converted to a {@link LazyObject}
+   * @param objectInspector ObjectInspector used for the conversion
+   * @return the created {@link LazyObject lazy object}
+   * */
+  private Object toLazyListObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    List listObj = (List) obj;
+
+    LazyArray retList = (LazyArray) LazyFactory.createLazyObject(objectInspector);
+
+    List lazyList = retList.getList();
+
+    ObjectInspector listElementOI =
+        ((ListObjectInspector) objectInspector).getListElementObjectInspector();
+
+    for (int i = 0; i < listObj.size(); i++) {
+      lazyList.add(toLazyObject(listObj.get(i), listElementOI));
+    }
+
+    return retList;
+  }
+
+  /**
+   * Convert the given object to a lazy map object using the given {@link ObjectInspector}
+   *
+   * @param obj Object to be converted to a {@link LazyObject}
+   * @param objectInspector ObjectInspector used for the conversion
+   * @return the created {@link LazyObject lazy object}
+   * */
+  @SuppressWarnings({ "rawtypes", "unchecked" })
+  private Object toLazyMapObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    // avro guarantees that the key will be of type string. So we just need to worry about
+    // deserializing the value here
+
+    LazyMap lazyMap = (LazyMap) LazyFactory.createLazyObject(objectInspector);
+
+    Map map = lazyMap.getMap();
+
+    Map origMap = (Map) obj;
+
+    ObjectInspector keyObjectInspector =
+        ((MapObjectInspector) objectInspector).getMapKeyObjectInspector();
+    ObjectInspector valueObjectInspector =
+        ((MapObjectInspector) objectInspector).getMapValueObjectInspector();
+
+    for (Entry entry : origMap.entrySet()) {
+      Object value = entry.getValue();
+
+      map.put(toLazyPrimitiveObject(entry.getKey(), keyObjectInspector),
+          toLazyObject(value, valueObjectInspector));
+    }
+
+    return lazyMap;
+  }
+
+  /**
+   * Convert the given object to a lazy union object using the given {@link ObjectInspector}
+   *
+   * @param obj Object to be converted to a {@link LazyObject}
+   * @param objectInspector ObjectInspector used for the conversion
+   * @return the created {@link LazyObject lazy object}
+   * */
+  private Object toLazyUnionObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    if (!(objectInspector instanceof LazyUnionObjectInspector)) {
+      throw new IllegalArgumentException(
+          "Invalid objectinspector found. Expected LazyUnionObjectInspector, Found "
+              + objectInspector.getClass());
+    }
+
+    StandardUnion standardUnion = (StandardUnion) obj;
+
+    // Grab the tag and the field
+    byte tag = standardUnion.getTag();
+    Object field = standardUnion.getObject();
+
+    ObjectInspector fieldOI =
+        ((LazyUnionObjectInspector) objectInspector).getObjectInspectors().get(tag);
+
+    // convert to a lazy object
+    Object convertedObj = null;
+
+    if (field != null) {
+      convertedObj = toLazyObject(field, fieldOI);
+    }
+
+    if (convertedObj == null) {
+      return null;
+    }
+
+    LazyUnion lazyUnion = (LazyUnion) LazyFactory.createLazyObject(objectInspector);
+
+    lazyUnion.setField(convertedObj);
+    lazyUnion.setTag(tag);
+
+    return lazyUnion;
+  }
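toLazyUnionObject is the consumer of the setField/setTag mutators this patch adds to LazyUnion (see the LazyUnion.java hunk further down). In isolation the mechanism is simply this sketch, where unionOI and convertedField are placeholders:

// Sketch; unionOI and convertedField are placeholders.
LazyUnion lazyUnion = (LazyUnion) LazyFactory.createLazyObject(unionOI);
lazyUnion.setTag((byte) 0);           // which branch of the union is populated
lazyUnion.setField(convertedField);   // the already-converted lazy object
// getTag()/getField() now short-circuit and return the injected values
// instead of parsing the underlying bytes.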
+
+  /**
+   * Determines if the given object is a primitive or a wrapper to a primitive. Note that even
+   * though a String may not be a primitive in the traditional sense, it is considered one here
+   * because it is not a struct.
+   *
+   * @param clazz input class
+   * @return true if the object is a primitive or a wrapper to a primitive, false otherwise
+   * */
+  private boolean isPrimitive(Class clazz) {
+    return clazz.isPrimitive() || ClassUtils.wrapperToPrimitive(clazz) != null
+        || clazz.getSimpleName().equals("String");
+  }
+} \ No newline at end of file diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java new file mode 100644 index 000000000000..fe4b2eb6939b --- /dev/null +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java @@ -0,0 +1,25 @@
+package org.apache.hadoop.hive.serde2.avro;
+
+/**
+ * Exception thrown by the {@link AvroLazyObjectInspector}
+ * */
+public class AvroObjectInspectorException extends RuntimeException {
+
+  private static final long serialVersionUID = 1L;
+
+  public AvroObjectInspectorException() {
+    super();
+  }
+
+  public AvroObjectInspectorException(String message) {
+    super(message);
+  }
+
+  public AvroObjectInspectorException(Throwable cause) {
+    super(cause);
+  }
+
+  public AvroObjectInspectorException(String message, Throwable cause) {
+    super(message, cause);
+  }
+} \ No newline at end of file diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java index 46cdb4f8a6aa..64a65433c7b4 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java @@ -40,7 +40,7 @@ * * A list of those fields equivalent types in Hive * * An ObjectInspector capable of working with an instance of that datum. */ -class AvroObjectInspectorGenerator { +public class AvroObjectInspectorGenerator { final private List columnNames; final private List columnTypes; final private ObjectInspector oi;
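Making AvroObjectInspectorGenerator public lets code outside the package, such as AvroLazyObjectInspector above, derive Hive column metadata directly from a reader schema. A minimal sketch:

// Sketch: map an Avro reader schema to Hive column names, column types and an inspector.
static void describe(Schema readerSchema) throws SerDeException {
  AvroObjectInspectorGenerator generator = new AvroObjectInspectorGenerator(readerSchema);
  List<String> columnNames = generator.getColumnNames();
  List<TypeInfo> columnTypes = generator.getColumnTypes();
  ObjectInspector oi = generator.getObjectInspector();
  System.out.println(columnNames + " / " + columnTypes + " / " + oi.getTypeName());
}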
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java new file mode 100644 index 000000000000..fcd262184bf5 --- /dev/null +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java @@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import org.apache.avro.Schema;
+
+/**
+ * Retrieves the avro schema from the given source. "Source" is used loosely here: it can range
+ * from an HDFS url location pointing to the schema to something as simple as a
+ * {@link Properties properties} file with a key-value mapping to the schema. For cases where the
+ * {@link Schema schema} is part of the serialized data itself, "source" refers to the data bytes
+ * from which the {@link Schema schema} has to be retrieved.
+ * */
+public abstract class AvroSchemaRetriever {
+
+  /**
+   * Retrieve the writer avro schema from the given source
+   *
+   * @param source source from which the schema has to be retrieved
+   * @return the retrieved writer {@link Schema}
+   * */
+  public abstract Schema retrieveWriterSchema(Object source);
+
+  /**
+   * Retrieve the reader avro schema from the given source
+   *
+   * @param source source from which the schema has to be retrieved
+   * @return the retrieved reader {@link Schema}
+   * */
+  public Schema retrieveReaderSchema(Object source) {
+    return null;
+  }
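A minimal concrete retriever, assuming the schema is fixed and known up front; the class is hypothetical and ignores the source argument entirely:

// Hypothetical retriever that always hands back one fixed schema.
public class FixedSchemaRetriever extends AvroSchemaRetriever {

  private final Schema schema;

  public FixedSchemaRetriever(Schema schema) {
    this.schema = schema;
  }

  @Override
  public Schema retrieveWriterSchema(Object source) {
    return schema;
  }

  @Override
  public Schema retrieveReaderSchema(Object source) {
    return schema;
  }
}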
+
+  /**
+   * Possible offset associated with the schema. This is useful when the schema is stored inline
+   * along with the data.
+   *
+   * Defaulted to zero. Consumers can choose to override this value to provide a custom offset.
+ * */ + public int getOffset() { + return 0; + } +} \ No newline at end of file diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java index 7c48e9bbe4e3..5da12cb177de 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java @@ -57,6 +57,7 @@ public class AvroSerdeUtils { public static final String EXCEPTION_MESSAGE = "Neither " + SCHEMA_LITERAL + " nor " + SCHEMA_URL + " specified, can't determine table schema"; public static final String AVRO_SERDE_SCHEMA = "avro.serde.schema"; + public static final String SCHEMA_RETRIEVER = "avro.schema.retriever"; /** * Determine the schema to that's been provided for Avro serde work. diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index d9a38ba5cda1..e3968a952a38 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -53,6 +53,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; @@ -217,11 +218,11 @@ public static LazyObject createLazyObject(ObjectInspe */ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, - byte escapeChar) throws SerDeException { + byte escapeChar, ObjectInspectorOptions option) throws SerDeException { return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, - escaped, escapeChar, false); + escaped, escapeChar, false, option); } - + /** * Create a hierarchical ObjectInspector for LazyObject with the given * typeInfo. @@ -236,13 +237,54 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * delimiting entries, the second one for delimiting key and values. * @param nullSequence * The sequence of bytes representing NULL. + * @return The ObjectInspector + * @throws SerDeException + */ + public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, + byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte escapeChar) throws SerDeException { + return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, + escaped, escapeChar, false, ObjectInspectorOptions.JAVA); + } + + /** + * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo. + * + * @param typeInfo The type information for the LazyObject + * @param separator The array of separators for delimiting each level + * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of + * separator, and map uses 2 levels: the first one for delimiting entries, the second one + * for delimiting key and values. + * @param nullSequence The sequence of bytes representing NULL. 
* @param extendedBooleanLiteral whether extended boolean literal set is legal
+ * @param option the {@link ObjectInspectorOptions}
* @return The ObjectInspector
* @throws SerDeException
*/
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException {
+ return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, escaped,
+ escapeChar, extendedBooleanLiteral, ObjectInspectorOptions.JAVA);
+ }
+
+ /**
+ * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo.
+ *
+ * @param typeInfo The type information for the LazyObject
+ * @param separator The array of separators for delimiting each level
+ * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of
+ * separator, and map uses 2 levels: the first one for delimiting entries, the second one
+ * for delimiting key and values.
+ * @param nullSequence The sequence of bytes representing NULL.
+ * @param extendedBooleanLiteral whether extended boolean literal set is legal
+ * @param option the {@link ObjectInspectorOptions}
+ * @return The ObjectInspector
+ * @throws SerDeException
+ */
+ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+ byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
+ byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException {
ObjectInspector.Category c = typeInfo.getCategory();
switch (c) {
case PRIMITIVE:
@@ -252,9 +294,9 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
createLazyObjectInspector(((MapTypeInfo) typeInfo)
.getMapKeyTypeInfo(), separator, separatorIndex + 2,
- nullSequence, escaped, escapeChar, extendedBooleanLiteral), createLazyObjectInspector(
+ nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), createLazyObjectInspector(
((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator,
- separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral),
+ separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option),
LazyUtils.getSeparator(separator, separatorIndex),
LazyUtils.getSeparator(separator, separatorIndex+1),
nullSequence, escaped, escapeChar);
@@ -262,7 +304,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
return LazyObjectInspectorFactory.getLazySimpleListObjectInspector(
createLazyObjectInspector(((ListTypeInfo) typeInfo)
.getListElementTypeInfo(), separator, separatorIndex + 1,
- nullSequence, escaped, escapeChar, extendedBooleanLiteral), LazyUtils.getSeparator(separator, separatorIndex),
+ nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex),
nullSequence, escaped, escapeChar);
case STRUCT:
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
@@ -274,19 +316,20 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
for (int i = 0; i < fieldTypeInfos.size(); i++) {
fieldObjectInspectors.add(createLazyObjectInspector(fieldTypeInfos
.get(i), separator, separatorIndex + 1, nullSequence, escaped,
- escapeChar, extendedBooleanLiteral));
+ escapeChar, extendedBooleanLiteral, option));
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
fieldNames, fieldObjectInspectors,
LazyUtils.getSeparator(separator, separatorIndex), - nullSequence, false, escaped, escapeChar); + nullSequence, + false, escaped, escapeChar, option); case UNION: UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; List<ObjectInspector> lazyOIs = new ArrayList<ObjectInspector>(); for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) { lazyOIs.add(createLazyObjectInspector(uti, separator, separatorIndex + 1, nullSequence, escaped, - escapeChar, extendedBooleanLiteral)); + escapeChar, extendedBooleanLiteral, option)); } return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs, LazyUtils.getSeparator(separator, separatorIndex), diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java index c74af4b84c98..588cc8c186fa 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java @@ -342,4 +342,13 @@ private int[] findIndexes(byte[] array, byte[] target) { } return indexes; } -} + + /** + * Return the raw bytes underlying this struct. This is useful when the struct's data is stored + * in a serialized format such as Protocol Buffers or Thrift and must be handed to a custom + * deserializer. + * */ + public byte[] getBytes() { + return bytes.getData(); + } +} \ No newline at end of file diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java index 9f6bc3f2e63a..22fc638c0594 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazy; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Text; /** @@ -26,8 +25,7 @@ * non-primitive. * */ -public class LazyUnion extends - LazyNonPrimitive<LazyUnionObjectInspector> { +public class LazyUnion extends LazyNonPrimitive<LazyUnionObjectInspector> { /** * Whether the data is already parsed or not. */ @@ -41,7 +39,7 @@ public class LazyUnion extends /** * The object of the union. */ - private LazyObject<? extends ObjectInspector> field; + private Object field; /** * Tag of the Union */ @@ -53,6 +51,16 @@ public class LazyUnion extends */ private boolean fieldInited = false; + /** + * Whether the tag has been set or not + * */ + private boolean tagSet = false; + + /** + * Whether the field has been set or not + * */ + private boolean fieldSet = false; + /** * Construct a LazyUnion object with the ObjectInspector.
*/ @@ -123,6 +131,7 @@ private void parse() { * * @return The value of the field */ + @SuppressWarnings("rawtypes") private Object uncheckedGetField() { Text nullSequence = oi.getNullSequence(); int fieldLength = start + length - startPosition; @@ -134,9 +143,9 @@ private Object uncheckedGetField() { if (!fieldInited) { fieldInited = true; - field.init(bytes, startPosition, fieldLength); + ((LazyObject) field).init(bytes, startPosition, fieldLength); } - return field.getObject(); + return ((LazyObject) field).getObject(); } /** @@ -145,6 +154,10 @@ private Object uncheckedGetField() { * @return The field as a LazyObject */ public Object getField() { + if (fieldSet) { + return field; + } + if (!parsed) { parse(); } @@ -157,9 +170,33 @@ * @return The tag byte */ public byte getTag() { + if (tagSet) { + return tag; + } + if (!parsed) { parse(); } return tag; } -} + + /** + * Set the field of the union + * + * @param field the field to be set + * */ + public void setField(Object field) { + this.field = field; + fieldSet = true; + } + + /** + * Set the tag for the union + * + * @param tag the tag to be set + * */ + public void setTag(byte tag) { + this.tag = tag; + tagSet = true; + } +} \ No newline at end of file diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java index ed670b029309..1abd8a5229d9 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java @@ -22,7 +22,9 @@ import java.util.List; import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.io.Text; /** @@ -48,14 +50,34 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector byte escapeChar) { return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, null, separator, nullSequence, - lastColumnTakesRest, escaped, escapeChar); + lastColumnTakesRest, escaped, escapeChar, ObjectInspectorOptions.JAVA); + } + + public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( + List<String> structFieldNames, + List<ObjectInspector> structFieldObjectInspectors, byte separator, + Text nullSequence, boolean lastColumnTakesRest, boolean escaped, + byte escapeChar, ObjectInspectorOptions option) { + return getLazySimpleStructObjectInspector(structFieldNames, + structFieldObjectInspectors, null, separator, nullSequence, + lastColumnTakesRest, escaped, escapeChar, option); } public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments, byte separator, Text nullSequence, boolean lastColumnTakesRest, - boolean escaped,byte escapeChar) { + boolean escaped, byte escapeChar) { + return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, + structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar, + ObjectInspectorOptions.JAVA); + } + + public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( + List<String> structFieldNames, + List<ObjectInspector>
structFieldObjectInspectors, List<String> structFieldComments, + byte separator, Text nullSequence, boolean lastColumnTakesRest, + boolean escaped, byte escapeChar, ObjectInspectorOptions option) { ArrayList<Object> signature = new ArrayList<Object>(); signature.add(structFieldNames); signature.add(structFieldObjectInspectors); @@ -64,15 +86,30 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector signature.add(Boolean.valueOf(lastColumnTakesRest)); signature.add(Boolean.valueOf(escaped)); signature.add(Byte.valueOf(escapeChar)); + signature.add(option); if(structFieldComments != null) { signature.add(structFieldComments); } LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector .get(signature); if (result == null) { - result = new LazySimpleStructObjectInspector(structFieldNames, - structFieldObjectInspectors, structFieldComments, separator, - nullSequence, lastColumnTakesRest, escaped, escapeChar); + switch (option) { + case JAVA: + result = + new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, + structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, + escapeChar); + break; + case AVRO: + result = + new AvroLazyObjectInspector(structFieldNames, structFieldObjectInspectors, + structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, + escapeChar); + break; + default: + throw new IllegalArgumentException("Illegal ObjectInspector type [" + option + "]"); + } + cachedLazySimpleStructObjectInspector.put(signature, result); } return result; diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java index ddadfa548d7f..9611e9f9a384 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java @@ -18,10 +18,13 @@ package org.apache.hadoop.hive.serde2.lazy.objectinspector; +import java.util.ArrayList; import java.util.List; import org.apache.hadoop.hive.serde2.BaseStructObjectInspector; import org.apache.hadoop.hive.serde2.StructObject; +import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.io.Text; @@ -100,6 +103,20 @@ public Object getStructFieldData(Object data, StructField fieldRef) { int fieldID = f.getFieldID(); assert (fieldID >= 0 && fieldID < fields.size()); + ObjectInspector oi = f.getFieldObjectInspector(); + + if (oi instanceof AvroLazyObjectInspector) { + return ((AvroLazyObjectInspector) oi).getStructFieldData(data, fieldRef); + } + + if (oi instanceof MapObjectInspector) { + ObjectInspector valueOI = ((MapObjectInspector) oi).getMapValueObjectInspector(); + + if (valueOI instanceof AvroLazyObjectInspector) { + return ((AvroLazyObjectInspector) valueOI).getStructFieldData(data, fieldRef); + } + } + return struct.getField(fieldID); } @@ -108,8 +125,15 @@ public List<Object> getStructFieldsDataAsList(Object data) { if (data == null) { return null; } - StructObject struct = (StructObject) data; - return struct.getFieldsAsList(); + + // Iterate over all the fields picking up the nested structs within them + List<Object> result = new
ArrayList<Object>(fields.size()); + + for (MyField myField : fields) { + result.add(getStructFieldData(data, myField)); + } + + return result; } // For LazyStruct diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java index 9a226b38106c..eded0919f88a 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java @@ -57,7 +57,7 @@ public final class ObjectInspectorFactory { * for the same Java type. */ public enum ObjectInspectorOptions { - JAVA, THRIFT, PROTOCOL_BUFFERS + JAVA, THRIFT, PROTOCOL_BUFFERS, AVRO }; private static ConcurrentHashMap<Type, ObjectInspector> objectInspectorCache = new ConcurrentHashMap<Type, ObjectInspector>();
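As a minimal sketch of how the pieces above compose (this code is not part of the patch; the class name AvroOptionSketch, the struct layout, and the separator bytes are invented for illustration), passing the new ObjectInspectorOptions.AVRO value through LazyFactory reaches the switch added to LazyObjectInspectorFactory, which hands back an AvroLazyObjectInspector instead of a plain LazySimpleStructObjectInspector:

import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

public class AvroOptionSketch {
  public static void main(String[] args) throws SerDeException {
    // Hypothetical Hive type mirroring an Avro record stored in an HBase cell.
    TypeInfo recordType = TypeInfoUtils
        .getTypeInfoFromTypeString("struct<employeename:string,employeeid:bigint>");

    // Standard LazySimpleSerDe-style control bytes and NULL marker.
    byte[] separators = {(byte) 1, (byte) 2, (byte) 3, (byte) 4};
    Text nullSequence = new Text("\\N");

    // The trailing ObjectInspectorOptions argument introduced by this patch
    // selects the inspector family; AVRO routes the struct branch to
    // AvroLazyObjectInspector.
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(
        recordType, separators, 0, nullSequence, false, (byte) 0,
        ObjectInspectorOptions.AVRO);

    System.out.println(oi instanceof AvroLazyObjectInspector); // prints: true
  }
}

An inspector obtained this way still needs an Avro reader schema wired in before it can deserialize real data; in the HBase handler that schema is supplied through the serde properties (see the avro.schema.retriever key introduced above) rather than by calling the factory directly as this sketch does.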
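Similarly, a hypothetical sketch (class name LazyUnionSetterSketch invented for illustration) of the new eager-population path on LazyUnion: once setTag() and setField() have been called, getTag() and getField() short-circuit on the tagSet/fieldSet flags and never parse the byte buffer, which is what lets an Avro-aware caller inject an already-deserialized value:

import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazyUnion;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

public class LazyUnionSetterSketch {
  public static void main(String[] args) throws SerDeException {
    // Build a union inspector the ordinary way (defaults to the JAVA option).
    LazyUnionObjectInspector unionOI = (LazyUnionObjectInspector)
        LazyFactory.createLazyObjectInspector(
            TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,string>"),
            new byte[] {(byte) 1, (byte) 2, (byte) 3}, 0, new Text("\\N"),
            false, (byte) 0);

    LazyUnion union = new LazyUnion(unionOI);

    // The setters flip tagSet/fieldSet, so the getters below return the
    // injected values without ever touching raw bytes.
    union.setTag((byte) 1);
    union.setField("already-deserialized value");

    System.out.println(union.getTag());   // prints: 1
    System.out.println(union.getField()); // prints: already-deserialized value
  }
}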