Skip to content

Commit

Permalink
HIVE-6147 - Support avro data stored in HBase columns (Swarnim Kulkarni via Brock)
Browse files Browse the repository at this point in the history

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1623845 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Brock Noland committed Sep 9, 2014
1 parent afecaff commit b8c0252
Show file tree
Hide file tree
Showing 35 changed files with 4,406 additions and 111 deletions.
144 changes: 144 additions & 0 deletions hbase-handler/if/test/avro_test.avpr
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
{
"protocol": "EmployeeAvro",
"namespace": "org.apache.hadoop.hive.hbase.avro",
"types": [
{
"type": "enum",
"name": "Gender",
"symbols": [
"MALE",
"FEMALE"
]
},
{
"type": "record",
"name": "HomePhone",
"fields": [
{
"name": "areaCode",
"type": "long"
},
{
"name": "number",
"type": "long"
}
]
},
{
"type": "record",
"name": "OfficePhone",
"fields": [
{
"name": "areaCode",
"type": "long"
},
{
"name": "number",
"type": "long"
}
]
},
{
"type": "record",
"name": "Address",
"fields": [
{
"name": "address1",
"type": "string"
},
{
"name": "address2",
"type": "string"
},
{
"name": "city",
"type": "string"
},
{
"name": "zipcode",
"type": "long"
},
{
"name": "county",
"type": [
"HomePhone",
"OfficePhone",
"string",
"null"
]
},
{
"name": "aliases",
"type": [
{
"type": "array",
"items": "string"
},
"null"
]
},
{
"name": "metadata",
"type": [
"null",
{
"type": "map",
"values": "string"
}
]
}
]
},
{
"type": "record",
"name": "ContactInfo",
"fields": [
{
"name": "address",
"type": [
{
"type": "array",
"items": "Address"
},
"null"
]
},
{
"name": "homePhone",
"type": "HomePhone"
},
{
"name": "officePhone",
"type": "OfficePhone"
}
]
},
{
"type": "record",
"name": "Employee",
"fields": [
{
"name": "employeeName",
"type": "string"
},
{
"name": "employeeID",
"type": "long"
},
{
"name": "age",
"type": "long"
},
{
"name": "gender",
"type": "Gender"
},
{
"name": "contactInfo",
"type": "ContactInfo"
}
]
}
],
"messages": { }
}
67 changes: 67 additions & 0 deletions hbase-handler/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.7.6</version>
</dependency>
</dependencies>

<profiles>
Expand Down Expand Up @@ -100,6 +105,12 @@
<artifactId>hbase-hadoop-compat</artifactId>
<version>${hbase.hadoop1.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-test</artifactId>
<version>${hadoop-20S.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
Expand Down Expand Up @@ -132,12 +143,26 @@
<version>${hadoop-23.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop-23.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop-23.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop-23.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-hadoop2-compat</artifactId>
Expand Down Expand Up @@ -190,6 +215,12 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-servlet</artifactId>
<version>${jersey.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
</profile>
</profiles>
Expand All @@ -209,6 +240,42 @@
</execution>
</executions>
</plugin>
<plugin>
<!-- Runs the Avro "protocol" goal during generate-test-sources: protocol files are read
     from if/test and the generated output is written under src/test. -->
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
<version>1.7.6</version>
<executions>
<execution>
<phase>generate-test-sources</phase>
<goals>
<goal>protocol</goal>
</goals>
<configuration>
<!-- Input directory holding the test protocol definitions. -->
<testSourceDirectory>${project.basedir}/if/test</testSourceDirectory>
<!-- Output directory for the code generated from those protocols. -->
<testOutputDirectory>${project.basedir}/src/test</testOutputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- Registers an extra test source root during generate-test-sources.
     NOTE(review): the avro-maven-plugin execution in this pom writes its output to
     ${project.basedir}/src/test, not to the directory added here; confirm that
     src/gen/avro/gen-java is the intended path. -->
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>1.7</version>
<executions>
<execution>
<id>add-test-sources</id>
<phase>generate-test-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<!-- Directory added to the module's test source roots. -->
<source>${project.basedir}/src/gen/avro/gen-java</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,21 @@

package org.apache.hadoop.hive.hbase;

import com.google.common.collect.Iterators;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import com.google.common.collect.Iterators;

public class ColumnMappings implements Iterable<ColumnMappings.ColumnMapping> {

Expand All @@ -53,24 +58,41 @@ public int size() {
return columnsMapping.length;
}

String toTypesString() {
/**
 * Produces the column names string for these mappings when auto-generation is requested.
 *
 * @param tbl table properties, handed unchanged to {@code HBaseSerDeHelper.generateColumns}
 * @param autogenerate auto-generation flag; only the exact value {@code "true"} enables it
 * @return the text {@code generateColumns} wrote into the builder when the flag is
 *         {@code "true"}, otherwise the empty string
 */
String toNamesString(Properties tbl, String autogenerate) {
  // Constant-first equals also rejects a null flag.
  if (!"true".equals(autogenerate)) {
    return StringUtils.EMPTY;
  }

  StringBuilder names = new StringBuilder();
  HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), names);
  return names.toString();
}

/**
 * Builds the column types string for these mappings.
 *
 * When {@code autogenerate} is exactly {@code "true"}, the types are appended to the builder
 * by {@code HBaseSerDeHelper.generateColumnTypes}. Otherwise one type per mapping is
 * appended, separated by {@code ":"}: the row key and individual columns become STRING, and
 * a mapping without a qualifier (a whole column family) becomes MAP of STRING to STRING.
 *
 * NOTE(review): this text comes from a diff rendered without +/- markers. The first loop
 * below looks like the removed pre-change body and the {@code if (autogenerate ...)} section
 * like its replacement; the braces do not balance as shown. Confirm against the real file.
 *
 * @param tbl table properties, passed to {@code generateColumnTypes}
 * @param conf configuration, passed to {@code generateColumnTypes}
 * @param autogenerate auto-generation flag; may be null
 * @return the assembled types string
 * @throws SerDeException presumably raised by {@code generateColumnTypes}; verify there
 */
String toTypesString(Properties tbl, Configuration conf, String autogenerate)
throws SerDeException {
StringBuilder sb = new StringBuilder();
for (ColumnMapping colMap : columnsMapping) {
// separate consecutive type names with ':'
if (sb.length() > 0) {
sb.append(":");
}
if (colMap.hbaseRowKey) {
// the row key column becomes a STRING
sb.append(serdeConstants.STRING_TYPE_NAME);
} else if (colMap.qualifierName == null) {
// a column family becomes a MAP
sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+ serdeConstants.STRING_TYPE_NAME + ">");
} else {
// an individual column becomes a STRING
sb.append(serdeConstants.STRING_TYPE_NAME);

// only the exact string "true" turns on auto-generation; null is treated as off
if (autogenerate != null && autogenerate.equals("true")) {
HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
} else {
for (ColumnMapping colMap : columnsMapping) {
// separate consecutive type names with ':'
if (sb.length() > 0) {
sb.append(":");
}
if (colMap.hbaseRowKey) {
// the row key column becomes a STRING
sb.append(serdeConstants.STRING_TYPE_NAME);
} else if (colMap.qualifierName == null) {
// a column family becomes a MAP
sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+ serdeConstants.STRING_TYPE_NAME + ">");
} else {
// an individual column becomes a STRING
sb.append(serdeConstants.STRING_TYPE_NAME);
}
}
}

return sb.toString();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
package org.apache.hadoop.hive.hbase;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
Expand Down Expand Up @@ -94,4 +96,14 @@ public LazyObject<? extends ObjectInspector> toLazyObject(int fieldID, byte[] by

return lazyObject;
}

/**
 * Describes the individual parts that make up the key.
 *
 * <p>This default implementation reports no parts and returns an empty map. Consumers can
 * override it to expose the names and types of their own key parts.
 *
 * @return map from part name to part type; empty in this default implementation
 */
public Map<String, String> getParts() {
  final Map<String, String> noParts = Collections.emptyMap();
  return noParts;
}
}
Loading

0 comments on commit b8c0252

Please sign in to comment.