converter and validator
adamdubiel committed Feb 5, 2016
0 parents commit ffef802
Showing 32 changed files with 1,969 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .gitignore
@@ -0,0 +1,23 @@
# Intellij Idea project files
.idea
*.iml
*.ipr
*.iws

# gradle config
.gradle

# project binaries
build
out
classes

# sonar
sonar-project.properties

# mac os x
.DS_Store

# netbeans
.nb-gradle
.nb-gradle-properties
114 changes: 114 additions & 0 deletions README.md
@@ -0,0 +1,114 @@
# json-avro-converter

A JSON to Avro conversion tool designed to make migration to Avro easier. It includes a simple command line validator.

## Motivation

Apache Avro ships with some very advanced and efficient tools for reading and writing binary Avro, but its support
for JSON to Avro conversion is unfortunately limited and requires wrapping fields with type declarations if you have
optional fields in your schema. This tool is meant to help with migrating a project from JSON to Avro without
having to modify the JSON data, as long as it conforms to the schema.
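
For example, for an optional (union) field the standard Avro JSON encoding expects the value to be wrapped in a type declaration, while this converter accepts the plain form. A hypothetical field `favourite_colour` of type `["null", "string"]` is used here purely for illustration; it is not part of the example schema below.

Standard Avro JSON encoding (wrapped):

```json
{ "favourite_colour": { "string": "blue" } }
```

Plain JSON accepted by this converter:

```json
{ "favourite_colour": "blue" }
```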

## JSON2Avro Converter

### Features

* conversion of binary JSON to binary Avro
* conversion of binary JSON to GenericData.Record
* conversion of binary Avro to binary JSON
* optional fields are supported (unions do not require wrapping)
* unknown fields that are not declared in the schema are ignored

### Dependencies

```groovy
dependencies {
    compile group: 'tech.allegro.schema.json2avro', name: 'json-avro-converter', version: 'x.x.x'
}
```

### Basic usage

```java
import tech.allegro.schema.json2avro.converter.AvroConversionException;
import tech.allegro.schema.json2avro.converter.JsonAvroConverter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.Schema;

// Avro schema with one string field: username
String schema =
        "{" +
        "   \"type\" : \"record\"," +
        "   \"name\" : \"Acme\"," +
        "   \"fields\" : [{ \"name\" : \"username\", \"type\" : \"string\" }]" +
        "}";

String json = "{ \"username\": \"mike\" }";

JsonAvroConverter converter = new JsonAvroConverter();

// conversion to binary Avro
byte[] avro = converter.convertToAvro(json.getBytes(), schema);

// conversion to GenericData.Record
GenericData.Record record = converter.convertToGenericDataRecord(json.getBytes(), new Schema.Parser().parse(schema));

// conversion from binary Avro to JSON
byte[] binaryJson = converter.convertToJson(avro, schema);

// exception handling
String invalidJson = "{ \"username\": 8 }";

try {
    converter.convertToAvro(invalidJson.getBytes(), schema);
} catch (AvroConversionException ex) {
    // JSON does not match the schema
}
```
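
The converter also exposes a `convertToJson(GenericRecord)` overload (see `JsonAvroConverter` further down in this commit), so the `GenericData.Record` obtained above can be serialized straight back to JSON; a minimal sketch continuing the example:

```java
// write the GenericData.Record from the example above back out as JSON bytes
byte[] jsonFromRecord = converter.convertToJson(record);
System.out.println(new String(jsonFromRecord)); // e.g. {"username":"mike"}
```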

## Validator

A command line tool for validating your JSON and Avro documents against an Avro schema.

### Build

To bundle the tool into a single executable JAR:

```bash
./gradlew :validator:shadowJar
java -jar validator/build/libs/json2avro-validator.jar --help
```

### Usage

Running the validator with the `--help` option prints a help message listing all possible arguments.
Sample Avro schema and messages can be found in:

* schema: `validator/src/test/resources/user.avcs`
* JSON message: `validator/src/test/resources/user.json`
* Avro message: `validator/src/test/resources/user.avro`

#### JSON to Avro

You can validate your JSON to Avro conversion:

```bash
java -jar json2avro-validator.jar -s user.avcs -i user.json
```

If everything processes correctly, the validator will exit with a zero status code.
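
For example, in a POSIX shell you can check the exit status right after a run:

```bash
java -jar json2avro-validator.jar -s user.avcs -i user.json
echo $?   # prints 0 when validation succeeded
```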

#### Avro to JSON

You can convert Avro binary data into JSON by setting the `-m avro2json` option:

```bash
java -jar json2avro-validator.jar -s user.avcs -i user.avro -m avro2json
```

#### JSON to Avro to JSON

If you would like to know what a message will look like after encoding and decoding, invoke:

```bash
java -jar json2avro-validator.jar -s user.avcs -i user.json -m json2avro2json
```
37 changes: 37 additions & 0 deletions build.gradle
@@ -0,0 +1,37 @@
plugins {
    id 'java'
    id 'pl.allegro.tech.build.axion-release' version '1.3.3'
}

scmVersion {
    tag {
        prefix = 'json-avro-converter'
    }
    versionCreator 'versionWithBranch'
}

sourceCompatibility = 1.8

project.group = 'tech.allegro.schema.json2avro'
project.version = scmVersion.version

subprojects {

    project.ext.versions = [
            spock: '1.0-groovy-2.4'
    ]

    repositories {
        mavenCentral()
    }

    test {
        testLogging {
            exceptionFormat 'full'
        }
    }
}

task wrapper(type: Wrapper) {
    gradleVersion = '2.10'
}
13 changes: 13 additions & 0 deletions converter/build.gradle
@@ -0,0 +1,13 @@

plugins {
    id 'java'
    id 'groovy'
    id 'maven'
    id 'jacoco'
}

dependencies {
    compile group: 'org.apache.avro', name: 'avro', version: '1.7.7'

    testCompile group: 'org.spockframework', name: 'spock-core', version: versions.spock
}
@@ -0,0 +1,30 @@
package org.apache.avro.io;

import org.apache.avro.Schema;
import org.apache.avro.io.parsing.Symbol;
import org.codehaus.jackson.JsonGenerator;

import java.io.IOException;
import java.io.OutputStream;

public class NoWrappingJsonEncoder extends JsonEncoder {
    public NoWrappingJsonEncoder(Schema sc, OutputStream out) throws IOException {
        super(sc, out);
    }

    public NoWrappingJsonEncoder(Schema sc, OutputStream out, boolean pretty) throws IOException {
        super(sc, out, pretty);
    }

    public NoWrappingJsonEncoder(Schema sc, JsonGenerator out) throws IOException {
        super(sc, out);
    }

    @Override
    public void writeIndex(int unionIndex) throws IOException {
        // Unlike the stock JsonEncoder, do not emit a { "type": ... } wrapper for union values;
        // only advance the parser to the selected union branch.
        parser.advance(Symbol.UNION);
        Symbol.Alternative top = (Symbol.Alternative) parser.popSymbol();
        Symbol symbol = top.getSymbol(unionIndex);
        parser.pushSymbol(symbol);
    }
}
@@ -0,0 +1,14 @@
package tech.allegro.schema.json2avro.converter;

import org.apache.avro.AvroRuntimeException;

public class AvroConversionException extends AvroRuntimeException {

    public AvroConversionException(String message) {
        super(message);
    }

    public AvroConversionException(String message, Throwable cause) {
        super(message, cause);
    }
}
@@ -0,0 +1,75 @@
package tech.allegro.schema.json2avro.converter;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.NoWrappingJsonEncoder;
import org.codehaus.jackson.map.ObjectMapper;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

public class JsonAvroConverter {
    private JsonGenericRecordReader recordReader;

    public JsonAvroConverter() {
        this.recordReader = new JsonGenericRecordReader();
    }

    public JsonAvroConverter(ObjectMapper objectMapper) {
        this.recordReader = new JsonGenericRecordReader(objectMapper);
    }

    public byte[] convertToAvro(byte[] data, String schema) {
        return convertToAvro(data, new Schema.Parser().parse(schema));
    }

    public byte[] convertToAvro(byte[] data, Schema schema) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
            GenericDatumWriter<Object> writer = new GenericDatumWriter<>(schema);
            writer.write(convertToGenericDataRecord(data, schema), encoder);
            encoder.flush();
            return outputStream.toByteArray();
        } catch (IOException e) {
            throw new AvroConversionException("Failed to convert to AVRO.", e);
        }
    }

    public GenericData.Record convertToGenericDataRecord(byte[] data, Schema schema) {
        return recordReader.read(data, schema);
    }

    public byte[] convertToJson(byte[] avro, String schema) {
        return convertToJson(avro, new Schema.Parser().parse(schema));
    }

    public byte[] convertToJson(byte[] avro, Schema schema) {
        try {
            BinaryDecoder binaryDecoder = DecoderFactory.get().binaryDecoder(avro, null);
            GenericRecord record = new GenericDatumReader<GenericRecord>(schema).read(null, binaryDecoder);
            return convertToJson(record);
        } catch (IOException e) {
            throw new AvroConversionException("Failed to create avro structure.", e);
        }
    }

    public byte[] convertToJson(GenericRecord record) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            NoWrappingJsonEncoder jsonEncoder = new NoWrappingJsonEncoder(record.getSchema(), outputStream);
            new GenericDatumWriter<GenericRecord>(record.getSchema()).write(record, jsonEncoder);
            jsonEncoder.flush();
            return outputStream.toByteArray();
        } catch (IOException e) {
            throw new AvroConversionException("Failed to convert to JSON.", e);
        }
    }
}