-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from linkedin/master
Merging from master
- Loading branch information
Showing
2 changed files
with
133 additions
and
2 deletions.
There are no files selected for viewing
131 changes: 131 additions & 0 deletions
131
...ka-coders/src/main/java/com/linkedin/camus/etl/kafka/coders/JSONToAvroMessageDecoder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
package com.linkedin.camus.etl.kafka.coders;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.JsonDecoder;
import org.apache.log4j.Logger;

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.linkedin.camus.coders.CamusWrapper;
import com.linkedin.camus.coders.Message;
import com.linkedin.camus.coders.MessageDecoder;
import com.linkedin.camus.coders.MessageDecoderException;
import com.linkedin.camus.schemaregistry.CachedSchemaRegistry;
import com.linkedin.camus.schemaregistry.SchemaRegistry;

public class JSONToAvroMessageDecoder extends MessageDecoder<Message, GenericData.Record> { | ||
private static final Logger log = Logger.getLogger(JsonStringMessageDecoder.class); | ||
public static final String CAMUS_SCHEMA_ID_FIELD = "camus.message.schema.id.field"; | ||
public static final String DEFAULT_SCHEMA_ID_FIELD = "schemaID"; | ||
JsonParser jsonParser; | ||
private String schemaIDField; | ||
protected DecoderFactory decoderFactory; | ||
protected SchemaRegistry<Schema> registry; | ||
private Schema latestSchema; | ||
|
||
public JSONToAvroMessageDecoder() { | ||
this.jsonParser = new JsonParser(); | ||
} | ||
|
||
public void init(Properties props, String topicName) { | ||
super.init(props, topicName); | ||
this.props = props; | ||
this.topicName = topicName; | ||
|
||
this.schemaIDField = props.getProperty(CAMUS_SCHEMA_ID_FIELD, DEFAULT_SCHEMA_ID_FIELD); | ||
try { | ||
SchemaRegistry<Schema> registry = (SchemaRegistry<Schema>) Class.forName( | ||
props.getProperty(KafkaAvroMessageEncoder.KAFKA_MESSAGE_CODER_SCHEMA_REGISTRY_CLASS)).newInstance(); | ||
log.info("Prop " + KafkaAvroMessageEncoder.KAFKA_MESSAGE_CODER_SCHEMA_REGISTRY_CLASS + " is: " | ||
+ props.getProperty(KafkaAvroMessageEncoder.KAFKA_MESSAGE_CODER_SCHEMA_REGISTRY_CLASS)); | ||
log.info("Underlying schema registry for topic: " + topicName + " is: " + registry); | ||
registry.init(props); | ||
|
||
this.registry = new CachedSchemaRegistry<Schema>(registry, props); | ||
this.latestSchema = ((Schema) registry.getLatestSchemaByTopic(topicName).getSchema()); | ||
} catch (Exception e) { | ||
throw new MessageDecoderException(e); | ||
} | ||
|
||
this.decoderFactory = DecoderFactory.get(); | ||
} | ||
|
||
|
||
public class MessageDecoderHelper { | ||
private Schema schema; | ||
private Schema targetSchema; | ||
private final SchemaRegistry<Schema> registry; | ||
private final String topicName; | ||
|
||
public MessageDecoderHelper(SchemaRegistry<Schema> registry, String topicName) { | ||
this.registry = registry; | ||
this.topicName = topicName; | ||
} | ||
|
||
public Schema getSchema() { | ||
return this.schema; | ||
} | ||
|
||
public Schema getTargetSchema() { | ||
return this.targetSchema; | ||
} | ||
|
||
public MessageDecoderHelper invoke(String id) { | ||
this.schema = (this.registry.getSchemaByID(this.topicName, id)); | ||
if (this.schema == null) | ||
throw new IllegalStateException("Unknown schema id: " + id); | ||
this.targetSchema = JSONToAvroMessageDecoder.this.latestSchema; | ||
return this; | ||
} | ||
} | ||
|
||
@Override | ||
public CamusWrapper<Record> decode(Message message) { | ||
String payloadString = new String(message.getPayload()); | ||
try { | ||
JsonObject jsonObject = this.jsonParser.parse(payloadString.trim()).getAsJsonObject(); | ||
String templateID = jsonObject.get(schemaIDField).getAsString(); | ||
|
||
MessageDecoderHelper helper = new MessageDecoderHelper(this.registry, this.topicName).invoke(templateID); | ||
GenericRecord datum = null; | ||
DatumReader<GenericRecord> reader = helper.getTargetSchema() == null ? new GenericDatumReader<GenericRecord>( | ||
helper.getSchema()) : new GenericDatumReader<GenericRecord>(helper.getSchema(), helper.getTargetSchema()); | ||
InputStream inStream = new ByteArrayInputStream(message.getPayload()); | ||
JsonDecoder jsonDecoder = DecoderFactory.get().jsonDecoder(helper.getSchema(), inStream); | ||
datum = (GenericRecord) reader.read(datum, jsonDecoder); | ||
ByteArrayOutputStream output = new ByteArrayOutputStream(); | ||
GenericDatumWriter<GenericRecord> writer = helper.getTargetSchema() == null ? new GenericDatumWriter<GenericRecord>( | ||
helper.getSchema()) : new GenericDatumWriter<GenericRecord>(helper.getTargetSchema()); | ||
Encoder encoder = EncoderFactory.get().binaryEncoder(output, null); | ||
|
||
writer.write(datum, encoder); | ||
encoder.flush(); | ||
output.close(); | ||
|
||
DatumReader<GenericRecord> avroReader = helper.getTargetSchema() == null ? new GenericDatumReader<GenericRecord>( | ||
helper.getSchema()) : new GenericDatumReader<GenericRecord>(helper.getTargetSchema()); | ||
return new KafkaAvroMessageDecoder.CamusAvroWrapper((GenericData.Record) avroReader.read(null, | ||
this.decoderFactory.binaryDecoder(output.toByteArray(), 0, output.toByteArray().length, null))); | ||
} catch (RuntimeException e) { | ||
log.error("Caught exception while parsing JSON string '" + payloadString + "'."); | ||
throw new RuntimeException(e); | ||
} catch (IOException e) { | ||
throw new MessageDecoderException(e); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters