Skip to content

Commit

Permalink
Merge pull request eugenp#17840 from AndreiBranza/BAEL-8046-get-the-s…
Browse files Browse the repository at this point in the history
…chema-from-an-avro-file

BAEL-8046 | Article code
  • Loading branch information
davidmartinezbarua authored Oct 21, 2024
2 parents d24934a + 62a6de0 commit 9751466
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 1 deletion.
2 changes: 1 addition & 1 deletion apache-libraries/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
<bval.version>3.0.1</bval.version>
<jakarta.validation.validation-api.version>3.1.0</jakarta.validation.validation-api.version><opennlp.opennlp-tools.version>1.8.4</opennlp.opennlp-tools.version>
<solr.solr-solrj.version>6.4.0</solr.solr-solrj.version>
<apache.avro.version>1.11.3</apache.avro.version>
<apache.avro.version>1.12.0</apache.avro.version>
</properties>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.baeldung.apache.avro.schemafromavrofile;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers for inspecting Avro object container files: one extracts the
 * writer schema embedded in the file, the other loads the file's records.
 */
public class AvroSchemaExtractor {

    /**
     * Returns the schema reported by a {@link DataFileReader} opened on the given file.
     *
     * @param avroFilePath path of the Avro file to open
     * @return the schema obtained from the reader
     * @throws IOException if the file cannot be opened or read
     */
    public static Schema extractSchema(String avroFilePath) throws IOException {
        File avroFile = new File(avroFilePath);
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();

        // try-with-resources closes the reader once the schema has been obtained.
        try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader)) {
            return dataFileReader.getSchema();
        }
    }

    /**
     * Reads every record stored in the given Avro file, in file order.
     *
     * <p>Each element of the returned list is a distinct record instance.
     *
     * @param avroFilePath path of the Avro file to open
     * @return a mutable list with one entry per record; empty if the file holds no records
     * @throws IOException if the file cannot be opened or a record cannot be read
     */
    public static List<GenericRecord> readAvroData(String avroFilePath) throws IOException {
        File avroFile = new File(avroFilePath);
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        List<GenericRecord> records = new ArrayList<>();

        try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datumReader)) {
            while (dataFileReader.hasNext()) {
                // Pass null instead of the previous record: handing the last record
                // back as the "reuse" argument lets the reader overwrite and return
                // that same object, so every list entry would alias one instance
                // holding the final record's values. next(null) (rather than next())
                // keeps read failures surfacing as IOException.
                records.add(dataFileReader.next(null));
            }
        }
        return records;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package com.baeldung.apache.avro.schemafromavrofile;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

/**
 * Tests for {@link AvroSchemaExtractor}. Before each test a one-record Avro
 * file is written into a JUnit-managed temporary directory; the tests then
 * read the schema and the data back from that file.
 */
public class AvroSchemaExtractorUnitTest {

    @TempDir
    Path tempDir;

    private File avroFile;
    private Schema schema;

    /** Parses the "User" schema and writes a single user record to {@code test.avro}. */
    @BeforeEach
    void setUp() throws IOException {
        // Content lines share the closing delimiter's indentation, so the
        // resulting string carries no leading whitespace on any line.
        String userSchemaJson = """
            {
            "type": "record",
            "name": "User",
            "fields": [
            {"name": "name", "type": "string"},
            {"name": "age", "type": "int"}
            ]
            }
            """;
        schema = new Schema.Parser().parse(userSchemaJson);
        avroFile = tempDir.resolve("test.avro").toFile();

        GenericRecord johnDoe = new GenericData.Record(schema);
        johnDoe.put("name", "John Doe");
        johnDoe.put("age", 30);

        writeToAvroFile(johnDoe);
    }

    /** Creates {@code avroFile} with {@code schema} and appends the given record to it. */
    private void writeToAvroFile(GenericRecord recordToWrite) throws IOException {
        GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
        try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
            fileWriter.create(schema, avroFile);
            fileWriter.append(recordToWrite);
        }
    }

    @Test
    void whenSchemaIsExistent_thenItIsExtractedCorrectly() throws IOException {
        String path = avroFile.getPath();

        Schema extractedSchema = AvroSchemaExtractor.extractSchema(path);

        assertEquals(schema, extractedSchema);
    }

    @Test
    void whenAvroFileHasContent_thenItIsReadCorrectly() throws IOException {
        String path = avroFile.getPath();

        List<GenericRecord> records = AvroSchemaExtractor.readAvroData(path);

        // Field position 0 is "name" in the schema declared in setUp.
        GenericRecord firstRecord = records.get(0);
        Object nameValue = firstRecord.get(0);
        assertEquals("John Doe", nameValue.toString());
    }
}

0 comments on commit 9751466

Please sign in to comment.