Skip to content

Commit

Permalink
[QTL] adding listDelimiter to lookup parser spec (apache#2941)
Browse files Browse the repository at this point in the history
* adding listDelimiter to lookup parser spec

* cleaning code
  • Loading branch information
b-slim authored and nishantmonu51 committed May 10, 2016
1 parent 0c04650 commit 45b2e65
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ truck,something3,buck
|`keyColumn`|The name of the column containing the key|no|The first column|
|`valueColumn`|The name of the column containing the value|no|The second column|
|`delimiter`|The delimiter in the file|no|tab (`\t`)|
|`listDelimiter`|The list delimiter in the file|no|Ctrl-A (`\u0001`)|


*example input*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,13 +356,15 @@ public static class TSVFlatDataParser implements FlatDataParser
private final Parser<String, String> parser;
private final List<String> columns;
private final String delimiter;
private final String listDelimiter;
private final String keyColumn;
private final String valueColumn;

@JsonCreator
public TSVFlatDataParser(
@JsonProperty("columns") List<String> columns,
@JsonProperty("delimiter") String delimiter,
@JsonProperty("listDelimiter") String listDelimiter,
@JsonProperty("keyColumn") final String keyColumn,
@JsonProperty("valueColumn") final String valueColumn
)
Expand All @@ -372,8 +374,8 @@ public TSVFlatDataParser(
"Must specify more than one column to have a key value pair"
);
final DelimitedParser delegate = new DelimitedParser(
Optional.fromNullable(Strings.isNullOrEmpty(delimiter) ? null : delimiter),
Optional.<String>absent()
Optional.fromNullable(Strings.emptyToNull(delimiter)),
Optional.fromNullable(Strings.emptyToNull(listDelimiter))
);
Preconditions.checkArgument(
!(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
Expand All @@ -382,6 +384,7 @@ public TSVFlatDataParser(
delegate.setFieldNames(columns);
this.columns = columns;
this.delimiter = delimiter;
this.listDelimiter = listDelimiter;
this.keyColumn = Strings.isNullOrEmpty(keyColumn) ? columns.get(0) : keyColumn;
this.valueColumn = Strings.isNullOrEmpty(valueColumn) ? columns.get(1) : valueColumn;
Preconditions.checkArgument(
Expand Down Expand Up @@ -418,6 +421,12 @@ public String getValueColumn()
return this.valueColumn;
}

/**
 * Exposes the list delimiter for JSON serialization.
 *
 * @return the list delimiter exactly as it was handed to the constructor; this is the raw
 *         configured value, so it may be {@code null} (or empty) when none was supplied
 */
@JsonProperty
public String getListDelimiter()
{
  return this.listDelimiter;
}

@JsonProperty
public String getDelimiter()
{
Expand All @@ -434,9 +443,10 @@ public Parser<String, String> getParser()
public String toString()
{
return String.format(
"TSVFlatDataParser = { columns = %s, delimiter = '%s', keyColumn = %s, valueColumn = %s }",
"TSVFlatDataParser = { columns = %s, delimiter = '%s', listDelimiter = '%s',keyColumn = %s, valueColumn = %s }",
Arrays.toString(columns.toArray()),
delimiter,
listDelimiter,
keyColumn,
valueColumn
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,19 +121,31 @@ public void testTSV()
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
ImmutableList.of("col1", "col2", "col3"),
"|",
"col2",
null, "col2",
"col3"
);
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parse("A|B|C"));
}

/**
 * Checks that a TSV parser built with both an explicit field delimiter and an explicit
 * list delimiter still maps the key column ("col2") to the value column ("col3").
 */
@Test
public void testWithListDelimiterTSV()
{
  // NOTE(review): because the backslash is itself escaped, each of these literals is the
  // six-character text backslash-u-0-0-0-N, not the control character U+000N. The input row
  // below is built with the same literal, so the split still works -- confirm whether real
  // control characters were intended.
  final String fieldDelimiter = "\\u0001";
  final String listDelimiter = "\\u0002";
  final String row = "A\\u0001B\\u0001C";

  final URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
      ImmutableList.of("col1", "col2", "col3"),
      fieldDelimiter,
      listDelimiter,
      "col2",
      "col3"
  );

  final Map<String, String> expected = ImmutableMap.of("B", "C");
  final Map<String, String> actual = parser.getParser().parse(row);
  Assert.assertEquals(expected, actual);
}

@Test(expected = IllegalArgumentException.class)
public void testBadTSV()
{
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
ImmutableList.of("col1", "col2", "col3fdsfds"),
",",
"col2",
null, "col2",
"col3"
);
Map<String, String> map = parser.getParser().parse("A,B,C");
Expand All @@ -147,7 +159,7 @@ public void testBadTSV2()
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
ImmutableList.of("col1", "col2", "col3"),
",",
"col2",
null, "col2",
"col3"
);
Map<String, String> map = parser.getParser().parse("A");
Expand Down Expand Up @@ -293,7 +305,7 @@ public void testSimpleJSONSerDe() throws IOException
),
new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper),
new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"),
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", "A", "B")
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", null, "A", "B")
)) {
final String str = mapper.writeValueAsString(parser);
final URIExtractionNamespace.FlatDataParser parser2 = mapper.readValue(
Expand All @@ -318,7 +330,7 @@ public void testSimpleToString() throws IOException
),
new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper),
new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"),
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", "A", "B")
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", null, "A", "B")
)) {
Assert.assertFalse(parser.toString().contains("@"));
}
Expand Down

0 comments on commit 45b2e65

Please sign in to comment.