forked from apache/flink
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FLINK-2025] add support for booleans in csv parser
The following values are parsed as booleans: "true" or "1" -> true; "false" or "0" -> false. All checks are performed case-insensitively. This closes apache#685.
- Loading branch information
Showing
7 changed files
with
282 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
96 changes: 96 additions & 0 deletions
96
flink-core/src/main/java/org/apache/flink/types/parser/BooleanParser.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.flink.types.parser; | ||
|
||
public class BooleanParser extends FieldParser<Boolean> { | ||
|
||
private boolean result; | ||
|
||
/** Values for true and false respectively. Must be lower case. */ | ||
private static final byte[][] TRUE = new byte[][] { | ||
"true".getBytes(), | ||
"1".getBytes() | ||
}; | ||
private static final byte[][] FALSE = new byte[][] { | ||
"false".getBytes(), | ||
"0".getBytes() | ||
}; | ||
|
||
@Override | ||
public int parseField(byte[] bytes, int startPos, int limit, byte[] delim, Boolean reuse) { | ||
|
||
final int delimLimit = limit - delim.length + 1; | ||
|
||
int i = startPos; | ||
|
||
while (i < limit) { | ||
if (i < delimLimit && delimiterNext(bytes, i, delim)) { | ||
break; | ||
} | ||
i++; | ||
} | ||
|
||
for (byte[] aTRUE : TRUE) { | ||
if (byteArrayEquals(bytes, startPos, i - startPos, aTRUE)) { | ||
result = true; | ||
return (i == limit) ? limit : i + delim.length; | ||
} | ||
} | ||
|
||
for (byte[] aFALSE : FALSE) { | ||
if (byteArrayEquals(bytes, startPos, i - startPos, aFALSE)) { | ||
result = false; | ||
return (i == limit) ? limit : i + delim.length; | ||
} | ||
} | ||
|
||
setErrorState(ParseErrorState.BOOLEAN_INVALID); | ||
return -1; | ||
} | ||
|
||
@Override | ||
public Boolean getLastResult() { | ||
return result; | ||
} | ||
|
||
@Override | ||
public Boolean createValue() { | ||
return false; | ||
} | ||
|
||
/** | ||
* Checks if a part of a byte array matches another byte array with chars (case-insensitive). | ||
* @param source The source byte array. | ||
* @param start The offset into the source byte array. | ||
* @param length The length of the match. | ||
* @param other The byte array which is fully compared to the part of the source array. | ||
* @return true if other can be found in the specified part of source, false otherwise. | ||
*/ | ||
private static boolean byteArrayEquals(byte[] source, int start, int length, byte[] other) { | ||
if (length != other.length) { | ||
return false; | ||
} | ||
for (int i = 0; i < other.length; i++) { | ||
if (Character.toLowerCase(source[i + start]) != other[i]) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
} |
47 changes: 47 additions & 0 deletions
47
flink-core/src/main/java/org/apache/flink/types/parser/BooleanValueParser.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.flink.types.parser; | ||
|
||
import org.apache.flink.types.BooleanValue; | ||
|
||
public class BooleanValueParser extends FieldParser<BooleanValue> { | ||
|
||
private BooleanParser parser = new BooleanParser(); | ||
|
||
private BooleanValue result; | ||
|
||
@Override | ||
public int parseField(byte[] bytes, int startPos, int limit, byte[] delim, BooleanValue reuse) { | ||
int returnValue = parser.parseField(bytes, startPos, limit, delim, reuse.getValue()); | ||
setErrorState(parser.getErrorState()); | ||
reuse.setValue(parser.getLastResult()); | ||
result = reuse; | ||
return returnValue; | ||
} | ||
|
||
@Override | ||
public BooleanValue getLastResult() { | ||
return result; | ||
} | ||
|
||
@Override | ||
public BooleanValue createValue() { | ||
return new BooleanValue(false); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
61 changes: 61 additions & 0 deletions
61
flink-core/src/test/java/org/apache/flink/types/parser/BooleanParserTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
package org.apache.flink.types.parser; | ||
|
||
|
||
public class BooleanParserTest extends ParserTestBase<Boolean> { | ||
|
||
|
||
@Override | ||
public String[] getValidTestValues() { | ||
return new String[] { | ||
"true", "false", "0", "1", "TRUE", "FALSE", "True", "False" | ||
}; | ||
} | ||
|
||
@Override | ||
public Boolean[] getValidTestResults() { | ||
return new Boolean[] { | ||
true, false, false, true, true, false, true, false | ||
}; | ||
} | ||
|
||
@Override | ||
public String[] getInvalidTestValues() { | ||
return new String[]{ | ||
"yes", "no", "2", "-1", "wahr", "falsch", "", "asdf" | ||
}; | ||
} | ||
|
||
@Override | ||
public boolean allowsEmptyField() { | ||
return false; | ||
} | ||
|
||
@Override | ||
public FieldParser<Boolean> getParser() { | ||
return new BooleanParser(); | ||
} | ||
|
||
@Override | ||
public Class<Boolean> getTypeClass() { | ||
return Boolean.class; | ||
} | ||
} |
64 changes: 64 additions & 0 deletions
64
flink-core/src/test/java/org/apache/flink/types/parser/BooleanValueParserTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
package org.apache.flink.types.parser; | ||
|
||
|
||
import org.apache.flink.types.BooleanValue; | ||
|
||
public class BooleanValueParserTest extends ParserTestBase<BooleanValue> { | ||
|
||
|
||
@Override | ||
public String[] getValidTestValues() { | ||
return new String[] { | ||
"true", "false", "0", "1", "TRUE", "FALSE", "True", "False" | ||
}; | ||
} | ||
|
||
@Override | ||
public BooleanValue[] getValidTestResults() { | ||
return new BooleanValue[] { | ||
new BooleanValue(true), new BooleanValue(false), new BooleanValue(false), new BooleanValue(true), | ||
new BooleanValue(true), new BooleanValue(false), new BooleanValue(true), new BooleanValue(false) | ||
}; | ||
} | ||
|
||
@Override | ||
public String[] getInvalidTestValues() { | ||
return new String[]{ | ||
"yes", "no", "2", "-1", "wahr", "falsch", "", "asdf" | ||
}; | ||
} | ||
|
||
@Override | ||
public boolean allowsEmptyField() { | ||
return false; | ||
} | ||
|
||
@Override | ||
public FieldParser<BooleanValue> getParser() { | ||
return new BooleanValueParser(); | ||
} | ||
|
||
@Override | ||
public Class<BooleanValue> getTypeClass() { | ||
return BooleanValue.class; | ||
} | ||
} |