Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CALCITE-5899] Add HAMMING_DISTANCE function (enabled in Presto Library) #3351

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_BASE32;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_BASE64;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_HEX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.HAMMING_DISTANCE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ILIKE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_DEPTH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_INSERT;
Expand Down Expand Up @@ -550,6 +551,7 @@ Builder populate() {
defineMethod(LEVENSHTEIN, BuiltInMethod.LEVENSHTEIN.method, NullPolicy.STRICT);
defineMethod(SPLIT, "split", NullPolicy.STRICT);
defineMethod(PARSE_URL, BuiltInMethod.PARSE_URL.method, NullPolicy.STRICT);
defineMethod(HAMMING_DISTANCE, BuiltInMethod.HAMMING_DISTANCE.method, NullPolicy.STRICT);

map.put(TRIM, new TrimImplementor());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,9 @@ ExInst<CalciteException> illegalArgumentForTableFunctionCall(String a0,
@BaseMessage("Array index {0,number,#} is out of bounds")
ExInst<CalciteException> arrayIndexOutOfBounds(int idx);

@BaseMessage("The input strings to hamming_distance function must have the same length")
ExInst<CalciteException> illegalArgumentsInHammingDistanceFunc();

@BaseMessage("Substring error: negative substring length not allowed")
ExInst<CalciteException> illegalNegativeSubstringLength();

Expand Down
22 changes: 22 additions & 0 deletions core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,28 @@ public static String translateWithCharset(String s, String transcodingName) {
return extractValue;
}

/**
 * SQL {@code HAMMING_DISTANCE} function.
 *
 * <p>Returns the number of positions at which the corresponding characters
 * of {@code a} and {@code b} differ. Characters are compared by Unicode code
 * point rather than by UTF-16 code unit, so a supplementary character
 * (encoded as a surrogate pair) counts as a single position. This matches
 * the behavior of Presto's {@code hamming_distance}.
 *
 * @param a first string, may be null
 * @param b second string, may be null
 * @return the Hamming distance, or null if either argument is null
 * @throws org.apache.calcite.runtime.CalciteException if the two strings do
 *     not contain the same number of code points
 */
public static @Nullable Long hammingDistance(
    @Nullable String a, @Nullable String b) {
  if (a == null || b == null) {
    return null;
  }

  final int aLength = a.length();
  final int bLength = b.length();
  long distance = 0;
  int i = 0;
  int j = 0;
  // Advance both strings one code point at a time; the two indexes may move
  // by different amounts when only one side holds a surrogate pair.
  while (i < aLength && j < bLength) {
    final int aCodePoint = a.codePointAt(i);
    final int bCodePoint = b.codePointAt(j);
    if (aCodePoint != bCodePoint) {
      distance++;
    }
    i += Character.charCount(aCodePoint);
    j += Character.charCount(bCodePoint);
  }

  // If either string has characters left over, the code point counts differ.
  if (i != aLength || j != bLength) {
    throw RESOURCE.illegalArgumentsInHammingDistanceFunc().ex();
  }
  return distance;
}

/** SQL {@code RTRIM} function applied to string. */
public static String rtrim(String s) {
return trim(false, true, " ", s);
Expand Down
7 changes: 5 additions & 2 deletions core/src/main/java/org/apache/calcite/sql/fun/SqlLibrary.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ public enum SqlLibrary {
POSTGRESQL("p", "postgresql"),
/** A collection of operators that are in Apache Spark but not in standard
* SQL. */
SPARK("s", "spark");
SPARK("s", "spark"),
/** A collection of operators that are in Presto but not in standard
* SQL. */
PRESTO("pr", "presto");

/** Map from {@link Enum#name() name} and {@link #fun} to library. */
public static final Map<String, SqlLibrary> MAP;
Expand All @@ -97,7 +100,7 @@ public List<SqlLibrary> children() {
switch (this) {
case ALL:
return ImmutableList.of(BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE,
POSTGRESQL, SPARK);
POSTGRESQL, SPARK, PRESTO);
default:
return ImmutableList.of();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import static org.apache.calcite.sql.fun.SqlLibrary.MYSQL;
import static org.apache.calcite.sql.fun.SqlLibrary.ORACLE;
import static org.apache.calcite.sql.fun.SqlLibrary.POSTGRESQL;
import static org.apache.calcite.sql.fun.SqlLibrary.PRESTO;
import static org.apache.calcite.sql.fun.SqlLibrary.SPARK;
import static org.apache.calcite.util.Static.RESOURCE;

Expand Down Expand Up @@ -1876,4 +1877,14 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding
ReturnTypes.INTEGER_NULLABLE,
OperandTypes.or(OperandTypes.CHARACTER, OperandTypes.BINARY),
SqlFunctionCategory.NUMERIC);

/**
 * The "HAMMING_DISTANCE(str1, str2)" function, registered in the
 * {@code PRESTO} library.
 *
 * <p>Takes two string operands and yields a {@code BIGINT} result (type
 * derived by {@code ReturnTypes.BIGINT_NULLABLE}): the number of positions
 * at which the two strings differ. The two strings must have the same
 * length; the runtime implementation raises an error otherwise.
 */
@LibraryOperator(libraries = {PRESTO})
public static final SqlFunction HAMMING_DISTANCE =
SqlBasicFunction.create("HAMMING_DISTANCE",
ReturnTypes.BIGINT_NULLABLE,
OperandTypes.STRING_STRING,
SqlFunctionCategory.NUMERIC);
}
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ public enum BuiltInMethod {
STRING_CONCAT_WITH_NULL(SqlFunctions.class, "concatWithNull", String.class,
String.class),
PARSE_URL(SqlFunctions.class, "parseUrl", String.class, String.class, String.class),
HAMMING_DISTANCE(SqlFunctions.class, "hammingDistance", String.class, String.class),
MULTI_STRING_CONCAT(SqlFunctions.class, "concatMulti", String[].class),
MULTI_STRING_CONCAT_WITH_NULL(SqlFunctions.class, "concatMultiWithNull",
String[].class),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ FunctionNotFound=Function ''{0}'' not found
DialectDoesNotSupportFeature=Dialect does not support feature: ''{0}''
IllegalNegativePadLength=Second argument for LPAD/RPAD must not be negative
IllegalEmptyPadPattern=Third argument (pad pattern) for LPAD/RPAD must not be empty
IllegalArgumentsInHammingDistanceFunc=The input strings to hamming_distance function must have the same length
IllegalNegativeSubstringLength=Substring error: negative substring length not allowed
IllegalArgumentsInMapFromArraysFunc=Illegal arguments: The length of the keys array {0,number,#} is not equal to the length of the values array {1,number,#} in MAP_FROM_ARRAYS function
TrimError=Trim error: trim character must be exactly 1 character
Expand Down
8 changes: 4 additions & 4 deletions core/src/test/java/org/apache/calcite/util/UtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -951,16 +951,16 @@ private List<Integer> makeConsList(int start, int end) {

assertThat(SqlLibrary.expand(ImmutableList.of(a)),
hasToString("[ALL, BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SPARK, PRESTO]"));
assertThat(SqlLibrary.expand(ImmutableList.of(a, c)),
hasToString("[ALL, BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SPARK, PRESTO]"));
assertThat(SqlLibrary.expand(ImmutableList.of(c, a)),
hasToString("[CALCITE, ALL, BIG_QUERY, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SPARK, PRESTO]"));
assertThat(SqlLibrary.expand(ImmutableList.of(c, o, a)),
hasToString("[CALCITE, ORACLE, ALL, BIG_QUERY, HIVE, MSSQL, MYSQL, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SPARK, PRESTO]"));
assertThat(SqlLibrary.expand(ImmutableList.of(o, c, o)),
hasToString("[ORACLE, CALCITE]"));

Expand Down
1 change: 1 addition & 0 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2729,6 +2729,7 @@ BigQuery's type system uses confusingly different names for types and functions:
| b | FORMAT_TIME(string, time) | Formats *time* according to the specified format *string*
| b | FORMAT_TIMESTAMP(string, timestamp) | Formats *timestamp* according to the specified format *string*
| b o | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions
| pr | HAMMING_DISTANCE(string1, string2) | Returns the Hamming distance between *string1* and *string2*, i.e. the number of positions at which they differ; the two strings must have the same length
| b h s | IF(condition, value1, value2) | Returns *value1* if *condition* is TRUE, *value2* otherwise
| b | IFNULL(value1, value2) | Equivalent to `NVL(value1, value2)`
| b o | INSTR(string, substring [, from [, occurrence ] ]) | Returns the position of *substring* in *string*, searching starting at *from* (default 1), and until locating the nth *occurrence* (default 1) of *substring*
Expand Down
44 changes: 44 additions & 0 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4052,6 +4052,50 @@ static void checkRlikeFails(SqlOperatorFixture f) {
f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
}

/**
 * Exercises the Presto {@code HAMMING_DISTANCE} function: equal and
 * differing inputs, null operands, non-ASCII text, a wrong argument count,
 * and inputs whose lengths do not match.
 */
@Test void testHammingDistance() {
  final SqlOperatorFixture f = fixture().withLibrary(SqlLibrary.PRESTO);
  f.setFor(SqlLibraryOperators.HAMMING_DISTANCE);

  // Shared expectations, named once instead of repeated on every check.
  final String bigint = "BIGINT NOT NULL";
  final String wrongArity = "Invalid number of arguments to function "
      + "'HAMMING_DISTANCE'\\. Was expecting 2 arguments";
  final String lengthMismatch =
      "The input strings to hamming_distance function must have the same length";

  // Plain ASCII inputs of equal length
  f.checkScalarExact("hamming_distance('', '')", bigint, "0");
  f.checkScalarExact("hamming_distance('a', 'a')", bigint, "0");
  f.checkScalarExact("hamming_distance('hello', 'hello')", bigint, "0");
  f.checkScalarExact("hamming_distance('like', 'hate')", bigint, "3");
  f.checkScalarExact("hamming_distance('hello', 'world')", bigint, "4");

  // A null operand on either side yields null
  f.checkNull("hamming_distance(null, null)");
  f.checkNull("hamming_distance(null, 'hello')");
  f.checkNull("hamming_distance('hello', null)");

  // Non-ASCII inputs that differ in exactly one position
  f.checkScalarExact("hamming_distance(_UTF8'hello na\u00EFve world', 'hello naive world')",
      bigint, "1");
  f.checkScalarExact(
      "hamming_distance(_UTF8'\u4FE1\u5FF5,\u7231,\u5E0C\u671B', _UTF8'\u4FE1\u4EF0,\u7231,\u5E0C\u671B')",
      bigint, "1");
  f.checkScalarExact(
      "hamming_distance(_UTF8'\u4F11\u5FF5,\u7231,\u5E0C\u671B', _UTF8'\u4FE1\u5FF5,\u7231,\u5E0C\u671B')",
      bigint, "1");

  // Too many arguments is rejected at validation time
  f.checkFails("^hamming_distance('hello', 'world', 'extra')^", wrongArity, false);

  // Inputs of unequal length are rejected at runtime
  f.checkFails("hamming_distance('hello', '')", lengthMismatch, true);
  f.checkFails("hamming_distance('', 'hello')", lengthMismatch, true);
  f.checkFails("hamming_distance('hello', 'o')", lengthMismatch, true);
  f.checkFails("hamming_distance(_UTF8'hello na\u00EFve world', 'hello na:ive world')",
      lengthMismatch, true);
  f.checkFails("hamming_distance("
      + "_UTF8'\u4FE1\u5FF5,\u7231,\u5E0C\u671B', _UTF8'\u4FE1\u5FF5\u5E0C\u671B')",
      lengthMismatch, true);
}

@Test void testToBase64() {
final SqlOperatorFixture f = fixture().withLibrary(SqlLibrary.MYSQL);
f.setFor(SqlLibraryOperators.TO_BASE64);
Expand Down