forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-36736][SQL] Support ILIKE (ALL | ANY | SOME) - case insensitiv…
…e LIKE ### What changes were proposed in this pull request? In the PR, I propose to support a case-insensitive variant of the `LIKE (ALL | ANY | SOME)` expression - `ILIKE`. In this way, Spark's users can match strings against one or more patterns in a case-insensitive manner. For example: ```sql spark-sql> create table ilike_example(subject varchar(20)); spark-sql> insert into ilike_example values > ('jane doe'), > ('Jane Doe'), > ('JANE DOE'), > ('John Doe'), > ('John Smith'); spark-sql> select * > from ilike_example > where subject ilike any ('jane%', '%SMITH') > order by subject; JANE DOE Jane Doe John Smith jane doe ``` The syntax of `ILIKE` is similar to `LIKE`: ``` str NOT? ILIKE (ANY | SOME | ALL) (pattern+) ``` ### Why are the changes needed? 1. To improve user experience with Spark SQL. No need to use `lower(col_name)` in where clauses. 2. To make migration from other popular DBMSs to Spark SQL easier. DBMSs below support `ilike` in SQL: - [Snowflake](https://docs.snowflake.com/en/sql-reference/functions/ilike.html#ilike) - [PostgreSQL](https://www.postgresql.org/docs/12/functions-matching.html) - [CockroachDB](https://www.cockroachlabs.com/docs/stable/functions-and-operators.html) ### Does this PR introduce _any_ user-facing change? No, it doesn't. The PR **extends** existing APIs. ### How was this patch tested? 1. By running the expression examples via: ``` $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite" ``` 2. Added a new test for parsing of `ILIKE`: ``` $ build/sbt "test:testOnly *.ExpressionParserSuite" ``` 3. Via existing test suites: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z ilike-any.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z ilike-all.sql" ``` Closes apache#33966 from MaxGekk/ilike-any. Authored-by: Max Gekk <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
- Loading branch information
Showing
9 changed files
with
414 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
sql/core/src/test/resources/sql-tests/inputs/ilike-all.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
-- test cases for ilike all | ||
|
||
-- Fixture: temporary view ilike_all_table with three (company, pat) rows. | ||
-- The stored values vary in letter case; pat holds a per-row pattern that is | ||
-- read by the "constant pattern and column value" query in this file. | ||
CREATE OR REPLACE TEMPORARY VIEW ilike_all_table AS SELECT * FROM (VALUES | ||
('gOOgle', '%oo%'), | ||
('facebook', '%OO%'), | ||
('liNkedin', '%In')) | ||
as t1(company, pat); | ||
|
||
-- Two constant patterns whose letter case differs from the stored values. | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL ('%oO%', '%Go%'); | ||
|
||
||
-- Two constant patterns that occur in none of the stored values, whatever the case. | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL ('microsoft', '%yoo%'); | ||
|
||
-- ILIKE ALL used as a CASE condition: on its own (is_available) and as both | ||
-- operands of an OR (mix). Each CASE maps the predicate to 'Y' or 'N'. | ||
SELECT | ||
company, | ||
CASE | ||
WHEN company ILIKE ALL ('%oo%', '%GO%') THEN 'Y' | ||
ELSE 'N' | ||
END AS is_available, | ||
CASE | ||
WHEN company ILIKE ALL ('%OO%', 'go%') OR company ILIKE ALL ('%IN', 'ms%') THEN 'Y' | ||
ELSE 'N' | ||
END AS mix | ||
FROM ilike_all_table ; | ||
|
||
-- Mix test with constant pattern and column value: the second pattern is the | ||
-- row's own pat column rather than a literal. | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL ('%oo%', pat); | ||
|
||
-- not ilike all test: the first three statements use the infix form | ||
-- `company NOT ILIKE ALL (...)`; the last one uses the prefix form | ||
-- `NOT company ILIKE ALL (...)`. | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL ('%oo%', '%In', 'Fa%'); | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL ('microsoft', '%yoo%'); | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL ('%oo%', 'fA%'); | ||
SELECT company FROM ilike_all_table WHERE NOT company ILIKE ALL ('%oO%', 'fa%'); | ||
|
||
-- null test: a NULL literal next to a constant pattern, then a list made only | ||
-- of NULL literals; each variant appears in plain and in NOT form. | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL ('%OO%', NULL); | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL ('%Oo%', NULL); | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL (NULL, NULL); | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL (NULL, NULL); | ||
|
||
-- negative case: the pattern list is empty. The recorded result for this | ||
-- statement is a ParseException ("Expected something between '(' and ')'."). | ||
-- NOTE(review): this reads ilike_any_table, which this file never creates; it | ||
-- looks like a copy-paste from the ILIKE ANY tests. The recorded failure is a | ||
-- parse error, so the name is presumably never resolved. Confirm before | ||
-- renaming, because the expected-output file echoes the statement text. | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ALL (); |
41 changes: 41 additions & 0 deletions
41
sql/core/src/test/resources/sql-tests/inputs/ilike-any.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
-- test cases for ilike any | ||
|
||
-- Fixture: temporary view ilike_any_table with three (company, pat) rows. | ||
-- The stored values vary in letter case; pat holds a per-row pattern that is | ||
-- read by the "constant pattern and column value" query in this file. | ||
CREATE OR REPLACE TEMPORARY VIEW ilike_any_table AS SELECT * FROM (VALUES | ||
('Google', '%Oo%'), | ||
('FaceBook', '%oO%'), | ||
('linkedIn', '%IN')) | ||
as t1(company, pat); | ||
|
||
-- Three constant patterns whose letter case differs from the stored values. | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ANY ('%oo%', '%IN', 'fA%'); | ||
|
||
||
-- Two constant patterns that occur in none of the stored values, whatever the case. | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ANY ('microsoft', '%yoo%'); | ||
|
||
-- ILIKE ANY used as a CASE condition: on its own (is_available) and as both | ||
-- operands of an OR (mix). Each CASE maps the predicate to 'Y' or 'N'. | ||
-- NOTE(review): `select` is lower-case here, unlike the other statements in | ||
-- this file. Cosmetic only; changing it would presumably require regenerating | ||
-- the expected-output file, which echoes statement text. | ||
select | ||
company, | ||
CASE | ||
WHEN company ILIKE ANY ('%oO%', '%IN', 'Fa%') THEN 'Y' | ||
ELSE 'N' | ||
END AS is_available, | ||
CASE | ||
WHEN company ILIKE ANY ('%OO%', 'fa%') OR company ILIKE ANY ('%in', 'MS%') THEN 'Y' | ||
ELSE 'N' | ||
END AS mix | ||
FROM ilike_any_table; | ||
|
||
-- Mix test with constant pattern and column value: the constant '%zZ%' occurs | ||
-- in none of the stored values, and the second pattern is the row's own pat | ||
-- column rather than a literal. | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ANY ('%zZ%', pat); | ||
|
||
-- not ilike any test: the first three statements use the infix form | ||
-- `company NOT ILIKE ANY (...)`; the last one uses the prefix form | ||
-- `NOT company ILIKE ANY (...)`. | ||
SELECT company FROM ilike_any_table WHERE company NOT ILIKE ANY ('%oO%', '%iN', 'fa%'); | ||
SELECT company FROM ilike_any_table WHERE company NOT ILIKE ANY ('microsoft', '%yOo%'); | ||
SELECT company FROM ilike_any_table WHERE company NOT ILIKE ANY ('%oo%', 'Fa%'); | ||
SELECT company FROM ilike_any_table WHERE NOT company ILIKE ANY ('%OO%', 'fa%'); | ||
|
||
-- null test: a NULL literal next to a constant pattern, then a list made only | ||
-- of NULL literals; each variant appears in plain and in NOT form. | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ANY ('%oO%', NULL); | ||
SELECT company FROM ilike_any_table WHERE company NOT ILIKE ANY ('%oo%', NULL); | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ANY (NULL, NULL); | ||
SELECT company FROM ilike_any_table WHERE company NOT ILIKE ANY (NULL, NULL); | ||
|
||
-- negative case: the pattern list is empty, so the statement is expected to | ||
-- be rejected rather than return rows. | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ANY (); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
140 changes: 140 additions & 0 deletions
140
sql/core/src/test/resources/sql-tests/results/ilike-all.sql.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- Number of queries: 14 | ||
|
||
|
||
-- !query | ||
CREATE OR REPLACE TEMPORARY VIEW ilike_all_table AS SELECT * FROM (VALUES | ||
('gOOgle', '%oo%'), | ||
('facebook', '%OO%'), | ||
('liNkedin', '%In')) | ||
as t1(company, pat) | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL ('%oO%', '%Go%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL ('microsoft', '%yoo%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT | ||
company, | ||
CASE | ||
WHEN company ILIKE ALL ('%oo%', '%GO%') THEN 'Y' | ||
ELSE 'N' | ||
END AS is_available, | ||
CASE | ||
WHEN company ILIKE ALL ('%OO%', 'go%') OR company ILIKE ALL ('%IN', 'ms%') THEN 'Y' | ||
ELSE 'N' | ||
END AS mix | ||
FROM ilike_all_table | ||
-- !query schema | ||
struct<company:string,is_available:string,mix:string> | ||
-- !query output | ||
facebook N N | ||
gOOgle Y Y | ||
liNkedin N N | ||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL ('%oo%', pat) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL ('%oo%', '%In', 'Fa%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL ('microsoft', '%yoo%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL ('%oo%', 'fA%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE NOT company ILIKE ALL ('%oO%', 'fa%') | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL ('%OO%', NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL ('%Oo%', NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company ILIKE ALL (NULL, NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_all_table WHERE company NOT ILIKE ALL (NULL, NULL) | ||
-- !query schema | ||
struct<company:string> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ALL () | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
org.apache.spark.sql.catalyst.parser.ParseException | ||
|
||
Expected something between '(' and ')'.(line 1, pos 50) | ||
|
||
== SQL == | ||
SELECT company FROM ilike_any_table WHERE company ILIKE ALL () | ||
--------------------------------------------------^^^ |
Oops, something went wrong.