Skip to content

Commit

Permalink
[SPARK-18871][SQL][TESTS] New test cases for IN/NOT IN subquery 3rd b…
Browse files Browse the repository at this point in the history
…atch

## What changes were proposed in this pull request?

This is the 3rd batch of test cases for IN/NOT IN subqueries. This PR contains these test files:

`in-having.sql`
`in-joins.sql`
`in-multiple-columns.sql`

These are the queries and results from running on DB2.
[in-having DB2 version](https://github.com/apache/spark/files/772668/in-having.sql.db2.txt)
[output of in-having](https://github.com/apache/spark/files/772670/in-having.sql.db2.out.txt)
[in-joins DB2 version](https://github.com/apache/spark/files/772672/in-joins.sql.db2.txt)
[output of in-joins](https://github.com/apache/spark/files/772673/in-joins.sql.db2.out.txt)
[in-multiple-columns DB2 version](https://github.com/apache/spark/files/772678/in-multiple-columns.sql.db2.txt)
[output of in-multiple-columns](https://github.com/apache/spark/files/772680/in-multiple-columns.sql.db2.out.txt)

## How was this patch tested?
This PR adds new test cases. We compare the results from Spark with the results from another RDBMS (we used DB2 LUW). If the results are the same, we assume the result is correct.

Author: Kevin Yu <[email protected]>

Closes apache#16841 from kevinyu98/spark-18871-33.
  • Loading branch information
kevinyu98 authored and gatorsmile committed Feb 16, 2017
1 parent f041e55 commit 3871d94
Show file tree
Hide file tree
Showing 6 changed files with 1,297 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
-- A test suite for IN/NOT IN predicate subqueries combined with HAVING: HAVING in the parent query, in the subquery, and in both.
-- It includes correlated cases.

-- Fixture view t1: 12 rows, 9 columns (t1a .. t1i).
-- Literal forms per column, in order: string, S-suffixed integer, plain integer,
-- L-suffixed integer, float(...) call, D-suffixed number, exponent-notation number,
-- timestamp literal, date literal.
-- NULLs appear in t1b, t1c and t1i so the predicates below see null inputs.
CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES
    ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
    ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
    ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
    ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
    ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
    ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
    ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
    ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
    ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
    ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
    ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
AS t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);

-- Fixture view t2: 13 rows, 9 columns (t2a .. t2i), same literal layout as the
-- other fixture views in this file.
-- NULLs appear in t2b, t2c and t2i.
CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES
    ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
    ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
    ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
    ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
    ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
    ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
    ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
    ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
    ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
    ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
    ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
AS t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);

-- Fixture view t3: 12 rows, 9 columns (t3a .. t3i), same literal layout as the
-- other fixture views in this file.
-- NULLs appear in t3b, t3c and t3i.
CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES
    ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
    ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
    ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
    ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
    ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
    ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
    ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
    ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
AS t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);

-- IN subquery (uncorrelated and correlated cases)
-- HAVING in the subquery
-- TC 01.01
-- Uncorrelated IN subquery: the subquery groups t2 by t2b and keeps only
-- groups whose t2b is below 10 via HAVING.
SELECT
    t1a,
    t1b,
    t1h
FROM t1
WHERE t1b IN (
    SELECT t2b
    FROM t2
    GROUP BY t2b
    HAVING t2b < 10
);

-- TC 01.02
-- Correlated IN subquery (t1a = t2a) that returns an aggregate, MIN(t2b),
-- per t2b group; HAVING filters on the grouping column.
SELECT
    t1a,
    t1b,
    t1c
FROM t1
WHERE t1b IN (
    SELECT MIN(t2b)
    FROM t2
    WHERE t1a = t2a
    GROUP BY t2b
    HAVING t2b > 1
);

-- HAVING in the parent
-- TC 01.03
-- Correlated IN subquery (t1c < t2c) in the WHERE clause; the parent query
-- groups the surviving rows and filters groups on t1b with HAVING.
SELECT
    t1a,
    t1b,
    t1c
FROM t1
WHERE t1b IN (
    SELECT t2b
    FROM t2
    WHERE t1c < t2c
)
GROUP BY
    t1a,
    t1b,
    t1c
HAVING t1b < 10;

-- TC 01.04
-- Correlated IN subquery (t1c = t2c) in the WHERE clause; the parent HAVING
-- filters on an aggregate, COUNT(DISTINCT t1b), rather than a grouping column.
SELECT
    t1a,
    t1b,
    t1c
FROM t1
WHERE t1b IN (
    SELECT t2b
    FROM t2
    WHERE t1c = t2c
)
GROUP BY
    t1a,
    t1b,
    t1c
HAVING COUNT(DISTINCT t1b) < 10;

-- BOTH
-- TC 01.05
-- HAVING on both sides: the correlated IN subquery (t1a = t2a) groups by t2c
-- with its own HAVING, and the parent groups by t1b with a HAVING on t1b.
SELECT
    COUNT(DISTINCT t1a),
    t1b
FROM t1
WHERE t1c IN (
    SELECT t2c
    FROM t2
    WHERE t1a = t2a
    GROUP BY t2c
    HAVING t2c > 10
)
GROUP BY t1b
HAVING t1b >= 8;

-- TC 01.06
-- IN subquery placed in the parent's HAVING clause; that subquery itself
-- contains a nested IN subquery correlated to t2 (t2c = t3c).
SELECT
    t1a,
    MAX(t1b)
FROM t1
WHERE t1b > 0
GROUP BY t1a
HAVING t1a IN (
    SELECT t2a
    FROM t2
    WHERE t2b IN (
        SELECT t3b
        FROM t3
        WHERE t2c = t3c
    )
);

-- HAVING clause with NOT IN
-- TC 01.07
-- Uncorrelated NOT IN subquery with its own GROUP BY / HAVING; the parent
-- HAVING compares an aggregate, MIN(t1d), against the grouping column t1c.
SELECT
    t1a,
    t1c,
    MIN(t1d)
FROM t1
WHERE t1a NOT IN (
    SELECT t2a
    FROM t2
    GROUP BY t2a
    HAVING t2a > 'val2a'
)
GROUP BY
    t1a,
    t1c
HAVING MIN(t1d) > t1c;

-- TC 01.08
-- Correlated NOT IN subquery (t1a = t2a) grouped by (t2c, t2d) with HAVING on
-- t2c; the parent groups by (t1a, t1b) and filters groups on t1b.
SELECT
    t1a,
    t1b
FROM t1
WHERE t1d NOT IN (
    SELECT t2d
    FROM t2
    WHERE t1a = t2a
    GROUP BY
        t2c,
        t2d
    HAVING t2c > 8
)
GROUP BY
    t1a,
    t1b
HAVING t1b < 10;

-- TC 01.09
-- Uncorrelated NOT IN subquery placed in the parent's HAVING clause, applied
-- to the grouping column t1a.
SELECT
    t1a,
    MAX(t1b)
FROM t1
WHERE t1b > 0
GROUP BY t1a
HAVING t1a NOT IN (
    SELECT t2a
    FROM t2
    WHERE t2b > 3
);

Loading

0 comments on commit 3871d94

Please sign in to comment.