forked from duckdblabs/db-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
join-clickhouse.sql.in
107 lines (92 loc) · 10.5 KB
/
join-clickhouse.sql.in
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
-- parent sh script 'sed' pre-processing to be removed once sql script will support variables: https://github.com/yandex/ClickHouse/issues/3815
--data_name='DATA_NAME'
--task='join'
--fun='select join'
--cache=1 -- #151
--on_disk=1
-- we list all columns in select statement rather using select * due to #167
/* q1: question='small inner on int' */
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_SMALL AS y USING (id1);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'small inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_SMALL AS y USING (id1);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q1_r1.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_SMALL AS y USING (id1);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'small inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_SMALL AS y USING (id1);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q1_r2.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
/* q2: question='medium inner on int' */
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id2);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id2);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q2_r1.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id2);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id2);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q2_r2.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
/* q3: question='medium outer on int' */
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x LEFT JOIN RHS_MEDIUM AS y USING (id2);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium outer on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x LEFT JOIN RHS_MEDIUM AS y USING (id2);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q3_r1.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x LEFT JOIN RHS_MEDIUM AS y USING (id2);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium outer on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x LEFT JOIN RHS_MEDIUM AS y USING (id2);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q3_r2.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
/* q4: question='medium inner on factor' */
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id5);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium inner on factor' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id5);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q4_r1.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id5);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium inner on factor' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id5);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q4_r2.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
/* q5: question='big inner on int' */
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_BIG AS y USING (id3);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'big inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_BIG AS y USING (id3);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q5_r1.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;
SET log_queries = 1;
CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_BIG AS y USING (id3);
SET log_queries = 0;
SYSTEM FLUSH LOGS;
SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'big inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk
FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_BIG AS y USING (id3);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q5_r2.csv' FORMAT CSVWithNames;
SELECT * FROM ans LIMIT 3;
DROP TABLE ans;