forked from ClickHouse/ClickHouse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetMostSubtype.cpp
354 lines (297 loc) · 12.1 KB
/
getMostSubtype.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/typeid_cast.h>
#include <DataTypes/getMostSubtype.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypesNumber.h>
namespace DB
{
namespace ErrorCodes
{
/// Only declared here (extern); the value is provided by another translation unit.
/// It is the error code attached to every exception thrown by getMostSubtype below.
extern const int NO_COMMON_TYPE;
}
namespace
{
/// Produces the leading part of the "no subtype" error text: a fixed phrase followed by
/// the names of all `types`, separated by ", ". Callers append the specific reason.
String getExceptionMessagePrefix(const DataTypes & types)
{
    WriteBufferFromOwnString out;
    out << "There is no subtype for types ";

    for (size_t pos = 0; pos < types.size(); ++pos)
    {
        /// The separator goes before every name except the very first one.
        if (pos != 0)
            out << ", ";
        out << types[pos]->getName();
    }

    return out.str();
}
}
/** Derives a single result type from `types` by trying the rules below strictly in this order:
  * trivial cases, Nothing, Array, Tuple, Nullable, String/FixedString, Date/DateTime/DateTime64, numbers.
  * The first rule that recognises at least one input decides the outcome.
  *
  * throw_if_result_is_nothing - selects what happens when a rule finds the inputs incompatible:
  *   true  -> Exception with code NO_COMMON_TYPE is thrown;
  *   false -> DataTypeNothing is returned.
  *   The recursive calls made for Array elements and for the Nullable-preserving branch always pass false.
  * force_support_conversion - when true, a mix of Nullable and non-Nullable inputs still gives a Nullable
  *   result; when false, the Nullable wrapper is kept only if every input is Nullable.
  */
DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_nothing, bool force_support_conversion)
{
    /// Common exit for incompatible inputs: throw with the given reason, or fall back to Nothing.
    auto nothing_or_throw = [&](const std::string & reason) -> DataTypePtr
    {
        if (throw_if_result_is_nothing)
            throw Exception(getExceptionMessagePrefix(types) + reason, ErrorCodes::NO_COMMON_TYPE);
        return std::make_shared<DataTypeNothing>();
    };

    const size_t total = types.size();

    /// Trivial case: empty list.
    if (total == 0)
    {
        if (throw_if_result_is_nothing)
            throw Exception("There is no common type for empty type list", ErrorCodes::NO_COMMON_TYPE);
        return std::make_shared<DataTypeNothing>();
    }

    /// Trivial case: a single type is returned as is, unless it is Nothing and the caller asked to throw.
    if (total == 1)
    {
        if (throw_if_result_is_nothing && typeid_cast<const DataTypeNothing *>(types[0].get()))
            throw Exception("There is no common type for type Nothing", ErrorCodes::NO_COMMON_TYPE);
        return types[0];
    }

    /// Trivial case: every type equals the first one.
    {
        size_t matching = 1;
        while (matching < total && types[matching]->equals(*types[0]))
            ++matching;

        if (matching == total)
            return types[0];
    }

    /// Recursive rules start here.

    /// A single Nothing among the inputs makes the whole result Nothing.
    for (const auto & type : types)
    {
        if (typeid_cast<const DataTypeNothing *>(type.get()))
            return nothing_or_throw(" because some of them are Nothing");
    }

    /// Arrays: recurse into the element types; mixing Array with non-Array is incompatible.
    {
        DataTypes element_types;
        element_types.reserve(total);

        for (const auto & type : types)
        {
            if (const auto * as_array = typeid_cast<const DataTypeArray *>(type.get()))
                element_types.emplace_back(as_array->getNestedType());
        }

        if (!element_types.empty())
        {
            /// Fewer collected element types than inputs means some input was not an Array.
            if (element_types.size() != total)
                return nothing_or_throw(" because some of them are Array and some of them are not");

            return std::make_shared<DataTypeArray>(getMostSubtype(element_types, false, force_support_conversion));
        }
    }

    /// Tuples: all must have the same number of elements; recurse position by position.
    {
        size_t tuples_seen = 0;
        size_t arity = 0;
        std::vector<DataTypes> per_position;    /// per_position[k] holds the k-th element type of each tuple

        for (const auto & type : types)
        {
            const auto * as_tuple = typeid_cast<const DataTypeTuple *>(type.get());
            if (!as_tuple)
                continue;

            const auto & elements = as_tuple->getElements();

            if (tuples_seen == 0)
            {
                /// The first tuple fixes the expected arity.
                arity = elements.size();
                per_position.resize(arity);
                for (auto & column : per_position)
                    column.reserve(total);
            }
            else if (elements.size() != arity)
                return nothing_or_throw(" because Tuples have different sizes");

            ++tuples_seen;

            for (size_t pos = 0; pos < arity; ++pos)
                per_position[pos].emplace_back(elements[pos]);
        }

        if (tuples_seen != 0)
        {
            if (tuples_seen != total)
                return nothing_or_throw(" because some of them are Tuple and some of them are not");

            DataTypes result_elements;
            result_elements.reserve(arity);
            for (const auto & column : per_position)
                result_elements.emplace_back(getMostSubtype(column, throw_if_result_is_nothing, force_support_conversion));

            return std::make_shared<DataTypeTuple>(result_elements);
        }
    }

    /// Nullable: strip the wrapper from the inputs that have it and recurse on the bare types.
    {
        bool any_nullable = false;
        bool every_nullable = true;
        DataTypes unwrapped;
        unwrapped.reserve(total);

        for (const auto & type : types)
        {
            if (const auto * as_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
            {
                any_nullable = true;
                unwrapped.emplace_back(as_nullable->getNestedType());
            }
            else
            {
                every_nullable = false;
                unwrapped.emplace_back(type);
            }
        }

        if (any_nullable)
        {
            /// Without force_support_conversion, one non-Nullable input is enough to drop the wrapper.
            if (!every_nullable && !force_support_conversion)
                return getMostSubtype(unwrapped, throw_if_result_is_nothing, force_support_conversion);

            return std::make_shared<DataTypeNullable>(getMostSubtype(unwrapped, false, force_support_conversion));
        }
    }

    /// Non-recursive rules start here.

    /// String and FixedString together give FixedString.
    /// FixedStrings that are not equal to each other are incompatible.
    /// Nothing else mixes with strings. TODO Enums?
    {
        size_t string_like = 0;
        DataTypePtr fixed_string;

        for (const auto & type : types)
        {
            if (isFixedString(type))
            {
                ++string_like;
                if (!fixed_string)
                    fixed_string = type;
                else if (!type->equals(*fixed_string))
                    return nothing_or_throw(" because some of them are FixedStrings with different length");
            }
            else if (isString(type))
                ++string_like;
        }

        if (string_like != 0)
        {
            if (string_like != total)
                return nothing_or_throw(" because some of them are String/FixedString and some of them are not");

            if (fixed_string)
                return fixed_string;
            return std::make_shared<DataTypeString>();
        }
    }

    /// Date, DateTime and DateTime64 in any combination give Date. Nothing else mixes with them.
    {
        size_t date_like = 0;

        for (const auto & type : types)
        {
            if (isDate(type) || isDateTime(type) || isDateTime64(type))
                ++date_like;
        }

        if (date_like != 0)
        {
            if (date_like != total)
                return nothing_or_throw(" because some of them are Date/DateTime and some of them are not");

            return std::make_shared<DataTypeDate>();
        }
    }

    /// Numbers: track the smallest width seen in each family (0 means "family not present").
    {
        bool only_numbers = true;
        size_t signed_bits = 0;
        size_t unsigned_bits = 0;
        size_t float_mantissa_bits = 0;

        auto keep_smallest = [](size_t & current, size_t candidate)
        {
            if (current == 0 || candidate < current)
                current = candidate;
        };

        for (const auto & type : types)
        {
            const auto * raw = type.get();

            if (typeid_cast<const DataTypeUInt8 *>(raw))
                keep_smallest(unsigned_bits, 8);
            else if (typeid_cast<const DataTypeUInt16 *>(raw))
                keep_smallest(unsigned_bits, 16);
            else if (typeid_cast<const DataTypeUInt32 *>(raw))
                keep_smallest(unsigned_bits, 32);
            else if (typeid_cast<const DataTypeUInt64 *>(raw))
                keep_smallest(unsigned_bits, 64);
            else if (typeid_cast<const DataTypeInt8 *>(raw))
                keep_smallest(signed_bits, 8);
            else if (typeid_cast<const DataTypeInt16 *>(raw))
                keep_smallest(signed_bits, 16);
            else if (typeid_cast<const DataTypeInt32 *>(raw))
                keep_smallest(signed_bits, 32);
            else if (typeid_cast<const DataTypeInt64 *>(raw))
                keep_smallest(signed_bits, 64);
            else if (typeid_cast<const DataTypeFloat32 *>(raw))
                keep_smallest(float_mantissa_bits, 24);
            else if (typeid_cast<const DataTypeFloat64 *>(raw))
                keep_smallest(float_mantissa_bits, 53);
            else
                only_numbers = false;
        }

        const bool any_number = signed_bits != 0 || unsigned_bits != 0 || float_mantissa_bits != 0;
        if (any_number)
        {
            if (!only_numbers)
                return nothing_or_throw(" because some of them are numbers and some of them are not");

            /// No integers at all: the result is the narrowest float that was seen.
            if (signed_bits == 0 && unsigned_bits == 0)
            {
                if (float_mantissa_bits <= 24)
                    return std::make_shared<DataTypeFloat32>();
                if (float_mantissa_bits <= 53)
                    return std::make_shared<DataTypeFloat64>();

                throw Exception("Logical error: " + getExceptionMessagePrefix(types)
                    + " but as all data types are floats, we must have found maximum float type", ErrorCodes::NO_COMMON_TYPE);
            }

            /// Unsigned wins when no signed integer is present or when the narrowest unsigned
            /// is not wider than the narrowest signed (this covers equal bit widths too).
            if (unsigned_bits != 0 && (signed_bits == 0 || unsigned_bits <= signed_bits))
            {
                if (unsigned_bits <= 8)
                    return std::make_shared<DataTypeUInt8>();
                if (unsigned_bits <= 16)
                    return std::make_shared<DataTypeUInt16>();
                if (unsigned_bits <= 32)
                    return std::make_shared<DataTypeUInt32>();
                if (unsigned_bits <= 64)
                    return std::make_shared<DataTypeUInt64>();

                throw Exception("Logical error: " + getExceptionMessagePrefix(types)
                    + " but as all data types are integers, we must have found maximum unsigned integer type", ErrorCodes::NO_COMMON_TYPE);
            }

            /// Otherwise the result is the narrowest signed integer that was seen.
            if (signed_bits <= 8)
                return std::make_shared<DataTypeInt8>();
            if (signed_bits <= 16)
                return std::make_shared<DataTypeInt16>();
            if (signed_bits <= 32)
                return std::make_shared<DataTypeInt32>();
            if (signed_bits <= 64)
                return std::make_shared<DataTypeInt64>();

            throw Exception("Logical error: " + getExceptionMessagePrefix(types)
                + " but as all data types are integers, we must have found maximum signed integer type", ErrorCodes::NO_COMMON_TYPE);
        }
    }

    /// Any other data types (UUID, AggregateFunction, Enum...) are compatible only when identical,
    /// which the all-equal shortcut near the top has already handled.
    return nothing_or_throw("");
}
}