forked from mysql/mysql-server
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WL#2673 Unicode Collation Algorithm new version
- Loading branch information
Alexander Barkov
committed
Jun 25, 2010
1 parent
b8d94ee
commit 89a7720
Showing
37 changed files
with
19,500 additions
and
1,490 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#
# WL#2673 Unicode collation algorithm new version
#
# Set-up: create an empty single-column table t1. The column c takes its
# definition from the expression repeat('a', 10); LIMIT 0 copies no rows.
# SHOW CREATE TABLE then records the column definition that was derived.
# NOTE(review): presumably the character set and collation of c come from the
# session settings of the test that runs this script - confirm.
CREATE TABLE t1 AS SELECT repeat('a', 10) as c LIMIT 0;
SHOW CREATE TABLE t1;

#
# Unicode 5.0.0 characters
#
# Each INSERT in this section stores one or two single-character values,
# each given as a code point through the _utf32 introducer.

# Latin Extended-B and IPA Extensions
# (U+0180..U+024F are Latin Extended-B; U+026B..U+028C are IPA Extensions)
INSERT INTO t1 VALUES (_utf32 0x0180),(_utf32 0x023A);
INSERT INTO t1 VALUES (_utf32 0x023B),(_utf32 0x023C);
INSERT INTO t1 VALUES (_utf32 0x023D),(_utf32 0x023E);
INSERT INTO t1 VALUES (_utf32 0x0241),(_utf32 0x0242);
INSERT INTO t1 VALUES (_utf32 0x0243),(_utf32 0x0244);
INSERT INTO t1 VALUES (_utf32 0x0245),(_utf32 0x0246);
INSERT INTO t1 VALUES (_utf32 0x0247),(_utf32 0x0248);
INSERT INTO t1 VALUES (_utf32 0x0249),(_utf32 0x024A);
INSERT INTO t1 VALUES (_utf32 0x024B),(_utf32 0x024C);
INSERT INTO t1 VALUES (_utf32 0x024D),(_utf32 0x024E);
INSERT INTO t1 VALUES (_utf32 0x024F),(_utf32 0x026B);
INSERT INTO t1 VALUES (_utf32 0x027D),(_utf32 0x0289);
INSERT INTO t1 VALUES (_utf32 0x028C);

# Greek and Coptic
INSERT INTO t1 VALUES (_utf32 0x037B), (_utf32 0x037C);
INSERT INTO t1 VALUES (_utf32 0x037D), (_utf32 0x03FD);
INSERT INTO t1 VALUES (_utf32 0x03FE), (_utf32 0x03FF);

# Cyrillic, Cyrillic Supplement
# (U+04C0..U+04FF are in the Cyrillic block; U+0510..U+0513 are in
# Cyrillic Supplement)
INSERT INTO t1 VALUES (_utf32 0x04C0), (_utf32 0x04CF);
INSERT INTO t1 VALUES (_utf32 0x04F6), (_utf32 0x04F7);
INSERT INTO t1 VALUES (_utf32 0x04FA), (_utf32 0x04FB);
INSERT INTO t1 VALUES (_utf32 0x04FC), (_utf32 0x04FD);
INSERT INTO t1 VALUES (_utf32 0x04FE), (_utf32 0x04FF);
INSERT INTO t1 VALUES (_utf32 0x0510), (_utf32 0x0511);
INSERT INTO t1 VALUES (_utf32 0x0512), (_utf32 0x0513);

# Georgian, Georgian Supplement
# (U+10A0.. is the Georgian block; U+2D00.. is Georgian Supplement)
INSERT INTO t1 VALUES (_utf32 0x10A0), (_utf32 0x10A1);
INSERT INTO t1 VALUES (_utf32 0x10A2), (_utf32 0x10A3);
INSERT INTO t1 VALUES (_utf32 0x10A4), (_utf32 0x10A5);
INSERT INTO t1 VALUES (_utf32 0x10A6), (_utf32 0x10A7);
INSERT INTO t1 VALUES (_utf32 0x2D00), (_utf32 0x2D01);
INSERT INTO t1 VALUES (_utf32 0x2D02), (_utf32 0x2D03);
INSERT INTO t1 VALUES (_utf32 0x2D04), (_utf32 0x2D05);
INSERT INTO t1 VALUES (_utf32 0x2D06), (_utf32 0x2D07);

# Phonetic Extensions
INSERT INTO t1 VALUES (_utf32 0x1D7D);

# Letterlike Symbols
INSERT INTO t1 VALUES (_utf32 0x2132),(_utf32 0x214E);

# Number Forms
INSERT INTO t1 VALUES (_utf32 0x2183),(_utf32 0x2184);

# Coptic (block U+2C80..U+2CFF)
INSERT INTO t1 VALUES (_utf32 0x2C80), (_utf32 0x2C81);
INSERT INTO t1 VALUES (_utf32 0x2C82), (_utf32 0x2C83);
INSERT INTO t1 VALUES (_utf32 0x2C84), (_utf32 0x2C85);
INSERT INTO t1 VALUES (_utf32 0x2C86), (_utf32 0x2C87);
INSERT INTO t1 VALUES (_utf32 0x2C88), (_utf32 0x2C89);
INSERT INTO t1 VALUES (_utf32 0x2C8A), (_utf32 0x2C8B);
INSERT INTO t1 VALUES (_utf32 0x2C8C), (_utf32 0x2C8D);
INSERT INTO t1 VALUES (_utf32 0x2C8E), (_utf32 0x2C8F);

# Latin Extended-C (block U+2C60..U+2C7F)
INSERT INTO t1 VALUES (_utf32 0x2C60), (_utf32 0x2C61);
INSERT INTO t1 VALUES (_utf32 0x2C62), (_utf32 0x2C63);
INSERT INTO t1 VALUES (_utf32 0x2C64), (_utf32 0x2C65);
INSERT INTO t1 VALUES (_utf32 0x2C66), (_utf32 0x2C67);
INSERT INTO t1 VALUES (_utf32 0x2C68), (_utf32 0x2C69);
INSERT INTO t1 VALUES (_utf32 0x2C6A), (_utf32 0x2C6B);
INSERT INTO t1 VALUES (_utf32 0x2C6C), (_utf32 0x2C75);
INSERT INTO t1 VALUES (_utf32 0x2C76);

# Glagolitic
# (U+2C00..U+2C07 are capital letters, U+2C30..U+2C37 the matching small ones)
INSERT INTO t1 VALUES (_utf32 0x2C00), (_utf32 0x2C01);
INSERT INTO t1 VALUES (_utf32 0x2C02), (_utf32 0x2C03);
INSERT INTO t1 VALUES (_utf32 0x2C04), (_utf32 0x2C05);
INSERT INTO t1 VALUES (_utf32 0x2C06), (_utf32 0x2C07);
INSERT INTO t1 VALUES (_utf32 0x2C30), (_utf32 0x2C31);
INSERT INTO t1 VALUES (_utf32 0x2C32), (_utf32 0x2C33);
INSERT INTO t1 VALUES (_utf32 0x2C34), (_utf32 0x2C35);
INSERT INTO t1 VALUES (_utf32 0x2C36), (_utf32 0x2C37);

# Deseret (supplementary plane, i.e. code points above U+FFFF)
# (U+10400..U+10407 are capital letters, U+10428..U+1042F the matching small
# ones; the LIKE checks later in this script use U+10400 and U+10428)
INSERT INTO t1 VALUES (_utf32 0x10400), (_utf32 0x10401);
INSERT INTO t1 VALUES (_utf32 0x10402), (_utf32 0x10403);
INSERT INTO t1 VALUES (_utf32 0x10404), (_utf32 0x10405);
INSERT INTO t1 VALUES (_utf32 0x10406), (_utf32 0x10407);
INSERT INTO t1 VALUES (_utf32 0x10428), (_utf32 0x10429);
INSERT INTO t1 VALUES (_utf32 0x1042A), (_utf32 0x1042B);
INSERT INTO t1 VALUES (_utf32 0x1042C), (_utf32 0x1042D);
INSERT INTO t1 VALUES (_utf32 0x1042E), (_utf32 0x1042F);


#
# Unicode 5.1.0 characters
#
# One character per INSERT; the trailing comment gives its Unicode name.
# Characters are listed as capital/small pairs.

# Greek and Coptic
INSERT INTO t1 VALUES (_utf32 0x0370); # GREEK CAPITAL LETTER HETA
INSERT INTO t1 VALUES (_utf32 0x0371); # GREEK SMALL LETTER HETA
INSERT INTO t1 VALUES (_utf32 0x0372); # GREEK CAPITAL LETTER ARCHAIC SAMPI
INSERT INTO t1 VALUES (_utf32 0x0373); # GREEK SMALL LETTER ARCHAIC SAMPI

# Cyrillic Supplement
INSERT INTO t1 VALUES (_utf32 0x0514); # CYRILLIC CAPITAL LETTER LHA
INSERT INTO t1 VALUES (_utf32 0x0515); # CYRILLIC SMALL LETTER LHA
INSERT INTO t1 VALUES (_utf32 0x0516); # CYRILLIC CAPITAL LETTER RHA
INSERT INTO t1 VALUES (_utf32 0x0517); # CYRILLIC SMALL LETTER RHA

# Cyrillic Extended-B
INSERT INTO t1 VALUES (_utf32 0xA640); # CYRILLIC CAPITAL LETTER ZEMLYA
INSERT INTO t1 VALUES (_utf32 0xA641); # CYRILLIC SMALL LETTER ZEMLYA
INSERT INTO t1 VALUES (_utf32 0xA642); # CYRILLIC CAPITAL LETTER DZELO
INSERT INTO t1 VALUES (_utf32 0xA643); # CYRILLIC SMALL LETTER DZELO

# Latin Extended-D
INSERT INTO t1 VALUES (_utf32 0xA722); # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
INSERT INTO t1 VALUES (_utf32 0xA723); # LATIN SMALL LETTER EGYPTOLOGICAL ALEF
INSERT INTO t1 VALUES (_utf32 0xA724); # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
INSERT INTO t1 VALUES (_utf32 0xA725); # LATIN SMALL LETTER EGYPTOLOGICAL AIN

INSERT INTO t1 VALUES (_utf32 0xA726); # LATIN CAPITAL LETTER HENG
INSERT INTO t1 VALUES (_utf32 0xA727); # LATIN SMALL LETTER HENG
INSERT INTO t1 VALUES (_utf32 0xA728); # LATIN CAPITAL LETTER TZ
INSERT INTO t1 VALUES (_utf32 0xA729); # LATIN SMALL LETTER TZ
INSERT INTO t1 VALUES (_utf32 0xA72A); # LATIN CAPITAL LETTER TRESILLO
INSERT INTO t1 VALUES (_utf32 0xA72B); # LATIN SMALL LETTER TRESILLO

#
# Unicode 5.2.0 characters
#
# One character per INSERT; the trailing comment gives its Unicode name,
# general category (Lu = uppercase letter, Ll = lowercase letter) and the
# simple case mapping listed for it in UnicodeData.txt.

INSERT INTO t1 VALUES (_utf32 0x2CEB); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI (Lu; lowercase U+2CEC)
INSERT INTO t1 VALUES (_utf32 0x2CEC); # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI (Ll; uppercase U+2CEB)
INSERT INTO t1 VALUES (_utf32 0x2CED); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA (Lu; lowercase U+2CEE)
INSERT INTO t1 VALUES (_utf32 0x2CEE); # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA (Ll; uppercase U+2CED)

#
# Check case folding and UCA weights
#
# For every row, dump in hex: the stored value, its lower() and upper()
# forms, and its weight_string(); the raw character is shown last.
# Rows are sorted by the column's collation first; BINARY c is a tiebreaker
# so that values comparing equal under the collation still come out in a
# fixed order.
SELECT hex(c), hex(lower(c)), hex(upper(c)), hex(weight_string(c)), c
FROM t1 ORDER BY c, BINARY c;


#
# Check that LIKE works fine with and without an index.
# This test makes sure that cs->min_sort_char and cs->max_sort_char
# are set properly.
# Also check that LIKE is case insensitive for supplementary characters.
#

# Extra rows for the prefix search: 'a' alone, and 'a' followed by
# U+FFFF, U+10FFFF and U+10400 respectively.
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0xFFFF));
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10FFFF));
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10400));

# Phase 1: no index on c yet.
# NOTE(review): the 'a%' queries sort by c only, without the BINARY c
# tiebreaker used elsewhere; this presumably relies on the matching rows
# having distinct weights - confirm.
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
# The same pattern is tried with a capital (U+10400) and a small (U+10428)
# Deseret letter.
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c;

# Phase 2: add an index on c and repeat the same queries; EXPLAIN records
# the plan chosen for the prefix search.
ALTER TABLE t1 ADD KEY(c);
EXPLAIN SELECT hex(c) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c;

# Clean up.
DROP TABLE t1;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.