diff --git a/lib/Platform/Unicode/UnicodeData.inc b/lib/Platform/Unicode/UnicodeData.inc
index 00b935b00ed..a03d6b26a01 100644
--- a/lib/Platform/Unicode/UnicodeData.inc
+++ b/lib/Platform/Unicode/UnicodeData.inc
@@ -5,11 +5,28 @@
 // SpecialCasing.txt SHA1: 67fad2f44098864ce4362ea2434a85b82a7566ec
 // *** DO NOT EDIT BY HAND ***

+/// An inclusive range of Unicode characters.
 struct UnicodeRange {
   uint32_t first;
   uint32_t second;
 };

+/// A UnicodeTransformRange expresses a mapping such as case folding.
+/// A character cp is mapped to cp + delta if (cp - start) % modulo == 0.
+struct UnicodeTransformRange {
+  /// The first codepoint of the range.
+  unsigned start : 24;
+
+  /// The number of characters in the range.
+  unsigned count : 8;
+
+  /// The signed delta amount.
+  int delta : 24;
+
+  /// The modulo amount.
+  unsigned modulo : 8;
+};
+
 // UNICODE_LETTERS Lu Ll Lt Lm Lo Nl
 // static constexpr uint32_t UNICODE_LETTERS_SIZE = 335;
 static constexpr UnicodeRange UNICODE_LETTERS[] = {
@@ -319,3 +336,64 @@ static constexpr UnicodePrecanonicalizationMapping UNICODE_PRECANONS[] = {
     {0x212A, {}},
     {0x212B, {}},
     {0xA64A, {0x1C88, 0xA64B}}};
+
+// static constexpr uint32_t LEGACY_CANONS_SIZE = 173;
+static constexpr UnicodeTransformRange LEGACY_CANONS[] = {
+    {0x0061, 26, -32, 1}, {0x00B5, 1, 743, 1}, {0x00E0, 23, -32, 1},
+    {0x00F8, 7, -32, 1}, {0x00FF, 1, 121, 1}, {0x0101, 47, -1, 2},
+    {0x0133, 5, -1, 2}, {0x013A, 15, -1, 2}, {0x014B, 45, -1, 2},
+    {0x017A, 5, -1, 2}, {0x0180, 1, 195, 1}, {0x0183, 3, -1, 2},
+    {0x0188, 5, -1, 4}, {0x0192, 1, -1, 1}, {0x0195, 1, 97, 1},
+    {0x0199, 1, -1, 1}, {0x019A, 1, 163, 1}, {0x019E, 1, 130, 1},
+    {0x01A1, 5, -1, 2}, {0x01A8, 6, -1, 5}, {0x01B0, 5, -1, 4},
+    {0x01B6, 4, -1, 3}, {0x01BD, 1, -1, 1}, {0x01BF, 1, 56, 1},
+    {0x01C5, 1, -1, 1}, {0x01C6, 1, -2, 1}, {0x01C8, 1, -1, 1},
+    {0x01C9, 1, -2, 1}, {0x01CB, 1, -1, 1}, {0x01CC, 1, -2, 1},
+    {0x01CE, 15, -1, 2}, {0x01DD, 1, -79, 1}, {0x01DF, 17, -1, 2},
+    {0x01F2, 1, -1, 1}, {0x01F3, 1, -2, 1}, {0x01F5, 5, -1, 4},
+    {0x01FB, 37, -1, 2}, {0x0223, 17, -1, 2}, {0x023C, 1, -1, 1},
+    {0x023F, 2, 10815, 1}, {0x0242, 6, -1, 5}, {0x0249, 7, -1, 2},
+    {0x0250, 1, 10783, 1}, {0x0251, 1, 10780, 1}, {0x0252, 1, 10782, 1},
+    {0x0253, 1, -210, 1}, {0x0254, 1, -206, 1}, {0x0256, 2, -205, 1},
+    {0x0259, 1, -202, 1}, {0x025B, 1, -203, 1}, {0x025C, 1, 42319, 1},
+    {0x0260, 1, -205, 1}, {0x0261, 1, 42315, 1}, {0x0263, 1, -207, 1},
+    {0x0265, 1, 42280, 1}, {0x0266, 1, 42308, 1}, {0x0268, 1, -209, 1},
+    {0x0269, 1, -211, 1}, {0x026A, 1, 42308, 1}, {0x026B, 1, 10743, 1},
+    {0x026C, 1, 42305, 1}, {0x026F, 1, -211, 1}, {0x0271, 1, 10749, 1},
+    {0x0272, 1, -213, 1}, {0x0275, 1, -214, 1}, {0x027D, 1, 10727, 1},
+    {0x0280, 1, -218, 1}, {0x0282, 1, 42307, 1}, {0x0283, 1, -218, 1},
+    {0x0287, 1, 42282, 1}, {0x0288, 1, -218, 1}, {0x0289, 1, -69, 1},
+    {0x028A, 2, -217, 1}, {0x028C, 1, -71, 1}, {0x0292, 1, -219, 1},
+    {0x029D, 1, 42261, 1}, {0x029E, 1, 42258, 1}, {0x0345, 1, 84, 1},
+    {0x0371, 3, -1, 2}, {0x0377, 1, -1, 1}, {0x037B, 3, 130, 1},
+    {0x03AC, 1, -38, 1}, {0x03AD, 3, -37, 1}, {0x03B1, 17, -32, 1},
+    {0x03C2, 1, -31, 1}, {0x03C3, 9, -32, 1}, {0x03CC, 1, -64, 1},
+    {0x03CD, 2, -63, 1}, {0x03D0, 1, -62, 1}, {0x03D1, 1, -57, 1},
+    {0x03D5, 1, -47, 1}, {0x03D6, 1, -54, 1}, {0x03D7, 1, -8, 1},
+    {0x03D9, 23, -1, 2}, {0x03F0, 1, -86, 1}, {0x03F1, 1, -80, 1},
+    {0x03F2, 1, 7, 1}, {0x03F3, 1, -116, 1}, {0x03F5, 1, -96, 1},
+    {0x03F8, 4, -1, 3}, {0x0430, 32, -32, 1}, {0x0450, 16, -80, 1},
+    {0x0461, 33, -1, 2}, {0x048B, 53, -1, 2}, {0x04C2, 13, -1, 2},
+    {0x04CF, 1, -15, 1}, {0x04D1, 95, -1, 2}, {0x0561, 38, -48, 1},
+    {0x10D0, 43, 3008, 1}, {0x10FD, 3, 3008, 1}, {0x13F8, 6, -8, 1},
+    {0x1C80, 1, -6254, 1}, {0x1C81, 1, -6253, 1}, {0x1C82, 1, -6244, 1},
+    {0x1C83, 2, -6242, 1}, {0x1C85, 1, -6243, 1}, {0x1C86, 1, -6236, 1},
+    {0x1C87, 1, -6181, 1}, {0x1C88, 1, 35266, 1}, {0x1D79, 1, 35332, 1},
+    {0x1D7D, 1, 3814, 1}, {0x1D8E, 1, 35384, 1}, {0x1E01, 149, -1, 2},
+    {0x1E9B, 1, -59, 1}, {0x1EA1, 95, -1, 2}, {0x1F00, 8, 8, 1},
+    {0x1F10, 6, 8, 1}, {0x1F20, 8, 8, 1}, {0x1F30, 8, 8, 1},
+    {0x1F40, 6, 8, 1}, {0x1F51, 7, 8, 2}, {0x1F60, 8, 8, 1},
+    {0x1F70, 2, 74, 1}, {0x1F72, 4, 86, 1}, {0x1F76, 2, 100, 1},
+    {0x1F78, 2, 128, 1}, {0x1F7A, 2, 112, 1}, {0x1F7C, 2, 126, 1},
+    {0x1FB0, 2, 8, 1}, {0x1FBE, 1, -7205, 1}, {0x1FD0, 2, 8, 1},
+    {0x1FE0, 2, 8, 1}, {0x1FE5, 1, 7, 1}, {0x214E, 1, -28, 1},
+    {0x2170, 16, -16, 1}, {0x2184, 1, -1, 1}, {0x24D0, 26, -26, 1},
+    {0x2C30, 47, -48, 1}, {0x2C61, 1, -1, 1}, {0x2C65, 1, -10795, 1},
+    {0x2C66, 1, -10792, 1}, {0x2C68, 5, -1, 2}, {0x2C73, 4, -1, 3},
+    {0x2C81, 99, -1, 2}, {0x2CEC, 3, -1, 2}, {0x2CF3, 1, -1, 1},
+    {0x2D00, 38, -7264, 1}, {0x2D27, 7, -7264, 6}, {0xA641, 45, -1, 2},
+    {0xA681, 27, -1, 2}, {0xA723, 13, -1, 2}, {0xA733, 61, -1, 2},
+    {0xA77A, 3, -1, 2}, {0xA77F, 9, -1, 2}, {0xA78C, 6, -1, 5},
+    {0xA793, 1, -1, 1}, {0xA794, 1, 48, 1}, {0xA797, 19, -1, 2},
+    {0xA7B5, 11, -1, 2}, {0xA7C3, 1, -1, 1}, {0xAB53, 1, -928, 1},
+    {0xAB70, 80, -38864, 1}, {0xFF41, 26, -32, 1}};
diff --git a/utils/genUnicodeTable.py b/utils/genUnicodeTable.py
index 215db789f46..53c9152f102 100755
--- a/utils/genUnicodeTable.py
+++ b/utils/genUnicodeTable.py
@@ -48,8 +48,24 @@ def print_header(unicodedata_sha1, specialcasing_sha1):
 // SpecialCasing.txt SHA1: ${specialcasing_sha1}
 // *** DO NOT EDIT BY HAND ***

+/// An inclusive range of Unicode characters.
 struct UnicodeRange {
   uint32_t first;
   uint32_t second;
 };
+/// A UnicodeTransformRange expresses a mapping such as case folding.
+/// A character cp is mapped to cp + delta if (cp - start) % modulo == 0.
+struct UnicodeTransformRange {
+  /// The first codepoint of the range.
+  unsigned start:24;
+
+  /// The number of characters in the range.
+  unsigned count:8;
+
+  /// The signed delta amount.
+  int delta:24;
+
+  /// The modulo amount.
+  unsigned modulo:8;
+};
 """,
         today=str(datetime.date.today()),
         unicodedata_sha1=unicodedata_sha1,
@@ -114,6 +130,58 @@ def print_categories(unicode_data_lines):
         run_interval(unicode_data_lines, cat.split())


+def stride_from(p1, p2):
+    return p2[0] - p1[0]
+
+
+def delta_within(p):
+    return p[1] - p[0]
+
+
+def as_hex(cp):
+    return "0x%.4X" % cp
+
+
+class DeltaMapBlock(object):
+    def __init__(self):
+        self.pairs = []
+
+    def stride(self):
+        return stride_from(self.pairs[0], self.pairs[1])
+
+    def delta(self):
+        return delta_within(self.pairs[0])
+
+    def can_append(self, pair):
+        if not self.pairs:
+            return True
+        if pair[0] - self.pairs[0][0] >= 256:
+            return False
+        if self.delta() != delta_within(pair):
+            return False
+        return len(self.pairs) < 2 or self.stride() == stride_from(self.pairs[-1], pair)
+
+    @staticmethod
+    def append_to_list(blocks, p):
+        if not blocks or not blocks[-1].can_append(p):
+            blocks.append(DeltaMapBlock())
+        blocks[-1].pairs.append(p)
+
+    def output(self):
+        pairs = self.pairs
+        if not pairs:
+            return ""
+
+        first = pairs[0][0]
+        last = pairs[-1][0]
+        modulo = self.stride() if len(pairs) >= 2 else 1
+        delta = self.delta()
+        code = Template("{$first, $count, $delta, $modulo}").substitute(
+            first=as_hex(first), count=last - first + 1, delta=delta, modulo=modulo
+        )
+        return code.strip()
+
+
 class CaseMap(object):
     """Unicode case mapping helper.

@@ -186,6 +254,29 @@ def canonicalize(self, ch):
         return upper_ch


+def print_canonicalizations(casemap):
+    blocks = []
+    for cp in casemap.codepoints:
+        # legacy does not decode surrogate pairs, so we can skip large code points.
+        if cp > 0xFFFF:
+            continue
+        canon_cp = casemap.canonicalize(cp)
+        if cp != canon_cp:
+            DeltaMapBlock.append_to_list(blocks, (cp, canon_cp))
+
+    print_template(
+        """
+// static constexpr uint32_t ${name}_SIZE = ${entry_count};
+static constexpr UnicodeTransformRange ${name}[] = {
+${entry_text}
+};
+""",
+        name="LEGACY_CANONS",
+        entry_count=len(blocks),
+        entry_text=",\n".join(b.output() for b in blocks),
+    )
+
+
 def print_precanonicalizations(casemap):
     """Print a table of pre-canonicalizations.

@@ -265,5 +356,9 @@ def as_hex(cp):
     )
     udata_lines = unicode_data.decode("utf-8").splitlines()
     special_lines = special_casing.decode("utf-8").splitlines()
+    casemap = CaseMap(
+        unicode_data_lines=udata_lines, special_casing_lines=special_lines
+    )
     print_categories(udata_lines)
-    print_precanonicalizations(CaseMap(udata_lines, special_lines))
+    print_precanonicalizations(casemap)
+    print_canonicalizations(casemap)
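Note (reviewer sketch, not part of the patch): the snippet below illustrates how a consumer of the generated table could apply a UnicodeTransformRange. The helper name applyTransformRanges and the linear scan are assumptions for illustration and are not the lookup code Hermes actually uses; the UnicodeTransformRange struct and the LEGACY_CANONS table from UnicodeData.inc are assumed to be in scope.

#include <cstddef>
#include <cstdint>

// Returns the canonicalized codepoint for cp, or cp unchanged if no range applies.
// Assumes `ranges` is sorted by `start` and the ranges do not overlap, as generated.
inline uint32_t applyTransformRanges(
    uint32_t cp,
    const UnicodeTransformRange *ranges,
    size_t size) {
  for (size_t i = 0; i < size; ++i) {
    const UnicodeTransformRange &r = ranges[i];
    if (cp < r.start)
      break; // Sorted by start: no later range can contain cp.
    if (cp >= r.start + r.count)
      continue; // cp lies past this range; keep scanning.
    // Inside the range, only codepoints aligned to the stride (modulo) are mapped.
    if ((cp - r.start) % r.modulo != 0)
      return cp;
    return cp + r.delta;
  }
  return cp;
}

For example, applyTransformRanges(0x0061, LEGACY_CANONS, 173) yields 0x0041 ('a' maps to 'A' through {0x0061, 26, -32, 1}), while 0x0102 comes back unchanged because (0x0102 - 0x0101) % 2 != 0 within {0x0101, 47, -1, 2}.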