diff --git a/lib/web_ui/lib/src/engine/text/line_breaker.dart b/lib/web_ui/lib/src/engine/text/line_breaker.dart index fa2adb8561812..938e1ba3a38a8 100644 --- a/lib/web_ui/lib/src/engine/text/line_breaker.dart +++ b/lib/web_ui/lib/src/engine/text/line_breaker.dart @@ -127,10 +127,6 @@ List _computeLineBreakFragments(String text) { int? codePoint = getCodePoint(text, 0); LineCharProperty? curr = lineLookup.findForChar(codePoint); - // When there's a sequence of spaces, this variable contains the base property - // i.e. the property of the character preceding the sequence. - LineCharProperty baseOfSpaceSequence = LineCharProperty.WJ; - // When there's a sequence of combining marks, this variable contains the base // property i.e. the property of the character preceding the sequence. LineCharProperty baseOfCombiningMarks = LineCharProperty.AL; @@ -146,6 +142,9 @@ List _computeLineBreakFragments(String text) { type == LineBreakType.endOfText ? text.length : index; assert(fragmentEnd >= fragmentStart); + // Uncomment the following line to help debug line breaking. + // print('{$fragmentStart:$fragmentEnd} [$debugRuleNumber] -- $type'); + if (prev1 == LineCharProperty.SP) { trailingSpaces++; } else if (_isHardBreak(prev1) || prev1 == LineCharProperty.CR) { @@ -244,13 +243,6 @@ List _computeLineBreakFragments(String text) { break; } - // Establish the base for the space sequence. - if (prev1 != LineCharProperty.SP) { - // When the text/line starts with SP, we should treat the beginning of text/line - // as if it were a WJ (word joiner). - baseOfSpaceSequence = prev1 ?? LineCharProperty.WJ; - } - // Do not break before spaces or zero width space. // LB7: × SP // × ZW @@ -259,11 +251,17 @@ List _computeLineBreakFragments(String text) { continue; } + // Break after spaces. + // LB18: SP ÷ + if (prev1 == LineCharProperty.SP) { + setBreak(LineBreakType.opportunity, 18); + continue; + } + // Break before any character following a zero-width space, even if one or // more spaces intervene. // LB8: ZW SP* ÷ - if (prev1 == LineCharProperty.ZW || - baseOfSpaceSequence == LineCharProperty.ZW) { + if (prev1 == LineCharProperty.ZW) { setBreak(LineBreakType.opportunity, 8); continue; } @@ -343,6 +341,8 @@ List _computeLineBreakFragments(String text) { // The above is a quote from unicode.org. In our implementation, we did the // following modification: When there are spaces present, we consider it a // line break opportunity. + // + // We made this modification to match the browser behavior. if (prev1 != LineCharProperty.SP && (curr == LineCharProperty.CL || curr == LineCharProperty.CP || @@ -358,6 +358,8 @@ List _computeLineBreakFragments(String text) { // // The above is a quote from unicode.org. In our implementation, we did the // following modification: Allow breaks when there are spaces. + // + // We made this modification to match the browser behavior. if (prev1 == LineCharProperty.OP) { setBreak(LineBreakType.prohibited, 14); continue; @@ -368,6 +370,8 @@ List _computeLineBreakFragments(String text) { // // The above is a quote from unicode.org. In our implementation, we did the // following modification: Allow breaks when there are spaces. + // + // We made this modification to match the browser behavior. if (prev1 == LineCharProperty.QU && curr == LineCharProperty.OP) { setBreak(LineBreakType.prohibited, 15); continue; @@ -376,10 +380,12 @@ List _computeLineBreakFragments(String text) { // Do not break between closing punctuation and a nonstarter, even with // intervening spaces. // LB16: (CL | CP) SP* × NS - if ((prev1 == LineCharProperty.CL || - baseOfSpaceSequence == LineCharProperty.CL || - prev1 == LineCharProperty.CP || - baseOfSpaceSequence == LineCharProperty.CP) && + // + // The above is a quote from unicode.org. In our implementation, we did the + // following modification: Allow breaks when there are spaces. + // + // We made this modification to match the browser behavior. + if ((prev1 == LineCharProperty.CL || prev1 == LineCharProperty.CP) && curr == LineCharProperty.NS) { setBreak(LineBreakType.prohibited, 16); continue; @@ -387,20 +393,16 @@ List _computeLineBreakFragments(String text) { // Do not break within ‘——’, even with intervening spaces. // LB17: B2 SP* × B2 - if ((prev1 == LineCharProperty.B2 || - baseOfSpaceSequence == LineCharProperty.B2) && - curr == LineCharProperty.B2) { + // + // The above is a quote from unicode.org. In our implementation, we did the + // following modification: Allow breaks when there are spaces. + // + // We made this modification to match the browser behavior. + if (prev1 == LineCharProperty.B2 && curr == LineCharProperty.B2) { setBreak(LineBreakType.prohibited, 17); continue; } - // Break after spaces. - // LB18: SP ÷ - if (prev1 == LineCharProperty.SP) { - setBreak(LineBreakType.opportunity, 18); - continue; - } - // Do not break before or after quotation marks, such as ‘”’. // LB19: × QU // QU × diff --git a/lib/web_ui/test/text/line_breaker_test_helper.dart b/lib/web_ui/test/text/line_breaker_test_helper.dart index 7fa153dfb3805..8093bca91fb44 100644 --- a/lib/web_ui/test/text/line_breaker_test_helper.dart +++ b/lib/web_ui/test/text/line_breaker_test_helper.dart @@ -2,79 +2,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// The following test cases contradict rule LB25, so we are replacing them -// with correct expectations. -const Map _replacements = { - '× 007D ÷ 0025 ÷ # × [0.3] RIGHT CURLY BRACKET (CL) ÷ [999.0] PERCENT SIGN (PO) ÷ [0.3]': - '× 007D × 0025 ÷ # × [0.3] RIGHT CURLY BRACKET (CL) × [999.0] PERCENT SIGN (PO) ÷ [0.3]', - '× 007D × 0308 ÷ 0025 ÷ # × [0.3] RIGHT CURLY BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] PERCENT SIGN (PO) ÷ [0.3]': - '× 007D × 0308 × 0025 ÷ # × [0.3] RIGHT CURLY BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] PERCENT SIGN (PO) ÷ [0.3]', - '× 007D ÷ 0024 ÷ # × [0.3] RIGHT CURLY BRACKET (CL) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]': - '× 007D × 0024 ÷ # × [0.3] RIGHT CURLY BRACKET (CL) × [999.0] DOLLAR SIGN (PR) ÷ [0.3]', - '× 007D × 0308 ÷ 0024 ÷ # × [0.3] RIGHT CURLY BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]': - '× 007D × 0308 × 0024 ÷ # × [0.3] RIGHT CURLY BRACKET (CL) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] DOLLAR SIGN (PR) ÷ [0.3]', - '× 002C ÷ 0030 ÷ # × [0.3] COMMA (IS) ÷ [999.0] DIGIT ZERO (NU) ÷ [0.3]': - '× 002C × 0030 ÷ # × [0.3] COMMA (IS) × [999.0] DIGIT ZERO (NU) ÷ [0.3]', - '× 002C × 0308 ÷ 0030 ÷ # × [0.3] COMMA (IS) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] DIGIT ZERO (NU) ÷ [0.3]': - '× 002C × 0308 × 0030 ÷ # × [0.3] COMMA (IS) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] DIGIT ZERO (NU) ÷ [0.3]', - '× 0025 ÷ 2329 ÷ # × [0.3] PERCENT SIGN (PO) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]': - '× 0025 × 2329 ÷ # × [0.3] PERCENT SIGN (PO) × [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]', - '× 0025 × 0308 ÷ 2329 ÷ # × [0.3] PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]': - '× 0025 × 0308 × 2329 ÷ # × [0.3] PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]', - '× 0025 ÷ 0028 ÷ # × [0.3] PERCENT SIGN (PO) ÷ [999.0] LEFT PARENTHESIS (OP_OP30) ÷ [0.3]': - '× 0025 × 0028 ÷ # × [0.3] PERCENT SIGN (PO) × [999.0] LEFT PARENTHESIS (OP_OP30) ÷ [0.3]', - '× 0025 × 0308 ÷ 0028 ÷ # × [0.3] PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT PARENTHESIS (OP_OP30) ÷ [0.3]': - '× 0025 × 0308 × 0028 ÷ # × [0.3] PERCENT SIGN (PO) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] LEFT PARENTHESIS (OP_OP30) ÷ [0.3]', - '× 0024 ÷ 2329 ÷ # × [0.3] DOLLAR SIGN (PR) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]': - '× 0024 × 2329 ÷ # × [0.3] DOLLAR SIGN (PR) × [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]', - '× 0024 × 0308 ÷ 2329 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]': - '× 0024 × 0308 × 2329 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] LEFT-POINTING ANGLE BRACKET (OP) ÷ [0.3]', - '× 0024 ÷ 0028 ÷ # × [0.3] DOLLAR SIGN (PR) ÷ [999.0] LEFT PARENTHESIS (OP_OP30) ÷ [0.3]': - '× 0024 × 0028 ÷ # × [0.3] DOLLAR SIGN (PR) × [999.0] LEFT PARENTHESIS (OP_OP30) ÷ [0.3]', - '× 0024 × 0308 ÷ 0028 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] LEFT PARENTHESIS (OP_OP30) ÷ [0.3]': - '× 0024 × 0308 × 0028 ÷ # × [0.3] DOLLAR SIGN (PR) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] LEFT PARENTHESIS (OP_OP30) ÷ [0.3]', - '× 002F ÷ 0030 ÷ # × [0.3] SOLIDUS (SY) ÷ [999.0] DIGIT ZERO (NU) ÷ [0.3]': - '× 002F × 0030 ÷ # × [0.3] SOLIDUS (SY) × [999.0] DIGIT ZERO (NU) ÷ [0.3]', - '× 002F × 0308 ÷ 0030 ÷ # × [0.3] SOLIDUS (SY) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] DIGIT ZERO (NU) ÷ [0.3]': - '× 002F × 0308 × 0030 ÷ # × [0.3] SOLIDUS (SY) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] DIGIT ZERO (NU) ÷ [0.3]', - '× 0029 ÷ 0025 ÷ # × [0.3] RIGHT PARENTHESIS (CP_CP30) ÷ [999.0] PERCENT SIGN (PO) ÷ [0.3]': - '× 0029 × 0025 ÷ # × [0.3] RIGHT PARENTHESIS (CP_CP30) × [999.0] PERCENT SIGN (PO) ÷ [0.3]', - '× 0029 × 0308 ÷ 0025 ÷ # × [0.3] RIGHT PARENTHESIS (CP_CP30) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] PERCENT SIGN (PO) ÷ [0.3]': - '× 0029 × 0308 × 0025 ÷ # × [0.3] RIGHT PARENTHESIS (CP_CP30) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] PERCENT SIGN (PO) ÷ [0.3]', - '× 0029 ÷ 0024 ÷ # × [0.3] RIGHT PARENTHESIS (CP_CP30) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]': - '× 0029 × 0024 ÷ # × [0.3] RIGHT PARENTHESIS (CP_CP30) × [999.0] DOLLAR SIGN (PR) ÷ [0.3]', - '× 0029 × 0308 ÷ 0024 ÷ # × [0.3] RIGHT PARENTHESIS (CP_CP30) × [9.0] COMBINING DIAERESIS (CM1_CM) ÷ [999.0] DOLLAR SIGN (PR) ÷ [0.3]': - '× 0029 × 0308 × 0024 ÷ # × [0.3] RIGHT PARENTHESIS (CP_CP30) × [9.0] COMBINING DIAERESIS (CM1_CM) × [999.0] DOLLAR SIGN (PR) ÷ [0.3]', - '× 0065 × 0071 × 0075 × 0061 × 006C × 0073 × 0020 × 002E ÷ 0033 × 0035 × 0020 ÷ 0063 × 0065 × 006E × 0074 × 0073 ÷ # × [0.3] LATIN SMALL LETTER E (AL) × [28.0] LATIN SMALL LETTER Q (AL) × [28.0] LATIN SMALL LETTER U (AL) × [28.0] LATIN SMALL LETTER A (AL) × [28.0] LATIN SMALL LETTER L (AL) × [28.0] LATIN SMALL LETTER S (AL) × [7.01] SPACE (SP) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT THREE (NU) × [25.03] DIGIT FIVE (NU) × [7.01] SPACE (SP) ÷ [18.0] LATIN SMALL LETTER C (AL) × [28.0] LATIN SMALL LETTER E (AL) × [28.0] LATIN SMALL LETTER N (AL) × [28.0] LATIN SMALL LETTER T (AL) × [28.0] LATIN SMALL LETTER S (AL) ÷ [0.3]': - '× 0065 × 0071 × 0075 × 0061 × 006C × 0073 × 0020 × 002E × 0033 × 0035 × 0020 ÷ 0063 × 0065 × 006E × 0074 × 0073 ÷ # × [0.3] LATIN SMALL LETTER E (AL) × [28.0] LATIN SMALL LETTER Q (AL) × [28.0] LATIN SMALL LETTER U (AL) × [28.0] LATIN SMALL LETTER A (AL) × [28.0] LATIN SMALL LETTER L (AL) × [28.0] LATIN SMALL LETTER S (AL) × [7.01] SPACE (SP) × [13.02] FULL STOP (IS) × [999.0] DIGIT THREE (NU) × [25.03] DIGIT FIVE (NU) × [7.01] SPACE (SP) ÷ [18.0] LATIN SMALL LETTER C (AL) × [28.0] LATIN SMALL LETTER E (AL) × [28.0] LATIN SMALL LETTER N (AL) × [28.0] LATIN SMALL LETTER T (AL) × [28.0] LATIN SMALL LETTER S (AL) ÷ [0.3]', - '× 0063 × 006F × 0064 × 0065 × 005C ÷ 0028 × 0073 × 005C × 0029 ÷ # × [0.3] LATIN SMALL LETTER C (AL) × [28.0] LATIN SMALL LETTER O (AL) × [28.0] LATIN SMALL LETTER D (AL) × [28.0] LATIN SMALL LETTER E (AL) × [24.03] REVERSE SOLIDUS (PR) ÷ [999.0] LEFT PARENTHESIS (OP_OP30) × [14.0] LATIN SMALL LETTER S (AL) × [24.03] REVERSE SOLIDUS (PR) × [13.02] RIGHT PARENTHESIS (CP_CP30) ÷ [0.3]': - '× 0063 × 006F × 0064 × 0065 × 005C × 0028 × 0073 × 005C × 0029 ÷ # × [0.3] LATIN SMALL LETTER C (AL) × [28.0] LATIN SMALL LETTER O (AL) × [28.0] LATIN SMALL LETTER D (AL) × [28.0] LATIN SMALL LETTER E (AL) × [24.03] REVERSE SOLIDUS (PR) × [999.0] LEFT PARENTHESIS (OP_OP30) × [14.0] LATIN SMALL LETTER S (AL) × [24.03] REVERSE SOLIDUS (PR) × [13.02] RIGHT PARENTHESIS (CP_CP30) ÷ [0.3]', - '× 0063 × 006F × 0064 × 0065 × 005C ÷ 007B × 0073 × 005C × 007D ÷ # × [0.3] LATIN SMALL LETTER C (AL) × [28.0] LATIN SMALL LETTER O (AL) × [28.0] LATIN SMALL LETTER D (AL) × [28.0] LATIN SMALL LETTER E (AL) × [24.03] REVERSE SOLIDUS (PR) ÷ [999.0] LEFT CURLY BRACKET (OP_OP30) × [14.0] LATIN SMALL LETTER S (AL) × [24.03] REVERSE SOLIDUS (PR) × [13.02] RIGHT CURLY BRACKET (CL) ÷ [0.3]': - '× 0063 × 006F × 0064 × 0065 × 005C × 007B × 0073 × 005C × 007D ÷ # × [0.3] LATIN SMALL LETTER C (AL) × [28.0] LATIN SMALL LETTER O (AL) × [28.0] LATIN SMALL LETTER D (AL) × [28.0] LATIN SMALL LETTER E (AL) × [24.03] REVERSE SOLIDUS (PR) × [999.0] LEFT CURLY BRACKET (OP_OP30) × [14.0] LATIN SMALL LETTER S (AL) × [24.03] REVERSE SOLIDUS (PR) × [13.02] RIGHT CURLY BRACKET (CL) ÷ [0.3]', - '× 0061 × 002E ÷ 0032 × 0020 ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT TWO (NU) × [7.01] SPACE (SP) ÷ [0.3]': - '× 0061 × 002E × 0032 × 0020 ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT TWO (NU) × [7.01] SPACE (SP) ÷ [0.3]', - '× 0061 × 002E ÷ 0032 × 0020 ÷ 0915 ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT TWO (NU) × [7.01] SPACE (SP) ÷ [18.0] DEVANAGARI LETTER KA (AL) ÷ [0.3]': - '× 0061 × 002E × 0032 × 0020 ÷ 0915 ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT TWO (NU) × [7.01] SPACE (SP) ÷ [18.0] DEVANAGARI LETTER KA (AL) ÷ [0.3]', - '× 0061 × 002E ÷ 0032 × 0020 ÷ 672C ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT TWO (NU) × [7.01] SPACE (SP) ÷ [18.0] CJK UNIFIED IDEOGRAPH-672C (ID) ÷ [0.3]': - '× 0061 × 002E × 0032 × 0020 ÷ 672C ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT TWO (NU) × [7.01] SPACE (SP) ÷ [18.0] CJK UNIFIED IDEOGRAPH-672C (ID) ÷ [0.3]', - '× 0061 × 002E ÷ 0032 × 3000 ÷ 672C ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT TWO (NU) × [21.01] IDEOGRAPHIC SPACE (BA) ÷ [999.0] CJK UNIFIED IDEOGRAPH-672C (ID) ÷ [0.3]': - '× 0061 × 002E × 0032 × 3000 ÷ 672C ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT TWO (NU) × [21.01] IDEOGRAPHIC SPACE (BA) ÷ [999.0] CJK UNIFIED IDEOGRAPH-672C (ID) ÷ [0.3]', - '× 0061 × 002E ÷ 0032 × 3000 ÷ 307E ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT TWO (NU) × [21.01] IDEOGRAPHIC SPACE (BA) ÷ [999.0] HIRAGANA LETTER MA (ID) ÷ [0.3]': - '× 0061 × 002E × 0032 × 3000 ÷ 307E ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT TWO (NU) × [21.01] IDEOGRAPHIC SPACE (BA) ÷ [999.0] HIRAGANA LETTER MA (ID) ÷ [0.3]', - '× 0061 × 002E ÷ 0032 × 3000 ÷ 0033 ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT TWO (NU) × [21.01] IDEOGRAPHIC SPACE (BA) ÷ [999.0] DIGIT THREE (NU) ÷ [0.3]': - '× 0061 × 002E × 0032 × 3000 ÷ 0033 ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT TWO (NU) × [21.01] IDEOGRAPHIC SPACE (BA) ÷ [999.0] DIGIT THREE (NU) ÷ [0.3]', - '× 0041 × 002E ÷ 0031 × 0020 ÷ BABB ÷ # × [0.3] LATIN CAPITAL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT ONE (NU) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE MOS (H3) ÷ [0.3]': - '× 0041 × 002E × 0031 × 0020 ÷ BABB ÷ # × [0.3] LATIN CAPITAL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT ONE (NU) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE MOS (H3) ÷ [0.3]', - '× BD24 ÷ C5B4 × 002E × 0020 ÷ 0041 × 002E ÷ 0032 × 0020 ÷ BCFC ÷ # × [0.3] HANGUL SYLLABLE BWASS (H3) ÷ [999.0] HANGUL SYLLABLE EO (H2) × [13.02] FULL STOP (IS) × [7.01] SPACE (SP) ÷ [18.0] LATIN CAPITAL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT TWO (NU) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE BOL (H3) ÷ [0.3]': - '× BD24 ÷ C5B4 × 002E × 0020 ÷ 0041 × 002E × 0032 × 0020 ÷ BCFC ÷ # × [0.3] HANGUL SYLLABLE BWASS (H3) ÷ [999.0] HANGUL SYLLABLE EO (H2) × [13.02] FULL STOP (IS) × [7.01] SPACE (SP) ÷ [18.0] LATIN CAPITAL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT TWO (NU) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE BOL (H3) ÷ [0.3]', - '× BD10 ÷ C694 × 002E × 0020 ÷ 0041 × 002E ÷ 0033 × 0020 ÷ BABB ÷ # × [0.3] HANGUL SYLLABLE BWA (H2) ÷ [999.0] HANGUL SYLLABLE YO (H2) × [13.02] FULL STOP (IS) × [7.01] SPACE (SP) ÷ [18.0] LATIN CAPITAL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT THREE (NU) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE MOS (H3) ÷ [0.3]': - '× BD10 ÷ C694 × 002E × 0020 ÷ 0041 × 002E × 0033 × 0020 ÷ BABB ÷ # × [0.3] HANGUL SYLLABLE BWA (H2) ÷ [999.0] HANGUL SYLLABLE YO (H2) × [13.02] FULL STOP (IS) × [7.01] SPACE (SP) ÷ [18.0] LATIN CAPITAL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT THREE (NU) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE MOS (H3) ÷ [0.3]', - '× C694 × 002E × 0020 ÷ 0041 × 002E ÷ 0034 × 0020 ÷ BABB ÷ # × [0.3] HANGUL SYLLABLE YO (H2) × [13.02] FULL STOP (IS) × [7.01] SPACE (SP) ÷ [18.0] LATIN CAPITAL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT FOUR (NU) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE MOS (H3) ÷ [0.3]': - '× C694 × 002E × 0020 ÷ 0041 × 002E × 0034 × 0020 ÷ BABB ÷ # × [0.3] HANGUL SYLLABLE YO (H2) × [13.02] FULL STOP (IS) × [7.01] SPACE (SP) ÷ [18.0] LATIN CAPITAL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT FOUR (NU) × [7.01] SPACE (SP) ÷ [18.0] HANGUL SYLLABLE MOS (H3) ÷ [0.3]', - '× 0061 × 002E ÷ 0032 × 3000 ÷ 300C ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) ÷ [999.0] DIGIT TWO (NU) × [21.01] IDEOGRAPHIC SPACE (BA) ÷ [999.0] LEFT CORNER BRACKET (OP) ÷ [0.3]': - '× 0061 × 002E × 0032 × 3000 ÷ 300C ÷ # × [0.3] LATIN SMALL LETTER A (AL) × [13.02] FULL STOP (IS) × [999.0] DIGIT TWO (NU) × [21.01] IDEOGRAPHIC SPACE (BA) ÷ [999.0] LEFT CORNER BRACKET (OP) ÷ [0.3]', -}; - /// Parses raw test data into a list of [TestCase] objects. List parseRawTestData(String rawTestData) { return rawTestData @@ -90,25 +17,51 @@ bool isValidTestCase(String line) { } String _checkReplacement(String line) { - String replacement = _replacements[line] ?? line; - // Special case for rule LB13 to allow line breaks after spaces. - if (replacement.contains('SPACE (SP) × [13.')) { + String replacement = line; + + // Special cases for rules LB8, LB11, LB13, LB14, LB15, LB16, LB17 to allow + // line breaks after spaces. + final RegExp spacesRegex = RegExp(r'SPACE \(SP\) × \[(8|11|13|14|15|16|17)\.'); + if (replacement.contains(spacesRegex)) { replacement = replacement - .replaceAll('0020 ×', '0020 ÷') - .replaceFirst('SPACE (SP) × [13.', 'SPACE (SP) ÷ [13.'); + .replaceAll('0020 ×', '0020 ÷') // SPACE (SP) + .replaceAllMapped(spacesRegex, (Match m) => 'SPACE (SP) ÷ [${m.group(1)}.'); } - // Special case for rule LB14 to allow line breaks after spaces. - if (replacement.contains('SPACE (SP) × [14.')) { + + // Some test cases contradict rule LB25, so we are fixing them with the few + // regexes below. + + final RegExp lb25Regex1 = RegExp(r'\((CP_CP30|CL)\)(.*?) ÷ \[999\.0\] (PERCENT|DOLLAR)'); + if (replacement.contains(lb25Regex1)) { replacement = replacement - .replaceAll('0020 ×', '0020 ÷') - .replaceAll('SPACE (SP) × [14.', 'SPACE (SP) ÷ [14.'); + .replaceAll(' ÷ 0024', ' × 0024') // DOLLAR SIGN (PR) + .replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO) + .replaceAllMapped( + lb25Regex1, + (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', + ); } - // Special case for rule LB15 to allow line breaks after spaces. - if (replacement.contains('SPACE (SP) × [15.')) { + final RegExp lb25Regex2 = RegExp(r'\((IS|SY)\)(.*?) ÷ \[999\.0\] (DIGIT)'); + if (replacement.contains(lb25Regex2)) { replacement = replacement - .replaceAll('0020 ×', '0020 ÷') - .replaceAll('SPACE (SP) × [15.', 'SPACE (SP) ÷ [15.'); + .replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU) + .replaceAllMapped( + lb25Regex2, + (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', + ); } + final RegExp lb25Regex3 = RegExp(r'\((PR|PO)\)(.*?) ÷ \[999\.0\] (LEFT)'); + if (replacement.contains(lb25Regex3)) { + replacement = replacement + .replaceAll(' ÷ 0028', ' × 0028') // LEFT PARENTHESIS (OP_OP30) + .replaceAll(' ÷ 007B', ' × 007B') // LEFT CURLY BRACKET (OP_OP30) + .replaceAll(' ÷ 2329', ' × 2329') // LEFT-POINTING ANGLE BRACKET (OP) + .replaceAllMapped( + lb25Regex3, + (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', + ); + } + return replacement; }