|
38 | 38 | #include "ecma-regexp-object.h"
|
39 | 39 | #endif /* JERRY_BUILTIN_REGEXP */
|
40 | 40 |
|
| 41 | +#if JERRY_ICU |
| 42 | +#include "unicode/unorm2.h" |
| 43 | +#endif /* JERRY_ICU */ |
| 44 | + |
41 | 45 | #if JERRY_BUILTIN_STRING
|
42 | 46 |
|
43 | 47 | #define ECMA_BUILTINS_INTERNAL
|
|
80 | 84 |
|
81 | 85 | ECMA_STRING_PROTOTYPE_SUBSTR,
|
82 | 86 |
|
| 87 | + ECMA_STRING_PROTOTYPE_NORMALIZE, |
83 | 88 | ECMA_STRING_PROTOTYPE_REPEAT,
|
84 | 89 | ECMA_STRING_PROTOTYPE_CODE_POINT_AT,
|
85 | 90 | ECMA_STRING_PROTOTYPE_PAD_START,
|
@@ -1226,6 +1231,141 @@ ecma_builtin_string_prototype_object_trim (ecma_string_t *original_string_p) /**
|
1226 | 1231 |
|
1227 | 1232 | #if JERRY_ESNEXT
|
1228 | 1233 |
|
| 1234 | +/** |
| 1235 | + * ICU string normalizer instance callback |
| 1236 | + */ |
| 1237 | +typedef const UNormalizer2 *(*icu_string_normalizer_instance_cb_t) (UErrorCode *); |
| 1238 | + |
| 1239 | +/** |
| 1240 | + * Normalization form descriptor |
| 1241 | + */ |
| 1242 | +typedef struct |
| 1243 | +{ |
| 1244 | + lit_magic_string_id_t kind; /**< kind */ |
| 1245 | + icu_string_normalizer_instance_cb_t instance_cb; /**< normalizer instance callback */ |
| 1246 | +} icu_string_form_normalizer_t; |
| 1247 | + |
| 1248 | +/** |
| 1249 | + * Helper macro to register form normalizer entries |
| 1250 | + */ |
| 1251 | +#if JERRY_ICU |
| 1252 | +#define FORM_ENTRY(id, instance_cb) \ |
| 1253 | + { \ |
| 1254 | + id, instance_cb \ |
| 1255 | + } |
| 1256 | +#else /* !JERRY_ICU */ |
| 1257 | +#define FORM_ENTRY(id, instance_cb) \ |
| 1258 | + { \ |
| 1259 | + (id, NULL) \ |
| 1260 | + } |
| 1261 | +#endif /* JERRY_ICU */ |
| 1262 | + |
| 1263 | +/** |
| 1264 | + * List of normalization forms |
| 1265 | + */ |
| 1266 | +static const icu_string_form_normalizer_t icu_string_normalize_forms[] = { |
| 1267 | + FORM_ENTRY (LIT_MAGIC_STRING_NFC_U, unorm2_getNFCInstance), |
| 1268 | + FORM_ENTRY (LIT_MAGIC_STRING_NFD_U, unorm2_getNFDInstance), |
| 1269 | + FORM_ENTRY (LIT_MAGIC_STRING_NFKC_U, unorm2_getNFKCInstance), |
| 1270 | + FORM_ENTRY (LIT_MAGIC_STRING_NFKD_U, unorm2_getNFKDInstance) |
| 1271 | +}; |
| 1272 | + |
| 1273 | +#undef FORM_ENTRY |
| 1274 | + |
| 1275 | +/** |
| 1276 | + * The String.prototype object's 'normalize' routine |
| 1277 | + * |
| 1278 | + * See also: |
| 1279 | + * ECMA-262 v12, 22.1.3.13 |
| 1280 | + * |
| 1281 | + * @return ecma value |
| 1282 | + * Returned value must be freed with ecma_free_value. |
| 1283 | + */ |
| 1284 | +static ecma_value_t |
| 1285 | +ecma_builtin_string_prototype_object_normalize (ecma_string_t *original_string_p, /**< this argument */ |
| 1286 | + ecma_value_t form_value) /**< normalization from */ |
| 1287 | +{ |
| 1288 | + icu_string_normalizer_instance_cb_t normalizer_instance_cb = unorm2_getNFCInstance; |
| 1289 | + |
| 1290 | + if (!ecma_is_value_undefined (form_value)) |
| 1291 | + { |
| 1292 | + ecma_string_t *form_p = ecma_op_to_string (form_value); |
| 1293 | + |
| 1294 | + if (JERRY_UNLIKELY (form_p == NULL)) |
| 1295 | + { |
| 1296 | + return ECMA_VALUE_ERROR; |
| 1297 | + } |
| 1298 | + |
| 1299 | + size_t forms_size = sizeof (icu_string_normalize_forms) / sizeof (icu_string_normalize_forms[0]); |
| 1300 | + uint32_t form_idx = 0; |
| 1301 | + |
| 1302 | + for (; form_idx < forms_size; form_idx++) |
| 1303 | + { |
| 1304 | + if (ecma_compare_ecma_string_to_magic_id (form_p, icu_string_normalize_forms[form_idx].kind)) |
| 1305 | + { |
| 1306 | + normalizer_instance_cb = icu_string_normalize_forms[form_idx].instance_cb; |
| 1307 | + break; |
| 1308 | + } |
| 1309 | + } |
| 1310 | + |
| 1311 | + ecma_deref_ecma_string (form_p); |
| 1312 | + |
| 1313 | + if (form_idx >= forms_size) |
| 1314 | + { |
| 1315 | + return ecma_raise_range_error (ECMA_ERR_INVALID_NORMALIZATION_FORM); |
| 1316 | + } |
| 1317 | + } |
| 1318 | + |
| 1319 | +#if JERRY_ICU |
| 1320 | + JERRY_ASSERT (normalizer_instance_cb != NULL); |
| 1321 | + size_t string_size = ecma_string_get_size (original_string_p); |
| 1322 | + |
| 1323 | + if (string_size == 0) |
| 1324 | + { |
| 1325 | +#endif /* JERRY_ICU */ |
| 1326 | + ecma_ref_ecma_string (original_string_p); |
| 1327 | + return ecma_make_string_value (original_string_p); |
| 1328 | +#if JERRY_ICU |
| 1329 | + } |
| 1330 | +#endif /* JERRY_ICU */ |
| 1331 | + |
| 1332 | + UErrorCode status = U_ZERO_ERROR; |
| 1333 | + const UNormalizer2 *normalizer_cb = normalizer_instance_cb (&status); |
| 1334 | + |
| 1335 | + if (!U_FAILURE (status)) |
| 1336 | + { |
| 1337 | + ecma_value_t result = ECMA_VALUE_ERROR; |
| 1338 | + |
| 1339 | + lit_utf8_size_t length; |
| 1340 | + uint16_t *buffer_p = ecma_string_cesu8_to_utf16 (original_string_p, &length); |
| 1341 | + int32_t norm_length = unorm2_normalize (normalizer_cb, buffer_p, (int32_t) length, NULL, 0, &status); |
| 1342 | + |
| 1343 | + if (!U_FAILURE (status) || status == U_BUFFER_OVERFLOW_ERROR) |
| 1344 | + { |
| 1345 | + uint16_t *norm_buff_p = (uint16_t *) jmem_heap_alloc_block ((uint32_t) norm_length * sizeof (uint16_t)); |
| 1346 | + |
| 1347 | + status = U_ZERO_ERROR; |
| 1348 | + norm_length = unorm2_normalize (normalizer_cb, buffer_p, (int32_t) length, norm_buff_p, norm_length, &status); |
| 1349 | + |
| 1350 | + if (!U_FAILURE (status)) |
| 1351 | + { |
| 1352 | + result = ecma_make_string_value (ecma_new_ecma_string_from_utf16 (norm_buff_p, (uint32_t) norm_length)); |
| 1353 | + } |
| 1354 | + |
| 1355 | + jmem_heap_free_block (norm_buff_p, (uint32_t) norm_length * sizeof (uint16_t)); |
| 1356 | + } |
| 1357 | + |
| 1358 | + jmem_heap_free_block (buffer_p, length * sizeof (uint16_t)); |
| 1359 | + |
| 1360 | + if (!ECMA_IS_VALUE_ERROR (result)) |
| 1361 | + { |
| 1362 | + return result; |
| 1363 | + } |
| 1364 | + } |
| 1365 | + |
| 1366 | + return ecma_raise_type_error (ECMA_ERR_NORMALIZATION_FAILED); |
| 1367 | +} /* ecma_builtin_string_prototype_object_normalize */ |
| 1368 | + |
1229 | 1369 | /**
|
1230 | 1370 | * The String.prototype object's 'repeat' routine
|
1231 | 1371 | *
|
@@ -1570,6 +1710,11 @@ ecma_builtin_string_prototype_dispatch_routine (uint8_t builtin_routine_id, /**<
|
1570 | 1710 | }
|
1571 | 1711 | #endif /* JERRY_BUILTIN_ANNEXB */
|
1572 | 1712 | #if JERRY_ESNEXT
|
| 1713 | + case ECMA_STRING_PROTOTYPE_NORMALIZE: |
| 1714 | + { |
| 1715 | + ret_value = ecma_builtin_string_prototype_object_normalize (string_p, arg1); |
| 1716 | + break; |
| 1717 | + } |
1573 | 1718 | case ECMA_STRING_PROTOTYPE_REPEAT:
|
1574 | 1719 | {
|
1575 | 1720 | ret_value = ecma_builtin_string_prototype_object_repeat (string_p, arg1);
|
|
0 commit comments