Skip to content

Commit

Permalink
Bug 1859752 - Part 3: Update in-tree ICU to 74. r=platform-i18n-revie…
Browse files Browse the repository at this point in the history
…wers,dminor

Differential Revision: https://phabricator.services.mozilla.com/D192728
  • Loading branch information
anba committed Nov 7, 2023
1 parent 564c2c5 commit ecfc3b2
Show file tree
Hide file tree
Showing 1,883 changed files with 205,782 additions and 484,590 deletions.
4 changes: 4 additions & 0 deletions config/external/icu/common/sources.mozbuild
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ sources = [
'/intl/icu/source/common/uloc.cpp',
'/intl/icu/source/common/uloc_keytype.cpp',
'/intl/icu/source/common/uloc_tag.cpp',
'/intl/icu/source/common/ulocale.cpp',
'/intl/icu/source/common/ulocbuilder.cpp',
'/intl/icu/source/common/umapfile.cpp',
'/intl/icu/source/common/umath.cpp',
'/intl/icu/source/common/umutablecptrie.cpp',
Expand Down Expand Up @@ -271,6 +273,8 @@ EXPORTS.unicode += [
'/intl/icu/source/common/unicode/uiter.h',
'/intl/icu/source/common/unicode/uldnames.h',
'/intl/icu/source/common/unicode/uloc.h',
'/intl/icu/source/common/unicode/ulocale.h',
'/intl/icu/source/common/unicode/ulocbuilder.h',
'/intl/icu/source/common/unicode/umachine.h',
'/intl/icu/source/common/unicode/umisc.h',
'/intl/icu/source/common/unicode/umutablecptrie.h',
Expand Down
Binary file not shown.
8 changes: 4 additions & 4 deletions intl/icu/GIT-INFO
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
commit 5861e1fd52f1d7673eee38bc3c965aa18b336062
Author: Peter Edberg <[email protected]>
Date: Tue Apr 11 10:32:35 2023 -0700
commit 9edac7b78327a1cb58db29e2714b15f9fa14e4d7
Author: Markus Scherer <[email protected]>
Date: Fri Oct 27 15:04:44 2023 -0700

ICU-22221 update cldr-icu instructions, and ICU tag for integration
ICU-22326 ICU 74 release not candidate
4 changes: 2 additions & 2 deletions intl/icu/source/Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ INLINE_INHERITED_MEMB = NO
# shortest path that makes the file name unique will be used
# The default value is: YES.

FULL_PATH_NAMES = NO
FULL_PATH_NAMES = YES

# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
# Stripping is only done if one of the specified strings matches the left-hand
Expand All @@ -160,7 +160,7 @@ FULL_PATH_NAMES = NO
# will be relative from the directory where doxygen is started.
# This tag requires that the tag FULL_PATH_NAMES is set to YES.

STRIP_FROM_PATH =
STRIP_FROM_PATH = @srcdir@

# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
# path mentioned in the documentation of a class, which tells the reader which
Expand Down
2 changes: 1 addition & 1 deletion intl/icu/source/allinone/Build.Windows.IcuVersion.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- This file is used to set the ICU Major Version number, which is used as a suffix on various file names in other Visual Studio projects. -->
<PropertyGroup>
<IcuMajorVersion>73</IcuMajorVersion>
<IcuMajorVersion>74</IcuMajorVersion>
</PropertyGroup>
</Project>
4 changes: 4 additions & 0 deletions intl/icu/source/common/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -603,12 +603,16 @@ cc_library(
"locbased.cpp",
"locid.cpp",
"loclikely.cpp",
"loclikelysubtags.cpp",
"locmap.cpp",
"lsr.cpp",
"resbund.cpp",
"resource.cpp",
"uloc.cpp",
"uloc_tag.cpp",
"uloc_keytype.cpp",
"ulocale.cpp",
"ulocbuilder.cpp",
"uresbund.cpp",
"uresdata.cpp",
"wintz.cpp",
Expand Down
121 changes: 93 additions & 28 deletions intl/icu/source/common/brkeng.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "unicode/uscript.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
#include "unicode/rbbi.h"

#include "brkeng.h"
#include "cmemory.h"
Expand Down Expand Up @@ -70,19 +71,21 @@ UnhandledEngine::~UnhandledEngine() {
}

UBool
UnhandledEngine::handles(UChar32 c) const {
UnhandledEngine::handles(UChar32 c, const char* locale) const {
(void)locale; // Unused
return fHandled && fHandled->contains(c);
}

int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t startPos,
int32_t endPos,
UVector32 &/*foundBreaks*/,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
UChar32 c = utext_current32(text);
utext_setNativeIndex(text, startPos);
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
Expand Down Expand Up @@ -120,49 +123,47 @@ ICULanguageBreakFactory::~ICULanguageBreakFactory() {
}
}

U_NAMESPACE_END
U_CDECL_BEGIN
static void U_CALLCONV _deleteEngine(void *obj) {
delete (const icu::LanguageBreakEngine *) obj;
void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
if (fEngines == nullptr) {
LocalPointer<UStack> engines(new UStack(uprv_deleteUObject, nullptr, status), status);
if (U_SUCCESS(status)) {
fEngines = engines.orphan();
}
}
}
U_CDECL_END
U_NAMESPACE_BEGIN

const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
const LanguageBreakEngine *lbe = nullptr;
UErrorCode status = U_ZERO_ERROR;
ensureEngines(status);
if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
return nullptr;
}

static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);

if (fEngines == nullptr) {
LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
return nullptr;
}
fEngines = engines.orphan();
} else {
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != nullptr && lbe->handles(c)) {
return lbe;
}
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != nullptr && lbe->handles(c, locale)) {
return lbe;
}
}

// We didn't find an engine. Create one.
lbe = loadEngineFor(c);
lbe = loadEngineFor(c, locale);
if (lbe != nullptr) {
fEngines->push((void *)lbe, status);
}
return U_SUCCESS(status) ? lbe : nullptr;
}

const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
Expand Down Expand Up @@ -299,6 +300,70 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
return nullptr;
}


void ICULanguageBreakFactory::addExternalEngine(
ExternalBreakEngine* external, UErrorCode& status) {
LocalPointer<ExternalBreakEngine> engine(external, status);
ensureEngines(status);
LocalPointer<BreakEngineWrapper> wrapper(
new BreakEngineWrapper(engine.orphan(), status), status);
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
fEngines->push(wrapper.getAlias(), status);
wrapper.orphan();
}

BreakEngineWrapper::BreakEngineWrapper(
ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) {
}

BreakEngineWrapper::~BreakEngineWrapper() {
}

UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const {
return delegate->isFor(c, locale);
}

int32_t BreakEngineWrapper::findBreaks(
UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
int32_t result = 0;

// Find the span of characters included in the set.
// The span to break begins at the current position in the text, and
// extends towards the start or end of the text, depending on 'reverse'.

utext_setNativeIndex(text, startPos);
int32_t start = (int32_t)utext_getNativeIndex(text);
int32_t current;
int32_t rangeStart;
int32_t rangeEnd;
UChar32 c = utext_current32(text);
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
rangeStart = start;
rangeEnd = current;
int32_t beforeSize = foundBreaks.size();
int32_t additionalCapacity = rangeEnd - rangeStart + 1;
// enlarge to contains (rangeEnd-rangeStart+1) more items
foundBreaks.ensureCapacity(beforeSize+additionalCapacity, status);
if (U_FAILURE(status)) return 0;
foundBreaks.setSize(beforeSize + beforeSize+additionalCapacity);
result = delegate->fillBreaks(text, rangeStart, rangeEnd, foundBreaks.getBuffer()+beforeSize,
additionalCapacity, status);
if (U_FAILURE(status)) return 0;
foundBreaks.setSize(beforeSize + result);
utext_setNativeIndex(text, current);
return result;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
59 changes: 53 additions & 6 deletions intl/icu/source/common/brkeng.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#ifndef BRKENG_H
#define BRKENG_H

#include "unicode/umisc.h"
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utext.h"
Expand All @@ -21,6 +22,7 @@ class UnicodeSet;
class UStack;
class UVector32;
class DictionaryMatcher;
class ExternalBreakEngine;

/*******************************************************************
* LanguageBreakEngine
Expand All @@ -35,7 +37,7 @@ class DictionaryMatcher;
* <p>LanguageBreakEngines should normally be implemented so as to
* be shared between threads without locking.</p>
*/
class LanguageBreakEngine : public UMemory {
class LanguageBreakEngine : public UObject {
public:

/**
Expand All @@ -54,10 +56,11 @@ class LanguageBreakEngine : public UMemory {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param locale The locale.
* @return true if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c) const = 0;
virtual UBool handles(UChar32 c, const char* locale) const = 0;

/**
* <p>Find any breaks within a run in the supplied text.</p>
Expand All @@ -80,6 +83,35 @@ class LanguageBreakEngine : public UMemory {

};

/*******************************************************************
* BreakEngineWrapper
*/

/**
* <p>BreakEngineWrapper implement LanguageBreakEngine by
* a thin wrapper that delegate the task to ExternalBreakEngine
* </p>
*/
class BreakEngineWrapper : public LanguageBreakEngine {
public:

BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status);

virtual ~BreakEngineWrapper();

virtual UBool handles(UChar32 c, const char* locale) const override;

virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode &status) const override;

private:
LocalPointer<ExternalBreakEngine> delegate;
};

/*******************************************************************
* LanguageBreakFactory
*/
Expand Down Expand Up @@ -125,9 +157,10 @@ class LanguageBreakFactory : public UMemory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param locale The locale.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) = 0;

};

Expand Down Expand Up @@ -174,10 +207,11 @@ class UnhandledEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param locale The locale.
* @return true if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c) const override;
virtual UBool handles(UChar32 c, const char* locale) const override;

/**
* <p>Find any breaks within a run in the supplied text.</p>
Expand Down Expand Up @@ -247,9 +281,18 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param locale The locale.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override;
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override;

/**
* Add and adopt the engine and return an URegistryKey.
* @param engine The ExternalBreakEngine to be added and adopt. The caller
* pass the ownership and should not release the memory after this.
* @param status the error code.
*/
virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status);

protected:
/**
Expand All @@ -258,9 +301,10 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param locale The locale.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale);

/**
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
Expand All @@ -269,6 +313,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
* @return A DictionaryMatcher with the desired characteristics, or nullptr.
*/
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);

private:
void ensureEngines(UErrorCode& status);
};

U_NAMESPACE_END
Expand Down
Loading

0 comments on commit ecfc3b2

Please sign in to comment.