Skip to content

Commit

Permalink
Update UCD data to Unicode 12.1.0's Revision 24
Browse files Browse the repository at this point in the history
Had to teach the update program to accept category Lm as for
Joining_Transparent, for the sake of a new ArabicShaping.txt entry.
Added three new Unicode versions, several new scripts and a new
word-break class.

Updated UCD's test data for tst_QTextBoundaryFinder.  This left 57
tests failing; I have commented out the data rows for those tests,
pending someone with more knowledge addressing this.

Task-number: QTBUG-79631
Task-number: QTBUG-79418
Change-Id: Ic33d3b3551195d47a84d98e84020f57a68f0b201
Reviewed-by: Eskil Abrahamsen Blomfeldt <[email protected]>
  • Loading branch information
ediosyncratic committed Oct 30, 2019
1 parent fc3c6cd commit c3eb521
Show file tree
Hide file tree
Showing 31 changed files with 21,189 additions and 9,261 deletions.
20 changes: 18 additions & 2 deletions src/corelib/text/qchar.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Copyright (C) 2019 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
Expand Down Expand Up @@ -328,6 +328,19 @@ class Q_CORE_EXPORT QChar {
Script_Soyombo,
Script_ZanabazarSquare,

// Unicode 12.1 additions
Script_Dogra,
Script_GunjalaGondi,
Script_HanifiRohingya,
Script_Makasar,
Script_Medefaidrin,
Script_OldSogdian,
Script_Sogdian,
Script_Elymaic,
Script_Nandinagari,
Script_NyiakengPuachueHmong,
Script_Wancho,

ScriptCount
};

Expand Down Expand Up @@ -421,7 +434,10 @@ class Q_CORE_EXPORT QChar {
Unicode_7_0,
Unicode_8_0,
Unicode_9_0,
Unicode_10_0
Unicode_10_0,
Unicode_11_0,
Unicode_12_0,
Unicode_12_1,
};
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO

Expand Down
2 changes: 1 addition & 1 deletion src/corelib/text/qt_attribution.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
define the Unicode character properties and internal mappings.",
"Homepage": "https://www.unicode.org/ucd/",
"Version": "Don't use the Unicode standard version; UCD has its own 'Revision' numbers",
"Version": "20",
"Version": "24",
"License": "Unicode License Agreement - Data Files and Software (2016)",
"LicenseId": "Unicode-DFS-2016",
"LicenseFile": "UNICODE_LICENSE.txt",
Expand Down
15,190 changes: 7,849 additions & 7,341 deletions src/corelib/text/qunicodetables.cpp

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions src/corelib/text/qunicodetables_p.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Copyright (C) 2019 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
Expand Down Expand Up @@ -37,7 +37,7 @@
**
****************************************************************************/

/* This file is autogenerated from the Unicode 10.0 database. Do not edit */
/* This file is autogenerated from the Unicode 12.1 database. Do not edit */

//
// W A R N I N G
Expand All @@ -59,7 +59,7 @@

QT_BEGIN_NAMESPACE

#define UNICODE_DATA_VERSION QChar::Unicode_10_0
#define UNICODE_DATA_VERSION QChar::Unicode_12_1

namespace QUnicodeTables {

Expand Down Expand Up @@ -148,6 +148,7 @@ enum WordBreakClass {
WordBreak_E_Modifier,
WordBreak_Glue_After_Zwj,
WordBreak_E_Base_GAZ,
WordBreak_WSegSpace,
NumWordBreakClasses,
};

Expand Down
47 changes: 24 additions & 23 deletions src/corelib/text/qunicodetools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,29 +164,30 @@ enum Action {
};

static const uchar breakTable[QUnicodeTables::NumWordBreakClasses][QUnicodeTables::NumWordBreakClasses] = {
// Any CR LF Newline Extend ZWJ Format RI Katakana HLetter ALetter SQuote DQuote MidNumLet MidLetter MidNum Numeric ExtNumLet E_Base E_Mod GAZ EBG
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Any
{ Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, NoBreak }, // ZWJ
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Format
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break , Break , Break }, // Katakana
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Lookup , LookupW, LookupW, Break , NoBreak, NoBreak, Break , Break , Break , Break }, // HebrewLetter
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Break , LookupW, LookupW, Break , NoBreak, NoBreak, Break , Break , Break , Break }, // ALetter
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // SingleQuote
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // DoubleQuote
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, Lookup , Break , Lookup , Break , Lookup , NoBreak, NoBreak, Break , Break , Break , Break }, // Numeric
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break }, // ExtendNumLet
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break }, // E_Base
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // E_Mod
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // GAZ
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break }, // EBG
// Any CR LF Newline Extend ZWJ Format RI Katakana HLetter ALetter SQuote DQuote MidNumLet MidLetter MidNum Numeric ExtNumLet E_Base E_Mod GAZ EBG WSeg
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Any
{ Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, NoBreak, Break }, // ZWJ
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Format
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break }, // Katakana
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Lookup , LookupW, LookupW, Break , NoBreak, NoBreak, Break , Break , Break , Break , Break }, // HebrewLetter
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Break , LookupW, LookupW, Break , NoBreak, NoBreak, Break , Break , Break , Break , Break }, // ALetter
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // SingleQuote
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // DoubleQuote
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, Lookup , Break , Lookup , Break , Lookup , NoBreak, NoBreak, Break , Break , Break , Break , Break }, // Numeric
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break }, // ExtendNumLet
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break , Break }, // E_Base
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // E_Mod
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // GAZ
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break , Break }, // EBG
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // WSeg
};

} // namespace WB
Expand Down
17 changes: 15 additions & 2 deletions src/gui/text/qharfbuzzng.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Copyright (C) 2019 The Qt Company Ltd.
** Copyright (C) 2013 Konstantin Ritt
** Contact: https://www.qt.io/licensing/
**
Expand Down Expand Up @@ -216,7 +216,20 @@ static const hb_script_t _qtscript_to_hbscript[] = {
HB_SCRIPT_MASARAM_GONDI,
HB_SCRIPT_NUSHU,
HB_SCRIPT_SOYOMBO,
HB_SCRIPT_ZANABAZAR_SQUARE
HB_SCRIPT_ZANABAZAR_SQUARE,

// Unicode 12.1 additions (not present in harfbuzz-ng 1.7.4)
hb_script_t(HB_TAG('D', 'o', 'g', 'r')), // Script_Dogra
hb_script_t(HB_TAG('G', 'o', 'n', 'g')), // Script_GunjalaGondi
hb_script_t(HB_TAG('R', 'o', 'h', 'g')), // Script_HanifiRohingya
hb_script_t(HB_TAG('M', 'a', 'k', 'a')), // Script_Makasar
hb_script_t(HB_TAG('M', 'e', 'd', 'f')), // Script_Medefaidrin
hb_script_t(HB_TAG('S', 'o', 'g', 'o')), // Script_OldSogdian
hb_script_t(HB_TAG('S', 'o', 'g', 'd')), // Script_Sogdian
hb_script_t(HB_TAG('E', 'l', 'y', 'm')), // Script_Elymaic
hb_script_t(HB_TAG('N', 'a', 'n', 'd')), // Script_Nandinagari
hb_script_t(HB_TAG('H', 'm', 'n', 'p')), // Script_NyiakengPuachueHmong
hb_script_t(HB_TAG('W', 'c', 'h', 'o')), // Script_Wancho
};
Q_STATIC_ASSERT(QChar::ScriptCount == sizeof(_qtscript_to_hbscript) / sizeof(_qtscript_to_hbscript[0]));

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Copyright (C) 2019 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the plugins of the Qt Toolkit.
Expand Down Expand Up @@ -260,7 +260,18 @@ static const char specialLanguages[][6] = {
"", // MasaramGondi
"", // Nushu
"", // Soyombo
"" // ZanabazarSquare
"", // ZanabazarSquare
"", // Dogra
"", // GunjalaGondi
"", // HanifiRohingya
"", // Makasar
"", // Medefaidrin
"", // OldSogdian
"", // Sogdian
"", // Elymaic
"", // Nandinagari
"", // NyiakengPuachueHmong
"" // Wancho
};
Q_STATIC_ASSERT(sizeof specialLanguages / sizeof *specialLanguages == QChar::ScriptCount);

Expand Down
Loading

0 comments on commit c3eb521

Please sign in to comment.