From 9eec9848e902b564a53fe23684a992ce9a32d5d9 Mon Sep 17 00:00:00 2001 From: Jason Simmons Date: Thu, 12 Apr 2018 10:40:59 -0700 Subject: [PATCH] libtxt: improvements to glyph cluster handling and grapheme breaking (#4978) * Extend Minikin to record the cluster identifier corresponding to each glyph * Use the cluster values to determine the range of input code units that map to a glyph * Use Minikin's libraries to find boundaries between graphemes within a ligature Fixes https://github.com/flutter/flutter/issues/16151 --- third_party/txt/src/minikin/Layout.cpp | 13 ++- third_party/txt/src/minikin/Layout.h | 5 ++ third_party/txt/src/txt/paragraph.cc | 119 ++++++++++++++----------- third_party/txt/src/txt/paragraph.h | 1 - 4 files changed, 81 insertions(+), 57 deletions(-) diff --git a/third_party/txt/src/minikin/Layout.cpp b/third_party/txt/src/minikin/Layout.cpp index 17a80910f8c46..aacfc4d374442 100644 --- a/third_party/txt/src/minikin/Layout.cpp +++ b/third_party/txt/src/minikin/Layout.cpp @@ -1076,7 +1076,9 @@ void Layout::doLayoutRun(const uint16_t* buf, float xoff = HBFixedToFloat(positions[i].x_offset); float yoff = -HBFixedToFloat(positions[i].y_offset); xoff += yoff * ctx->paint.skewX; - LayoutGlyph glyph = {font_ix, glyph_ix, x + xoff, y + yoff}; + LayoutGlyph glyph = { + font_ix, glyph_ix, x + xoff, y + yoff, + static_cast(info[i].cluster - clusterOffset)}; mGlyphs.push_back(glyph); float xAdvance = HBFixedToFloat(positions[i].x_advance); if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { @@ -1137,7 +1139,8 @@ void Layout::appendLayout(Layout* src, size_t start, float extraAdvance) { unsigned int glyph_id = srcGlyph.glyph_id; float x = x0 + srcGlyph.x; float y = srcGlyph.y; - LayoutGlyph glyph = {font_ix, glyph_id, x, y}; + LayoutGlyph glyph = {font_ix, glyph_id, x, y, + static_cast(srcGlyph.cluster + start)}; mGlyphs.push_back(glyph); } for (size_t i = 0; i < src->mAdvances.size(); i++) { @@ -1174,6 +1177,12 @@ unsigned int Layout::getGlyphId(int i) const { return glyph.glyph_id; } +// libtxt extension +unsigned int Layout::getGlyphCluster(int i) const { + const LayoutGlyph& glyph = mGlyphs[i]; + return glyph.cluster; +} + float Layout::getX(int i) const { const LayoutGlyph& glyph = mGlyphs[i]; return glyph.x; diff --git a/third_party/txt/src/minikin/Layout.h b/third_party/txt/src/minikin/Layout.h index 3b60ef6817d1b..fe06e73ad69c5 100644 --- a/third_party/txt/src/minikin/Layout.h +++ b/third_party/txt/src/minikin/Layout.h @@ -37,6 +37,10 @@ struct LayoutGlyph { unsigned int glyph_id; float x; float y; + + // libtxt extension: record the cluster (character index) that corresponds + // to this glyph + ssize_t cluster; }; // Internal state used during layout operation @@ -94,6 +98,7 @@ class Layout { const MinikinFont* getFont(int i) const; FontFakery getFakery(int i) const; unsigned int getGlyphId(int i) const; + uint32_t getGlyphCluster(int i) const; // libtxt extension float getX(int i) const; float getY(int i) const; diff --git a/third_party/txt/src/txt/paragraph.cc b/third_party/txt/src/txt/paragraph.cc index aec59771798e4..515013d6e11f1 100644 --- a/third_party/txt/src/txt/paragraph.cc +++ b/third_party/txt/src/txt/paragraph.cc @@ -29,6 +29,7 @@ #include "font_skia.h" #include "lib/fxl/logging.h" #include "minikin/FontLanguageListCache.h" +#include "minikin/GraphemeBreak.h" #include "minikin/HbFontCache.h" #include "minikin/LayoutUtils.h" #include "minikin/LineBreaker.h" @@ -409,14 +410,6 @@ void Paragraph::Layout(double width, bool force) { if (!ComputeBidiRuns(&bidi_runs)) return; - if (!grapheme_breaker_) { - UErrorCode icu_status = U_ZERO_ERROR; - grapheme_breaker_.reset( - icu::BreakIterator::createCharacterInstance(icu::Locale(), icu_status)); - if (!U_SUCCESS(icu_status)) - return; - } - SkPaint paint; paint.setAntiAlias(true); paint.setTextEncoding(SkPaint::kGlyphID_TextEncoding); @@ -537,14 +530,12 @@ void Paragraph::Layout(double width, bool force) { if (layout.nGlyphs() == 0) continue; + std::vector layout_advances(text_count); + layout.getAdvances(layout_advances.data()); + // Break the layout into blobs that share the same SkPaint parameters. std::vector> glyph_blobs = GetLayoutTypefaceRuns(layout); - grapheme_breaker_->setText( - icu::UnicodeString(false, text_ptr + text_start, text_count)); - if (run.is_rtl()) - grapheme_breaker_->last(); - double word_start_position = std::numeric_limits::quiet_NaN(); // Build a Skia text blob from each group of glyphs. @@ -556,58 +547,78 @@ void Paragraph::Layout(double width, bool force) { builder.allocRunPos(paint, glyph_blob.end - glyph_blob.start); for (size_t glyph_index = glyph_blob.start; - glyph_index < glyph_blob.end; ++glyph_index) { - size_t blob_index = glyph_index - glyph_blob.start; - blob_buffer.glyphs[blob_index] = layout.getGlyphId(glyph_index); - - size_t pos_index = blob_index * 2; - double glyph_x_offset = layout.getX(glyph_index) + justify_x_offset; - blob_buffer.pos[pos_index] = glyph_x_offset; - blob_buffer.pos[pos_index + 1] = layout.getY(glyph_index); - - // The glyph may be a ligature. Determine how many input characters - // are joined into this glyph. Note that each character may be - // encoded as multiple UTF-16 code units. - Range glyph_code_units; - std::vector subglyph_code_unit_counts; + glyph_index < glyph_blob.end;) { + size_t cluster_start_glyph_index = glyph_index; + uint32_t cluster = layout.getGlyphCluster(cluster_start_glyph_index); + double glyph_x_offset; + + // Add all the glyphs in this cluster to the text blob. + do { + size_t blob_index = glyph_index - glyph_blob.start; + blob_buffer.glyphs[blob_index] = layout.getGlyphId(glyph_index); + + size_t pos_index = blob_index * 2; + blob_buffer.pos[pos_index] = + layout.getX(glyph_index) + justify_x_offset; + blob_buffer.pos[pos_index + 1] = layout.getY(glyph_index); + + if (glyph_index == cluster_start_glyph_index) + glyph_x_offset = blob_buffer.pos[pos_index]; + + glyph_index++; + } while (glyph_index < glyph_blob.end && + layout.getGlyphCluster(glyph_index) == cluster); + + Range glyph_code_units(cluster, 0); + std::vector grapheme_code_unit_counts; if (run.is_rtl()) { - glyph_code_units.end = grapheme_breaker_->current(); - glyph_code_units.start = grapheme_breaker_->previous(); - if (glyph_code_units.start == icu::BreakIterator::DONE) - break; - subglyph_code_unit_counts.push_back(glyph_code_units.width()); + if (cluster_start_glyph_index > 0) { + glyph_code_units.end = + layout.getGlyphCluster(cluster_start_glyph_index - 1); + } else { + glyph_code_units.end = text_count; + } + grapheme_code_unit_counts.push_back(glyph_code_units.width()); } else { - glyph_code_units.start = grapheme_breaker_->current(); - glyph_code_units.end = grapheme_breaker_->next(); - if (glyph_code_units.end == icu::BreakIterator::DONE) - break; - subglyph_code_unit_counts.push_back(glyph_code_units.width()); - while (glyph_code_units.end < static_cast(text_count)) { - if (layout.getCharAdvance(glyph_code_units.end) != 0) - break; - if (grapheme_breaker_->next() == icu::BreakIterator::DONE) - break; - subglyph_code_unit_counts.push_back(grapheme_breaker_->current() - - glyph_code_units.end); - glyph_code_units.end = grapheme_breaker_->current(); + if (glyph_index < layout.nGlyphs()) { + glyph_code_units.end = layout.getGlyphCluster(glyph_index); + } else { + glyph_code_units.end = text_count; + } + + // The glyph may be a ligature. Determine how many graphemes are + // joined into this glyph and how many input code units map to + // each grapheme. + size_t code_unit_count = 1; + for (int32_t offset = glyph_code_units.start + 1; + offset < glyph_code_units.end; ++offset) { + if (minikin::GraphemeBreak::isGraphemeBreak( + layout_advances.data(), text_ptr, text_start, text_count, + offset)) { + grapheme_code_unit_counts.push_back(code_unit_count); + code_unit_count = 1; + } else { + code_unit_count++; + } } + grapheme_code_unit_counts.push_back(code_unit_count); } float glyph_advance = layout.getCharAdvance(glyph_code_units.start); - float subglyph_advance = - glyph_advance / subglyph_code_unit_counts.size(); + float grapheme_advance = + glyph_advance / grapheme_code_unit_counts.size(); glyph_positions.emplace_back(run_x_offset + glyph_x_offset, - subglyph_advance, + grapheme_advance, run.start() + glyph_code_units.start, - subglyph_code_unit_counts[0]); + grapheme_code_unit_counts[0]); - // Compute positions for the additional characters in the ligature. - for (size_t i = 1; i < subglyph_code_unit_counts.size(); ++i) { + // Compute positions for the additional graphemes in the ligature. + for (size_t i = 1; i < grapheme_code_unit_counts.size(); ++i) { glyph_positions.emplace_back( - glyph_positions.back().x_pos.end, subglyph_advance, + glyph_positions.back().x_pos.end, grapheme_advance, glyph_positions.back().code_units.start + - subglyph_code_unit_counts[i - 1], - subglyph_code_unit_counts[i]); + grapheme_code_unit_counts[i - 1], + grapheme_code_unit_counts[i]); } if (word_index < words.size() && diff --git a/third_party/txt/src/txt/paragraph.h b/third_party/txt/src/txt/paragraph.h index eabcc431b4486..4a40f89b387ae 100644 --- a/third_party/txt/src/txt/paragraph.h +++ b/third_party/txt/src/txt/paragraph.h @@ -184,7 +184,6 @@ class Paragraph { std::shared_ptr font_collection_; minikin::LineBreaker breaker_; - std::unique_ptr grapheme_breaker_; mutable std::unique_ptr word_breaker_; struct LineRange {