From e0e2a426a2822991bf5ce6b9359f15eb02491c09 Mon Sep 17 00:00:00 2001
From: Jonathan Kew
Date: Tue, 23 Jan 2024 11:53:20 +0000
Subject: [PATCH] Bug 1824671 - patch 2 - Create a minimal unicode-bidi-ffi crate to expose APIs needed by the intl::Bidi component. r=platform-i18n-reviewers,dminor

Rather than Diplomat-generated, this is hand-written to provide just the
functionality needed in intl::Bidi, to minimize the amount of glue code and
ensure a close match to Gecko requirements.

Differential Revision: https://phabricator.services.mozilla.com/D197889
---
 intl/bidi/moz.build                           |   7 +
 intl/bidi/rust/unicode-bidi-ffi/Cargo.toml    |   9 +
 intl/bidi/rust/unicode-bidi-ffi/cbindgen.toml |  15 ++
 intl/bidi/rust/unicode-bidi-ffi/src/lib.rs    | 169 ++++++++++++++++++
 4 files changed, 200 insertions(+)
 create mode 100644 intl/bidi/moz.build
 create mode 100644 intl/bidi/rust/unicode-bidi-ffi/Cargo.toml
 create mode 100644 intl/bidi/rust/unicode-bidi-ffi/cbindgen.toml
 create mode 100644 intl/bidi/rust/unicode-bidi-ffi/src/lib.rs

diff --git a/intl/bidi/moz.build b/intl/bidi/moz.build
new file mode 100644
index 0000000000000..9407ac6a3968d
--- /dev/null
+++ b/intl/bidi/moz.build
@@ -0,0 +1,7 @@
+if CONFIG["COMPILE_ENVIRONMENT"]:
+    CbindgenHeader(
+        "unicode_bidi_ffi_generated.h", inputs=["/intl/bidi/rust/unicode-bidi-ffi"]
+    )
+    EXPORTS.mozilla.intl += [
+        "!unicode_bidi_ffi_generated.h",
+    ]
diff --git a/intl/bidi/rust/unicode-bidi-ffi/Cargo.toml b/intl/bidi/rust/unicode-bidi-ffi/Cargo.toml
new file mode 100644
index 0000000000000..a61e5d1e4f228
--- /dev/null
+++ b/intl/bidi/rust/unicode-bidi-ffi/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "unicode-bidi-ffi"
+version = "0.1.0"
+license = "MPL-2.0"
+authors = ["Jonathan Kew "]
+edition = "2021"
+
+[dependencies]
+unicode-bidi = "0.3.14"
diff --git a/intl/bidi/rust/unicode-bidi-ffi/cbindgen.toml b/intl/bidi/rust/unicode-bidi-ffi/cbindgen.toml
new file mode 100644
index 0000000000000..a345d2b46a27a
--- /dev/null
+++ b/intl/bidi/rust/unicode-bidi-ffi/cbindgen.toml
@@ -0,0 +1,15 @@
+header = """/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */"""
+autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */
+"""
+include_version = true
+braces = "SameLine"
+line_length = 100
+tab_width = 2
+language = "C++"
+namespaces = ["mozilla", "intl", "ffi"]
+
+[parse]
+parse_deps = true
+include = ["unicode-bidi"]
diff --git a/intl/bidi/rust/unicode-bidi-ffi/src/lib.rs b/intl/bidi/rust/unicode-bidi-ffi/src/lib.rs
new file mode 100644
index 0000000000000..9444855637c1f
--- /dev/null
+++ b/intl/bidi/rust/unicode-bidi-ffi/src/lib.rs
@@ -0,0 +1,169 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use unicode_bidi::level::Level;
+use unicode_bidi::utf16;
+use unicode_bidi::Direction;
+
+use core::ops::Range;
+use core::slice;
+
+/// LevelRun type to be returned to C++.
+/// 32-bit indexes (rather than usize) are sufficient here because Gecko works
+/// with 32-bit indexes when collecting the text buffer for a paragraph.
+#[repr(C)]
+pub struct LevelRun {
+    start: u32,
+    length: u32,
+    level: u8,
+}
+
+/// Bidi object to be exposed to Gecko via FFI.
+pub struct UnicodeBidi<'a> {
+    paragraph_info: utf16::ParagraphBidiInfo<'a>,
+    resolved: Option<(Vec<Level>, Vec<Range<usize>>)>,
+}
+
+impl UnicodeBidi<'_> {
+    /// Create a new UnicodeBidi object representing the given text. This creates
+    /// the unicode-bidi ParagraphBidiInfo struct, and will cache the resolved
+    /// levels and visual-runs array once created.
+    /// The caller is responsible to ensure the text buffer remains valid
+    /// as long as the UnicodeBidi object exists.
+    fn new<'a>(text: *const u16, length: usize, level: u8) -> Box<UnicodeBidi<'a>> {
+        // SAFETY: the caller guarantees that `text` points to `length` readable u16 units.
+        let text = unsafe { slice::from_raw_parts(text, length) };
+        Box::new(UnicodeBidi {
+            paragraph_info: utf16::ParagraphBidiInfo::<'a>::new(text, Level::new(level).ok()),
+            resolved: None,
+        })
+    }
+
+    /// Return the cached (levels, visual runs) pair, computing it on first use.
+    fn resolved(&mut self) -> &(Vec<Level>, Vec<Range<usize>>) {
+        let info = &self.paragraph_info;
+        // Empty text has no runs (see bidi_count_runs), so never resolve an empty range.
+        self.resolved.get_or_insert_with(|| match info.text.len() {
+            0 => (Vec::new(), Vec::new()),
+            len => info.visual_runs(0..len),
+        })
+    }
+}
+
+/// Create a new UnicodeBidi object for the given text.
+/// NOTE that the text buffer must remain valid for the lifetime of this object!
+#[no_mangle]
+pub extern "C" fn bidi_new<'a>(text: *const u16, length: usize, level: u8) -> *mut UnicodeBidi<'a> {
+    Box::into_raw(UnicodeBidi::<'a>::new(text, length, level))
+}
+
+/// Destroy the Bidi object. A null pointer is ignored.
+#[no_mangle]
+pub extern "C" fn bidi_destroy(bidi: *mut UnicodeBidi) {
+    if !bidi.is_null() {
+        // SAFETY: a non-null `bidi` must be a pointer returned by bidi_new, not yet destroyed.
+        drop(unsafe { Box::from_raw(bidi) });
+    }
+}
+
+/// Get the length of the text covered by the Bidi object.
+#[no_mangle]
+pub extern "C" fn bidi_get_length(bidi: *const UnicodeBidi) -> i32 {
+    // SAFETY: the caller passes a live pointer returned by bidi_new.
+    let bidi = unsafe { &*bidi };
+    bidi.paragraph_info.text.len().try_into().unwrap()
+}
+
+/// Get the paragraph direction: LTR=1, RTL=-1, mixed=0.
+#[no_mangle]
+pub extern "C" fn bidi_get_direction(bidi: *const UnicodeBidi) -> i8 {
+    // SAFETY: the caller passes a live pointer returned by bidi_new.
+    let bidi = unsafe { &*bidi };
+    match bidi.paragraph_info.direction() {
+        Direction::Mixed => 0,
+        Direction::Ltr => 1,
+        Direction::Rtl => -1,
+    }
+}
+
+/// Get the paragraph level.
+#[no_mangle]
+pub extern "C" fn bidi_get_paragraph_level(bidi: *const UnicodeBidi) -> u8 {
+    // SAFETY: the caller passes a live pointer returned by bidi_new.
+    let bidi = unsafe { &*bidi };
+    bidi.paragraph_info.paragraph_level.into()
+}
+
+/// Get the number of runs present.
+#[no_mangle]
+pub extern "C" fn bidi_count_runs(bidi: *mut UnicodeBidi) -> i32 {
+    // SAFETY: the caller passes a live, unaliased pointer returned by bidi_new.
+    let bidi = unsafe { &mut *bidi };
+    if bidi.paragraph_info.text.is_empty() {
+        return 0;
+    }
+    bidi.resolved().1.len().try_into().unwrap()
+}
+
+/// Get a pointer to the Levels array. The resulting pointer is valid only as long as
+/// the UnicodeBidi object exists!
+#[no_mangle]
+pub extern "C" fn bidi_get_levels(bidi: *mut UnicodeBidi) -> *const Level {
+    // SAFETY: the caller passes a live, unaliased pointer returned by bidi_new.
+    let bidi = unsafe { &mut *bidi };
+    bidi.resolved().0.as_ptr()
+}
+
+/// Get the extent of the run at the given index in the visual runs array.
+/// This would panic!() if run_index is out of range (see bidi_count_runs),
+/// or if the run's start or length exceeds u32::MAX (which cannot happen
+/// because Gecko can't create such a huge text buffer).
+#[no_mangle]
+pub extern "C" fn bidi_get_visual_run(bidi: *mut UnicodeBidi, run_index: u32) -> LevelRun {
+    // SAFETY: the caller passes a live, unaliased pointer returned by bidi_new.
+    let bidi = unsafe { &mut *bidi };
+    let (levels, runs) = bidi.resolved();
+    let run = &runs[run_index as usize];
+    LevelRun {
+        start: run.start.try_into().unwrap(),
+        length: (run.end - run.start).try_into().unwrap(),
+        level: levels[run.start].into(),
+    }
+}
+
+/// Return index map showing the result of reordering using the given levels array.
+/// (This is a generic helper that does not use a UnicodeBidi object, it just takes an
+/// arbitrary array of levels.)
+#[no_mangle]
+pub extern "C" fn bidi_reorder_visual(levels: *const u8, length: usize, index_map: *mut i32) {
+    // SAFETY: the caller guarantees `length` level bytes (not validated) and `length` i32 slots.
+    let levels = unsafe { slice::from_raw_parts(levels as *const Level, length) };
+    let result = unsafe { slice::from_raw_parts_mut(index_map, length) };
+    for (slot, visual) in result.iter_mut().zip(utf16::BidiInfo::reorder_visual(levels)) {
+        *slot = visual.try_into().unwrap();
+    }
+}
+
+/// Get the base direction for the given text, returning 1 for LTR, -1 for RTL,
+/// and 0 for neutral. If first_paragraph is true, only the first paragraph will be considered;
+/// if false, subsequent paragraphs may be considered until a non-neutral character is found.
+#[no_mangle]
+pub extern "C" fn bidi_get_base_direction(
+    text: *const u16,
+    length: usize,
+    first_paragraph: bool,
+) -> i8 {
+    // SAFETY: the caller guarantees that `text` points to `length` readable u16 units.
+    let text = unsafe { slice::from_raw_parts(text, length) };
+    let direction = if first_paragraph {
+        unicode_bidi::get_base_direction(text)
+    } else {
+        unicode_bidi::get_base_direction_full(text)
+    };
+    match direction {
+        Direction::Mixed => 0,
+        Direction::Ltr => 1,
+        Direction::Rtl => -1,
+    }
+}