Skip to content

Commit

Permalink
Bug 1824671 - patch 2 - Create a minimal unicode-bidi-ffi crate to ex…
Browse files Browse the repository at this point in the history
…pose APIs needed by the intl::Bidi component. r=platform-i18n-reviewers,dminor

Rather than Diplomat-generated, this is hand-written to provide just the functionality needed in intl::Bidi,
to minimize the amount of glue code and ensure a close match to Gecko requirements.

Differential Revision: https://phabricator.services.mozilla.com/D197889
  • Loading branch information
jfkthame committed Jan 23, 2024
1 parent 1046878 commit e0e2a42
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 0 deletions.
7 changes: 7 additions & 0 deletions intl/bidi/moz.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Generate the cbindgen header for the unicode-bidi-ffi crate and export it
# under mozilla/intl. The "!" prefix presumably marks the header as a
# build-generated (objdir) file -- confirm against the moz.build documentation.
if CONFIG["COMPILE_ENVIRONMENT"]:
CbindgenHeader(
"unicode_bidi_ffi_generated.h", inputs=["/intl/bidi/rust/unicode-bidi-ffi"]
)
EXPORTS.mozilla.intl += [
"!unicode_bidi_ffi_generated.h",
]
9 changes: 9 additions & 0 deletions intl/bidi/rust/unicode-bidi-ffi/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[package]
name = "unicode-bidi-ffi"
version = "0.1.0"
license = "MPL-2.0"
authors = ["Jonathan Kew <[email protected]>"]
edition = "2021"

[dependencies]
unicode-bidi = "0.3.14"
15 changes: 15 additions & 0 deletions intl/bidi/rust/unicode-bidi-ffi/cbindgen.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
header = """/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */"""
autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */
"""
include_version = true
braces = "SameLine"
line_length = 100
tab_width = 2
language = "C++"
namespaces = ["mozilla", "intl", "ffi"]

# Let cbindgen look into dependency crates, limited to unicode-bidi by the
# include list. Presumably needed so that unicode-bidi types appearing in the
# FFI signatures (e.g. Level) can be resolved -- confirm against cbindgen docs.
[parse]
parse_deps = true
include = ["unicode-bidi"]
169 changes: 169 additions & 0 deletions intl/bidi/rust/unicode-bidi-ffi/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

use unicode_bidi::level::Level;
use unicode_bidi::utf16;
use unicode_bidi::Direction;

use core::ops::Range;
use core::slice;

/// LevelRun type to be returned to C++.
/// 32-bit indexes (rather than usize) are sufficient here because Gecko works
/// with 32-bit indexes when collecting the text buffer for a paragraph.
/// `#[repr(C)]` gives the struct a C-compatible layout, as it is returned by
/// value from `bidi_get_visual_run`.
#[repr(C)]
pub struct LevelRun {
/// Offset into the text at which the run begins (the `start` of the run's range).
start: u32,
/// Extent of the run, computed as the run range's `end` minus its `start`.
length: u32,
/// Level of the run, read from the resolved levels array at `start`.
level: u8,
}

/// Bidi object to be exposed to Gecko via FFI.
/// The lifetime `'a` is that of the text borrowed by `paragraph_info`; the
/// text buffer is owned by the caller, not by this object.
pub struct UnicodeBidi<'a> {
/// Paragraph analysis created by `utf16::ParagraphBidiInfo::new` for the text.
paragraph_info: utf16::ParagraphBidiInfo<'a>,
/// Lazily filled cache of `paragraph_info.visual_runs(..)` over the whole text:
/// the levels array and the visual runs array. `None` until first requested.
resolved: Option<(Vec<Level>, Vec<Range<usize>>)>,
}

impl<'a> UnicodeBidi<'a> {
    /// Create a new UnicodeBidi object representing the given text. This creates
    /// the unicode-bidi ParagraphBidiInfo struct, and will cache the resolved
    /// levels and visual-runs array once created.
    ///
    /// `level` is passed through `Level::new`; if that rejects the value, no
    /// explicit paragraph level is given to `ParagraphBidiInfo::new`.
    ///
    /// A null `text` pointer or a zero `length` is treated as empty text, so
    /// that no slice is ever built from a null pointer.
    ///
    /// The caller is responsible to ensure that `text` points to `length`
    /// readable `u16` values, and that the text buffer remains valid and
    /// unmodified as long as the UnicodeBidi object exists.
    fn new(text: *const u16, length: usize, level: u8) -> Box<Self> {
        let text: &'a [u16] = if text.is_null() || length == 0 {
            // `slice::from_raw_parts` requires a non-null pointer even for
            // zero-length slices, so use a static empty slice instead.
            &[]
        } else {
            // SAFETY: the caller guarantees `text` points to `length` valid u16
            // values that outlive the returned object (see the doc comment).
            unsafe { slice::from_raw_parts(text, length) }
        };
        Box::new(UnicodeBidi {
            paragraph_info: utf16::ParagraphBidiInfo::new(text, Level::new(level).ok()),
            resolved: None,
        })
    }

    /// Return the cached (levels, visual runs) pair for the whole text,
    /// computing it via `visual_runs` on first use.
    #[inline]
    fn resolved(&mut self) -> &(Vec<Level>, Vec<Range<usize>>) {
        // Borrow the two fields separately so the closure can read the
        // paragraph info while the cache slot is mutably borrowed.
        let info = &self.paragraph_info;
        self.resolved
            .get_or_insert_with(|| info.visual_runs(0..info.text.len()))
    }
}

/// Allocate a UnicodeBidi object for `length` UTF-16 code units at `text`, with
/// the requested paragraph `level`, and hand ownership to the caller as a raw
/// pointer. Release it with `bidi_destroy`.
/// NOTE that the text buffer must remain valid for the lifetime of this object!
#[no_mangle]
pub extern "C" fn bidi_new<'a>(text: *const u16, length: usize, level: u8) -> *mut UnicodeBidi<'a> {
    let boxed: Box<UnicodeBidi<'a>> = UnicodeBidi::new(text, length, level);
    Box::into_raw(boxed)
}

/// Free a Bidi object. A null pointer is accepted and ignored.
#[no_mangle]
pub extern "C" fn bidi_destroy(bidi: *mut UnicodeBidi) {
    if !bidi.is_null() {
        // SAFETY: the caller must pass a pointer obtained from `bidi_new` that
        // has not been destroyed already; re-boxing it lets Rust free it.
        drop(unsafe { Box::from_raw(bidi) });
    }
}

/// Return the length of the text the Bidi object was created for.
/// Panics if that length does not fit in an `i32`.
#[no_mangle]
pub extern "C" fn bidi_get_length(bidi: *const UnicodeBidi) -> i32 {
    // SAFETY: the caller must pass a valid, live pointer from `bidi_new`.
    let info = unsafe { &(*bidi).paragraph_info };
    i32::try_from(info.text.len()).unwrap()
}

/// Report the direction of the paragraph as a small integer:
/// LTR=1, RTL=-1, mixed=0.
#[no_mangle]
pub extern "C" fn bidi_get_direction(bidi: *const UnicodeBidi) -> i8 {
    // SAFETY: the caller must pass a valid, live pointer from `bidi_new`.
    let info = unsafe { &(*bidi).paragraph_info };
    let direction = info.direction();
    match direction {
        Direction::Ltr => 1,
        Direction::Rtl => -1,
        Direction::Mixed => 0,
    }
}

/// Return the paragraph level as a plain number.
#[no_mangle]
pub extern "C" fn bidi_get_paragraph_level(bidi: *const UnicodeBidi) -> u8 {
    // SAFETY: the caller must pass a valid, live pointer from `bidi_new`.
    let info = unsafe { &(*bidi).paragraph_info };
    info.paragraph_level.number()
}

/// Return how many visual runs the text has. Empty text has no runs, and is
/// answered without resolving anything. Panics if the count does not fit in
/// an `i32`.
#[no_mangle]
pub extern "C" fn bidi_count_runs(bidi: *mut UnicodeBidi) -> i32 {
    // SAFETY: the caller must pass a valid, live pointer from `bidi_new`, with
    // no other reference to the object in use during this call.
    let bidi = unsafe { &mut *bidi };
    match bidi.paragraph_info.text.len() {
        0 => 0,
        _ => {
            let (_levels, runs) = bidi.resolved();
            i32::try_from(runs.len()).unwrap()
        }
    }
}

/// Return a pointer to the start of the resolved Levels array (resolving it
/// first if that has not happened yet). The pointer is owned by the
/// UnicodeBidi object and is valid only as long as that object exists!
#[no_mangle]
pub extern "C" fn bidi_get_levels(bidi: *mut UnicodeBidi) -> *const Level {
    // SAFETY: the caller must pass a valid, live pointer from `bidi_new`, with
    // no other reference to the object in use during this call.
    let bidi = unsafe { &mut *bidi };
    let (levels, _runs) = bidi.resolved();
    levels.as_ptr()
}

/// Describe the run at position `run_index` of the visual runs array: where it
/// starts, how long it is, and the level found at its first position.
/// This would panic!() if run_index is out of range (see bidi_count_runs),
/// or if the run's start or length exceeds u32::MAX (which cannot happen
/// because Gecko can't create such a huge text buffer).
#[no_mangle]
pub extern "C" fn bidi_get_visual_run(bidi: *mut UnicodeBidi, run_index: u32) -> LevelRun {
    // SAFETY: the caller must pass a valid, live pointer from `bidi_new`, with
    // no other reference to the object in use during this call.
    let bidi = unsafe { &mut *bidi };
    let (levels, runs) = bidi.resolved();
    let run = &runs[run_index as usize];
    let extent = run.end - run.start;
    LevelRun {
        start: u32::try_from(run.start).unwrap(),
        length: u32::try_from(extent).unwrap(),
        level: levels[run.start].number(),
    }
}

/// Return index map showing the result of reordering using the given levels array.
/// (This is a generic helper that does not use a UnicodeBidi object, it just takes an
/// arbitrary array of levels.)
///
/// `levels` must point to `length` readable level bytes and `index_map` to `length`
/// writable `i32` slots; one index is written per level. If `length` is zero or either
/// pointer is null, nothing is read or written.
///
/// Panics if a reordered index does not fit in an `i32`.
#[no_mangle]
pub extern "C" fn bidi_reorder_visual(levels: *const u8, length: usize, index_map: *mut i32) {
    // Building a slice from a null pointer is undefined behaviour even when the
    // length is zero, so bail out before touching the pointers.
    if length == 0 || levels.is_null() || index_map.is_null() {
        return;
    }
    // SAFETY: both pointers are non-null and the caller guarantees they cover
    // `length` elements. The cast assumes `Level` is layout-compatible with `u8`
    // and that each byte is an acceptable level value -- TODO confirm the latter
    // against unicode-bidi's `Level` invariants.
    let levels = unsafe { slice::from_raw_parts(levels.cast::<Level>(), length) };
    let result = unsafe { slice::from_raw_parts_mut(index_map, length) };
    let reordered = utf16::BidiInfo::reorder_visual(levels);
    for (slot, &index) in result.iter_mut().zip(&reordered) {
        *slot = i32::try_from(index).unwrap();
    }
}

/// Get the base direction for the given text, returning 1 for LTR, -1 for RTL,
/// and 0 for neutral. If first_paragraph is true, only the first paragraph will be considered;
/// if false, subsequent paragraphs may be considered until a non-neutral character is found.
///
/// `text` must point to `length` readable `u16` values. A null pointer or a zero
/// `length` is treated as empty text.
#[no_mangle]
pub extern "C" fn bidi_get_base_direction(
    text: *const u16,
    length: usize,
    first_paragraph: bool,
) -> i8 {
    let text: &[u16] = if text.is_null() || length == 0 {
        // `slice::from_raw_parts` requires a non-null pointer even for
        // zero-length slices, so use a static empty slice instead.
        &[]
    } else {
        // SAFETY: the caller guarantees `text` points to `length` valid u16 values.
        unsafe { slice::from_raw_parts(text, length) }
    };
    let direction = if first_paragraph {
        unicode_bidi::get_base_direction(text)
    } else {
        unicode_bidi::get_base_direction_full(text)
    };
    match direction {
        Direction::Mixed => 0,
        Direction::Ltr => 1,
        Direction::Rtl => -1,
    }
}

0 comments on commit e0e2a42

Please sign in to comment.