diff --git a/src/explicit.rs b/src/explicit.rs index 3c98bf4..5760ab8 100644 --- a/src/explicit.rs +++ b/src/explicit.rs @@ -19,12 +19,17 @@ use super::char_data::{ BidiClass::{self, *}, }; use super::level::Level; +use super::prepare::removed_by_x9; +use super::LevelRunVec; use super::TextSource; -/// Compute explicit embedding levels for one paragraph of text (X1-X8). +/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify +/// level runs (BD7) for use when determining Isolating Run Sequences (X10). /// /// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`, /// for each char in `text`. +/// +/// `runs` returns the list of level runs (BD7) of the text. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn compute<'a, T: TextSource<'a> + ?Sized>( text: &'a T, @@ -32,6 +37,7 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( original_classes: &[BidiClass], levels: &mut [Level], processing_classes: &mut [BidiClass], + runs: &mut LevelRunVec, ) { assert_eq!(text.len(), original_classes.len()); @@ -51,6 +57,9 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( let mut overflow_embedding_count = 0u32; let mut valid_isolate_count = 0u32; + let mut current_run_level = Level::ltr(); + let mut current_run_start = 0; + for (i, len) in text.indices_lengths() { let last = stack.last().unwrap(); @@ -182,6 +191,26 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( levels[i + j] = levels[i]; processing_classes[i + j] = processing_classes[i]; } + + // Identify level runs to be passed to prepare::isolating_run_sequences(). + if i == 0 { + // Initialize for the first (or only) run. + current_run_level = levels[i]; + } else { + // Check if we need to start a new level run. + // + if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level { + // End the last run and start a new one. + runs.push(current_run_start..i); + current_run_level = levels[i]; + current_run_start = i; + } + } + } + + // Append the trailing level run, if non-empty. + if levels.len() > current_run_start { + runs.push(current_run_start..levels.len()); } } diff --git a/src/lib.rs b/src/lib.rs index dfe3b22..4899275 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,7 +88,7 @@ mod prepare; pub use crate::char_data::{BidiClass, UNICODE_VERSION}; pub use crate::data_source::BidiDataSource; pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL}; -pub use crate::prepare::LevelRun; +pub use crate::prepare::{LevelRun, LevelRunVec}; #[cfg(feature = "hardcoded-data")] pub use crate::char_data::{bidi_class, HardcodedBidiData}; @@ -248,8 +248,14 @@ struct InitialInfoExt<'text> { /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that /// requires no further bidi processing (i.e. there are no RTL characters or bidi - /// control codes present). - pure_ltr: Vec, + /// control codes present), and whether any bidi isolation controls are present. + flags: Vec, +} + +#[derive(PartialEq, Debug)] +struct ParagraphInfoFlags { + is_pure_ltr: bool, + has_isolate_controls: bool, } impl<'text> InitialInfoExt<'text> { @@ -269,12 +275,12 @@ impl<'text> InitialInfoExt<'text> { default_para_level: Option, ) -> InitialInfoExt<'a> { let mut paragraphs = Vec::::new(); - let mut pure_ltr = Vec::::new(); - let (original_classes, _, _) = compute_initial_info( + let mut flags = Vec::::new(); + let (original_classes, _, _, _) = compute_initial_info( data_source, text, default_para_level, - Some((&mut paragraphs, &mut pure_ltr)), + Some((&mut paragraphs, &mut flags)), ); InitialInfoExt { @@ -283,7 +289,7 @@ impl<'text> InitialInfoExt<'text> { original_classes, paragraphs, }, - pure_ltr, + flags, } } } @@ -299,8 +305,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( data_source: &D, text: &'a T, default_para_level: Option, - mut split_paragraphs: Option<(&mut Vec, &mut Vec)>, -) -> (Vec, Level, bool) { + mut split_paragraphs: Option<(&mut Vec, &mut Vec)>, +) -> (Vec, Level, bool, bool) { let mut original_classes = Vec::with_capacity(text.len()); // The stack contains the starting code unit index for each nested isolate we're inside. @@ -310,8 +316,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( let mut isolate_stack = Vec::new(); debug_assert!( - if let Some((ref paragraphs, ref pure_ltr)) = split_paragraphs { - paragraphs.is_empty() && pure_ltr.is_empty() + if let Some((ref paragraphs, ref flags)) = split_paragraphs { + paragraphs.is_empty() && flags.is_empty() } else { true } @@ -323,6 +329,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // Per-paragraph flag: can subsequent processing be skipped? Set to false if any // RTL characters or bidi control characters are encountered in the paragraph. let mut is_pure_ltr = true; + // Set to true if any bidi isolation controls are present in the paragraph. + let mut has_isolate_controls = false; #[cfg(feature = "flame_it")] flame::start("compute_initial_info(): iter text.char_indices()"); @@ -341,7 +349,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( match class { B => { - if let Some((ref mut paragraphs, ref mut pure_ltr)) = split_paragraphs { + if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs { // P1. Split the text into separate paragraphs. The paragraph separator is kept // with the previous paragraph. let para_end = i + len; @@ -350,7 +358,10 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // P3. If no character is found in p2, set the paragraph level to zero. level: para_level.unwrap_or(LTR_LEVEL), }); - pure_ltr.push(is_pure_ltr); + flags.push(ParagraphInfoFlags { + is_pure_ltr, + has_isolate_controls, + }); // Reset state for the start of the next paragraph. para_start = para_end; // TODO: Support defaulting to direction of previous paragraph @@ -358,6 +369,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // para_level = default_para_level; is_pure_ltr = true; + has_isolate_controls = false; isolate_stack.clear(); } } @@ -394,6 +406,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( RLI | LRI | FSI => { is_pure_ltr = false; + has_isolate_controls = true; isolate_stack.push(i); } @@ -405,15 +418,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( } } - if let Some((paragraphs, pure_ltr)) = split_paragraphs { + if let Some((paragraphs, flags)) = split_paragraphs { if para_start < text.len() { paragraphs.push(ParagraphInfo { range: para_start..text.len(), level: para_level.unwrap_or(LTR_LEVEL), }); - pure_ltr.push(is_pure_ltr); + flags.push(ParagraphInfoFlags { + is_pure_ltr, + has_isolate_controls, + }); } - debug_assert_eq!(paragraphs.len(), pure_ltr.len()); + debug_assert_eq!(paragraphs.len(), flags.len()); } debug_assert_eq!(original_classes.len(), text.len()); @@ -424,6 +440,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( original_classes, para_level.unwrap_or(LTR_LEVEL), is_pure_ltr, + has_isolate_controls, ) } @@ -482,20 +499,21 @@ impl<'text> BidiInfo<'text> { text: &'a str, default_para_level: Option, ) -> BidiInfo<'a> { - let InitialInfoExt { base, pure_ltr, .. } = + let InitialInfoExt { base, flags, .. } = InitialInfoExt::new_with_data_source(data_source, text, default_para_level); let mut levels = Vec::::with_capacity(text.len()); let mut processing_classes = base.original_classes.clone(); - for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) { + for (para, flags) in base.paragraphs.iter().zip(flags.iter()) { let text = &text[para.range.clone()]; let original_classes = &base.original_classes[para.range.clone()]; compute_bidi_info_for_para( data_source, para, - *is_pure_ltr, + flags.is_pure_ltr, + flags.has_isolate_controls, text, original_classes, &mut processing_classes, @@ -720,7 +738,7 @@ impl<'text> ParagraphBidiInfo<'text> { ) -> ParagraphBidiInfo<'a> { // Here we could create a ParagraphInitialInfo struct to parallel the one // used by BidiInfo, but there doesn't seem any compelling reason for it. - let (original_classes, paragraph_level, is_pure_ltr) = + let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) = compute_initial_info(data_source, text, default_para_level, None); let mut levels = Vec::::with_capacity(text.len()); @@ -738,6 +756,7 @@ impl<'text> ParagraphBidiInfo<'text> { data_source, ¶_info, is_pure_ltr, + has_isolate_controls, text, &original_classes, &mut processing_classes, @@ -1066,6 +1085,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> data_source: &D, para: &ParagraphInfo, is_pure_ltr: bool, + has_isolate_controls: bool, text: &'a T, original_classes: &[BidiClass], processing_classes: &mut [BidiClass], @@ -1079,6 +1099,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> let processing_classes = &mut processing_classes[para.range.clone()]; let levels = &mut levels[para.range.clone()]; + let mut level_runs = LevelRunVec::new(); explicit::compute( text, @@ -1086,9 +1107,18 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> original_classes, levels, processing_classes, + &mut level_runs, ); - let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels); + let mut sequences = prepare::IsolatingRunSequenceVec::new(); + prepare::isolating_run_sequences( + para.level, + original_classes, + levels, + level_runs, + has_isolate_controls, + &mut sequences, + ); for sequence in &sequences { implicit::resolve_weak(text, sequence, processing_classes); implicit::resolve_neutral( @@ -1100,6 +1130,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> processing_classes, ); } + implicit::resolve_levels(processing_classes, levels); assign_levels_to_removed_chars(para.level, original_classes, levels); @@ -1549,6 +1580,24 @@ mod tests { #[cfg(feature = "hardcoded-data")] fn test_process_text() { let tests = vec![ + ( + // text + "", + // base level + Some(RTL_LEVEL), + // levels + Level::vec(&[]), + // original_classes + vec![], + // paragraphs + vec![], + // levels_u16 + Level::vec(&[]), + // original_classes_u16 + vec![], + // paragraphs_u16 + vec![], + ), ( // text "abc123", @@ -1710,6 +1759,19 @@ mod tests { paragraphs: t.4.clone(), } ); + // If it was empty, also test that ParagraphBidiInfo handles it safely. + if t.4.len() == 0 { + assert_eq!( + ParagraphBidiInfo::new(t.0, t.1), + ParagraphBidiInfo { + text: t.0, + original_classes: t.3.clone(), + levels: t.2.clone(), + paragraph_level: RTL_LEVEL, + is_pure_ltr: true, + } + ) + } // If it was a single paragraph, also test ParagraphBidiInfo. if t.4.len() == 1 { assert_eq!( diff --git a/src/prepare.rs b/src/prepare.rs index f864229..f7b35ad 100644 --- a/src/prepare.rs +++ b/src/prepare.rs @@ -25,6 +25,11 @@ use super::BidiClass::{self, *}; /// Represented as a range of byte indices. pub type LevelRun = Range; +#[cfg(feature = "smallvec")] +pub type LevelRunVec = SmallVec<[LevelRun; 8]>; +#[cfg(not(feature = "smallvec"))] +pub type LevelRunVec = Vec; + /// Output of `isolating_run_sequences` (steps X9-X10) #[derive(Debug, PartialEq)] pub struct IsolatingRunSequence { @@ -33,6 +38,11 @@ pub struct IsolatingRunSequence { pub eos: BidiClass, // End-of-sequence type. } +#[cfg(feature = "smallvec")] +pub type IsolatingRunSequenceVec = SmallVec<[IsolatingRunSequence; 8]>; +#[cfg(not(feature = "smallvec"))] +pub type IsolatingRunSequenceVec = Vec; + /// Compute the set of isolating run sequences. /// /// An isolating run sequence is a maximal sequence of level runs such that for all level runs @@ -45,8 +55,59 @@ pub fn isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], -) -> Vec { - let runs = level_runs(levels, original_classes); + runs: LevelRunVec, + has_isolate_controls: bool, + isolating_run_sequences: &mut IsolatingRunSequenceVec, +) { + // Per http://www.unicode.org/reports/tr9/#BD13: + // "In the absence of isolate initiators, each isolating run sequence in a paragraph + // consists of exactly one level run, and each level run constitutes a separate + // isolating run sequence." + // We can take a simplified path to handle this case. + if !has_isolate_controls { + isolating_run_sequences.reserve_exact(runs.len()); + for run in runs { + // Determine the `sos` and `eos` class for the sequence. + // + + let run_levels = &levels[run.clone()]; + let run_classes = &original_classes[run.clone()]; + let seq_level = run_levels[run_classes + .iter() + .position(|c| not_removed_by_x9(c)) + .unwrap_or(0)]; + + let end_level = run_levels[run_classes + .iter() + .rposition(|c| not_removed_by_x9(c)) + .unwrap_or(run.end - run.start - 1)]; + + // Get the level of the last non-removed char before the run. + let pred_level = match original_classes[..run.start] + .iter() + .rposition(not_removed_by_x9) + { + Some(idx) => levels[idx], + None => para_level, + }; + + // Get the level of the next non-removed char after the run. + let succ_level = match original_classes[run.end..] + .iter() + .position(not_removed_by_x9) + { + Some(idx) => levels[run.end + idx], + None => para_level, + }; + + isolating_run_sequences.push(IsolatingRunSequence { + runs: vec![run], + sos: max(seq_level, pred_level).bidi_class(), + eos: max(end_level, succ_level).bidi_class(), + }); + } + return; + } // Compute the set of isolating run sequences. // @@ -98,79 +159,77 @@ pub fn isolating_run_sequences( // Determine the `sos` and `eos` class for each sequence. // - sequences - .into_iter() - .map(|sequence: Vec| { - assert!(!sequence.is_empty()); + for sequence in sequences { + assert!(!sequence.is_empty()); - let mut result = IsolatingRunSequence { - runs: sequence, - sos: L, - eos: L, - }; + let start_of_seq = sequence[0].start; + let runs_len = sequence.len(); + let end_of_seq = sequence[runs_len - 1].end; + + let mut result = IsolatingRunSequence { + runs: sequence, + sos: L, + eos: L, + }; - let start_of_seq = result.runs[0].start; - let runs_len = result.runs.len(); - let end_of_seq = result.runs[runs_len - 1].end; - - // > (not counting characters removed by X9) - let seq_level = levels[result - .iter_forwards_from(start_of_seq, 0) - .find(|i| not_removed_by_x9(&original_classes[*i])) - .unwrap_or(start_of_seq)]; - - // XXXManishearth the spec talks of a start and end level, - // but for a given IRS the two should be equivalent, yes? - let end_level = levels[result - .iter_backwards_from(end_of_seq, runs_len - 1) - .find(|i| not_removed_by_x9(&original_classes[*i])) - .unwrap_or(end_of_seq - 1)]; - - #[cfg(test)] - for idx in result.runs.clone().into_iter().flatten() { - if not_removed_by_x9(&original_classes[idx]) { - assert_eq!(seq_level, levels[idx]); - } + // > (not counting characters removed by X9) + let seq_level = levels[result + .iter_forwards_from(start_of_seq, 0) + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(start_of_seq)]; + + // XXXManishearth the spec talks of a start and end level, + // but for a given IRS the two should be equivalent, yes? + let end_level = levels[result + .iter_backwards_from(end_of_seq, runs_len - 1) + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(end_of_seq - 1)]; + + #[cfg(test)] + for idx in result.runs.clone().into_iter().flatten() { + if not_removed_by_x9(&original_classes[idx]) { + assert_eq!(seq_level, levels[idx]); } + } + + // Get the level of the last non-removed char before the runs. + let pred_level = match original_classes[..start_of_seq] + .iter() + .rposition(not_removed_by_x9) + { + Some(idx) => levels[idx], + None => para_level, + }; + + // Get the last non-removed character to check if it is an isolate initiator. + // The spec calls for an unmatched one, but matched isolate initiators + // will never be at the end of a level run (otherwise there would be more to the run). + // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. + let last_non_removed = original_classes[..end_of_seq] + .iter() + .copied() + .rev() + .find(not_removed_by_x9) + .unwrap_or(BN); - // Get the level of the last non-removed char before the runs. - let pred_level = match original_classes[..start_of_seq] + // Get the level of the next non-removed char after the runs. + let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) { + para_level + } else { + match original_classes[end_of_seq..] .iter() - .rposition(not_removed_by_x9) + .position(not_removed_by_x9) { - Some(idx) => levels[idx], + Some(idx) => levels[end_of_seq + idx], None => para_level, - }; + } + }; - // Get the last non-removed character to check if it is an isolate initiator. - // The spec calls for an unmatched one, but matched isolate initiators - // will never be at the end of a level run (otherwise there would be more to the run). - // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. - let last_non_removed = original_classes[..end_of_seq] - .iter() - .copied() - .rev() - .find(not_removed_by_x9) - .unwrap_or(BN); - - // Get the level of the next non-removed char after the runs. - let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) { - para_level - } else { - match original_classes[end_of_seq..] - .iter() - .position(not_removed_by_x9) - { - Some(idx) => levels[end_of_seq + idx], - None => para_level, - } - }; + result.sos = max(seq_level, pred_level).bidi_class(); + result.eos = max(end_level, succ_level).bidi_class(); - result.sos = max(seq_level, pred_level).bidi_class(); - result.eos = max(end_level, succ_level).bidi_class(); - result - }) - .collect() + isolating_run_sequences.push(result); + } } impl IsolatingRunSequence { @@ -217,6 +276,9 @@ impl IsolatingRunSequence { /// Finds the level runs in a paragraph. /// /// +/// +/// This is only used by tests; normally level runs are identified during explicit::compute. +#[cfg(test)] fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec { assert_eq!(levels.len(), original_classes.len()); @@ -276,7 +338,14 @@ mod tests { let classes = &[L, RLE, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + false, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), @@ -289,7 +358,14 @@ mod tests { let classes = &[L, RLI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), @@ -302,7 +378,14 @@ mod tests { let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), @@ -321,7 +404,14 @@ mod tests { let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + false, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1 @@ -380,7 +470,14 @@ mod tests { let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1·RLI·PDI·RLI·PDI·text6 diff --git a/src/utf16.rs b/src/utf16.rs index 02f83aa..11b386f 100644 --- a/src/utf16.rs +++ b/src/utf16.rs @@ -18,7 +18,9 @@ use crate::{ compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels, reorder_visual, visual_runs_for_line, }; -use crate::{BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo}; +use crate::{ + BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags, +}; #[cfg(feature = "hardcoded-data")] use crate::HardcodedBidiData; @@ -83,7 +85,7 @@ struct InitialInfoExt<'text> { /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that /// requires no further bidi processing (i.e. there are no RTL characters or bidi /// control codes present). - pure_ltr: Vec, + flags: Vec, } impl<'text> InitialInfoExt<'text> { @@ -103,12 +105,12 @@ impl<'text> InitialInfoExt<'text> { default_para_level: Option, ) -> InitialInfoExt<'a> { let mut paragraphs = Vec::::new(); - let mut pure_ltr = Vec::::new(); - let (original_classes, _, _) = compute_initial_info( + let mut flags = Vec::::new(); + let (original_classes, _, _, _) = compute_initial_info( data_source, text, default_para_level, - Some((&mut paragraphs, &mut pure_ltr)), + Some((&mut paragraphs, &mut flags)), ); InitialInfoExt { @@ -117,7 +119,7 @@ impl<'text> InitialInfoExt<'text> { original_classes, paragraphs, }, - pure_ltr, + flags, } } } @@ -177,20 +179,21 @@ impl<'text> BidiInfo<'text> { text: &'a [u16], default_para_level: Option, ) -> BidiInfo<'a> { - let InitialInfoExt { base, pure_ltr, .. } = + let InitialInfoExt { base, flags, .. } = InitialInfoExt::new_with_data_source(data_source, text, default_para_level); let mut levels = Vec::::with_capacity(text.len()); let mut processing_classes = base.original_classes.clone(); - for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) { + for (para, flags) in base.paragraphs.iter().zip(flags.iter()) { let text = &text[para.range.clone()]; let original_classes = &base.original_classes[para.range.clone()]; compute_bidi_info_for_para( data_source, para, - *is_pure_ltr, + flags.is_pure_ltr, + flags.has_isolate_controls, text, original_classes, &mut processing_classes, @@ -411,7 +414,7 @@ impl<'text> ParagraphBidiInfo<'text> { ) -> ParagraphBidiInfo<'a> { // Here we could create a ParagraphInitialInfo struct to parallel the one // used by BidiInfo, but there doesn't seem any compelling reason for it. - let (original_classes, paragraph_level, is_pure_ltr) = + let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) = compute_initial_info(data_source, text, default_para_level, None); let mut levels = Vec::::with_capacity(text.len()); @@ -429,6 +432,7 @@ impl<'text> ParagraphBidiInfo<'text> { data_source, ¶_info, is_pure_ltr, + has_isolate_controls, text, &original_classes, &mut processing_classes,