Skip to content

Commit

Permalink
Auto merge of #35 - behnam:dev, r=mbrubeck
Browse files Browse the repository at this point in the history
Fix a bug in reorder_line() and improve benchmarks

Summary of changes:

* Fix bug in reorder_line() where optimized branch returned the full text (instead of line text), after (supposedly) reorder.

* Improve benchmarks logic to measure `reorder_line()` in isolation, and for full text of the test.

* Add new `basic` benchmarks to be able to measure perf for small, common text strings.

* Move `serde_test` to `dev-dependencies`, as it's only used in `test` profile.

* Bump version to `0.3.2`.

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/unicode-bidi/35)
<!-- Reviewable:end -->
  • Loading branch information
bors-servo authored May 24, 2017
2 parents d2d1180 + 6b20563 commit 36c0603
Show file tree
Hide file tree
Showing 11 changed files with 173 additions and 74 deletions.
8 changes: 5 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "unicode-bidi"
version = "0.3.1"
version = "0.3.2"
authors = ["The Servo Project Developers"]
license = "MIT / Apache-2.0"
description = "Implementation of the Unicode Bidirectional Algorithm"
Expand All @@ -16,10 +16,12 @@ name = "unicode_bidi"
[dependencies]
matches = "0.1"
serde = {version = ">=0.8, <2.0", optional = true}
serde_test = {version = ">=0.8, <2.0", optional = true}
serde_derive = {version = ">=0.8, <2.0", optional = true}

[dev-dependencies]
serde_test = ">=0.8, <2.0"

[features]
default = []
unstable = [] # Use in benches/
with_serde = ["serde", "serde_test", "serde_derive"]
with_serde = ["serde", "serde_derive"]
69 changes: 69 additions & 0 deletions benches/basic.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2014 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![cfg(all(test, feature = "unstable"))]
#![feature(test)]

extern crate test;
extern crate unicode_bidi;

use test::Bencher;

use unicode_bidi::BidiInfo;


const LTR_TEXTS: &[&str] = &[
"abc\ndef\nghi",
"abc 123\ndef 456\nghi 789",
];

const BIDI_TEXTS: &[&str] = &[
"ابجد\nهوز\nحتی",
"ابجد ۱۲۳\nهوز ۴۵۶\nحتی ۷۸۹",
];


fn bench_bidi_info_new(b: &mut Bencher, texts: &[&str]) {
for text in texts {
b.iter(|| { BidiInfo::new(text, None); });
}
}

fn bench_reorder_line(b: &mut Bencher, texts: &[&str]) {
for text in texts {
let bidi_info = BidiInfo::new(text, None);
b.iter(
|| for para in &bidi_info.paragraphs {
let line = para.range.clone();
bidi_info.reorder_line(para, line);
}
);
}
}


#[bench]
fn bench_1_bidi_info_new_for_ltr_texts(b: &mut Bencher) {
bench_bidi_info_new(b, LTR_TEXTS);
}

#[bench]
fn bench_2_bidi_info_new_for_bidi_texts(b: &mut Bencher) {
bench_bidi_info_new(b, BIDI_TEXTS);
}

#[bench]
fn bench_3_reorder_line_for_ltr_texts(b: &mut Bencher) {
bench_reorder_line(b, LTR_TEXTS);
}

#[bench]
fn bench_4_reorder_line_for_bidi_texts(b: &mut Bencher) {
bench_reorder_line(b, BIDI_TEXTS);
}
65 changes: 32 additions & 33 deletions benches/udhr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,21 @@ use test::Bencher;

use unicode_bidi::BidiInfo;


const LTR_TEXTS: &[&str] = &[
include_str!("udhr_data/ltr/udhr_acu_1.txt"),
include_str!("udhr_data/ltr/udhr_auc.txt"),
include_str!("udhr_data/ltr/udhr_eng.txt"),
include_str!("udhr_data/ltr/udhr_knc.txt"),
include_str!("udhr_data/ltr/udhr_krl.txt"),
include_str!("udhr_data/ltr/udhr_kwi.txt"),
include_str!("udhr_data/ltr/udhr_lot.txt"),
include_str!("udhr_data/ltr/udhr_mly_latn.txt"),
include_str!("udhr_data/ltr/udhr_piu.txt"),
include_str!("udhr_data/ltr/udhr_qug.txt"),
include_str!("udhr_data/ltr/udhr_snn.txt"),
include_str!("udhr_data/ltr/udhr_tiv.txt"),
include_str!("udhr_data/ltr/udhr_uig_latn.txt"),
];

const BIDI_TEXTS: &[&str] = &[
Expand All @@ -37,56 +41,51 @@ const BIDI_TEXTS: &[&str] = &[
include_str!("udhr_data/bidi/udhr_pes_1.txt"),
include_str!("udhr_data/bidi/udhr_skr.txt"),
include_str!("udhr_data/bidi/udhr_urd.txt"),
include_str!("udhr_data/bidi/udhr_eng.txt"),
include_str!("udhr_data/bidi/udhr_mly_latn.txt"),
include_str!("udhr_data/bidi/udhr_pes_2.txt"),
include_str!("udhr_data/bidi/udhr_uig_arab.txt"),
include_str!("udhr_data/bidi/udhr_urd_2.txt"),
include_str!("udhr_data/bidi/udhr_heb.txt"),
include_str!("udhr_data/bidi/udhr_pbu.txt"),
include_str!("udhr_data/bidi/udhr_pnb.txt"),
include_str!("udhr_data/bidi/udhr_uig_latn.txt"),
include_str!("udhr_data/bidi/udhr_ydd.txt"),
];


fn bench_bidi_info_new(b: &mut Bencher, texts: &[&str]) {
for text in texts {
b.iter(|| { BidiInfo::new(text, None); });
}
}

fn bench_reorder_line(b: &mut Bencher, texts: &[&str]) {
for text in texts {
let bidi_info = BidiInfo::new(text, None);
b.iter(
|| for para in &bidi_info.paragraphs {
let line = para.range.clone();
bidi_info.reorder_line(para, line);
}
);
}
}


#[bench]
fn bench_bidi_info_new_for_ltr_texts(b: &mut Bencher) {
b.iter(
|| for text in LTR_TEXTS {
BidiInfo::new(text, None);
},
);
fn bench_1_bidi_info_new_for_ltr_texts(b: &mut Bencher) {
bench_bidi_info_new(b, LTR_TEXTS);
}

#[bench]
fn bench_bidi_info_new_for_bidi_texts(b: &mut Bencher) {
b.iter(
|| for text in BIDI_TEXTS {
BidiInfo::new(text, None);
},
);
fn bench_2_bidi_info_new_for_bidi_texts(b: &mut Bencher) {
bench_bidi_info_new(b, BIDI_TEXTS);
}

#[bench]
fn bench_bidi_info_new_and_reordered_for_ltr_texts(b: &mut Bencher) {
b.iter(
|| for text in LTR_TEXTS {
let bidi_info = BidiInfo::new(text, None);
let para = &bidi_info.paragraphs[0];
let line = para.range.clone();
bidi_info.reordered_levels(para, line);
},
);
fn bench_3_reorder_line_for_ltr_texts(b: &mut Bencher) {
bench_reorder_line(b, LTR_TEXTS);
}

#[bench]
fn bench_bidi_info_new_and_reordered_for_bidi_texts(b: &mut Bencher) {
b.iter(
|| for text in BIDI_TEXTS {
let bidi_info = BidiInfo::new(text, None);
let para = &bidi_info.paragraphs[0];
let line = para.range.clone();
bidi_info.reordered_levels(para, line);
},
);
fn bench_4_reorder_line_for_bidi_texts(b: &mut Bencher) {
bench_reorder_line(b, BIDI_TEXTS);
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion src/char_data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) ->
Less
} else {
Greater
},
}
) {
Ok(idx) => {
let (_, _, cat) = r[idx];
Expand Down
7 changes: 3 additions & 4 deletions src/explicit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,12 @@ pub fn compute(
overflow_embedding_count == 0 {
let new_level = new_level.unwrap();
stack.push(
new_level,
match initial_classes[i] {
new_level, match initial_classes[i] {
RLO => OverrideStatus::RTL,
LRO => OverrideStatus::LTR,
RLI | LRI | FSI => OverrideStatus::Isolate,
_ => OverrideStatus::Neutral,
},
}
);
if is_isolate {
valid_isolate_count += 1;
Expand Down Expand Up @@ -180,7 +179,7 @@ impl DirectionalStatusStack {
Status {
level: level,
status: status,
},
}
);
}

Expand Down
84 changes: 57 additions & 27 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ impl<'text> InitialInfo<'text> {
range: para_start..para_end,
// P3. If no character is found in p2, set the paragraph level to zero.
level: para_level.unwrap_or(Level::ltr()),
},
}
);
// Reset state for the start of the next paragraph.
para_start = para_end;
Expand Down Expand Up @@ -182,7 +182,7 @@ impl<'text> InitialInfo<'text> {
Level::rtl()
} else {
Level::ltr()
},
}
);
}
}
Expand All @@ -202,7 +202,7 @@ impl<'text> InitialInfo<'text> {
ParagraphInfo {
range: para_start..text.len(),
level: para_level.unwrap_or(Level::ltr()),
},
}
);
}
assert!(original_classes.len() == text.len());
Expand Down Expand Up @@ -301,8 +301,9 @@ impl<'text> BidiInfo<'text> {
pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, str> {
let (levels, runs) = self.visual_runs(para, line.clone());
if runs.len() == 1 && levels[runs[0].start].is_ltr() {
return self.text.into();
return self.text[line.clone()].into();
}

let mut result = String::with_capacity(line.len());
for run in runs {
if levels[run.start].is_rtl() {
Expand Down Expand Up @@ -666,47 +667,76 @@ mod tests {
assert_eq!(BidiInfo::new("אבּג\n123", None).has_rtl(), true);
}

fn reorder_paras(text: &str) -> Vec<Cow<str>> {
let bidi_info = BidiInfo::new(text, None);
bidi_info
.paragraphs
.iter()
.map(|para| bidi_info.reorder_line(para, para.range.clone()))
.collect()
}

#[test]
fn test_reorder_line() {
fn reorder(text: &str) -> Cow<str> {
let bidi_info = BidiInfo::new(text, None);
let para = &bidi_info.paragraphs[0];
let line = para.range.clone();
bidi_info.reorder_line(para, line)
}
/// Bidi_Class: L L L B L L L B L L L
assert_eq!(
reorder_paras("abc\ndef\nghi"),
vec!["abc\n", "def\n", "ghi"]
);

/// Bidi_Class: L L EN B L L EN B L L EN
assert_eq!(
reorder_paras("ab1\nde2\ngh3"),
vec!["ab1\n", "de2\n", "gh3"]
);

/// Bidi_Class: L L L B AL AL AL
assert_eq!(reorder_paras("abc\nابج"), vec!["abc\n", "جبا"]);

assert_eq!(reorder("abc123"), "abc123");
assert_eq!(reorder("1.-2"), "1.-2");
assert_eq!(reorder("1-.2"), "1-.2");
assert_eq!(reorder("abc אבג"), "abc גבא");
/// Bidi_Class: AL AL AL B L L L
assert_eq!(reorder_paras("ابج\nabc"), vec!["\nجبا", "abc"]);

assert_eq!(reorder_paras("1.-2"), vec!["1.-2"]);
assert_eq!(reorder_paras("1-.2"), vec!["1-.2"]);
assert_eq!(reorder_paras("abc אבג"), vec!["abc גבא"]);

// Numbers being weak LTR characters, cannot reorder strong RTL
assert_eq!(reorder("123 אבג"), "גבא 123");
assert_eq!(reorder_paras("123 אבג"), vec!["גבא 123"]);

// Testing for RLE Character
assert_eq!(
reorder("\u{202B}abc אבג\u{202C}"),
"\u{202B}\u{202C}גבא abc"
reorder_paras("\u{202B}abc אבג\u{202C}"),
vec!["\u{202B}\u{202C}גבא abc"]
);

// Testing neutral characters
assert_eq!(reorder("אבג? אבג"), "גבא ?גבא");
assert_eq!(reorder_paras("אבג? אבג"), vec!["גבא ?גבא"]);

// Testing neutral characters with special case
assert_eq!(reorder("A אבג?"), "A גבא?");
assert_eq!(reorder_paras("A אבג?"), vec!["A גבא?"]);

// Testing neutral characters with Implicit RTL Marker
// The given test highlights a possible non-conformance issue that will perhaps be fixed in
// the subsequent steps.
//assert_eq!(reorder("A אבג?\u{202f}"), "A \u{202f}?גבא");
assert_eq!(reorder("אבג abc"), "abc גבא");
assert_eq!(
reorder("abc\u{2067}.-\u{2069}ghi"),
"abc\u{2067}-.\u{2069}ghi"
reorder_paras("A אבג?\u{200F}"),
vec!["A \u{200F}?גבא"]
);
assert_eq!(reorder_paras("אבג abc"), vec!["abc גבא"]);
assert_eq!(
reorder_paras("abc\u{2067}.-\u{2069}ghi"),
vec!["abc\u{2067}-.\u{2069}ghi"]
);
assert_eq!(
reorder_paras("Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!"),
vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"]
);

// With mirrorable characters in RTL run
assert_eq!(reorder_paras("א(ב)ג."), vec![".ג)ב(א"]);

// With mirrorable characters on level boundry
assert_eq!(
reorder("Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!"),
"Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"
reorder_paras("אב(גד[&ef].)gh"),
vec!["ef].)gh&[דג(בא"]
);
}
}
Expand Down
6 changes: 2 additions & 4 deletions src/prepare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,7 @@ pub fn isolating_run_sequences(
let level = levels[start];

// Get the level of the last non-removed char before the runs.
let pred_level = match initial_classes[..start]
.iter()
.rposition(not_removed_by_x9) {
let pred_level = match initial_classes[..start].iter().rposition(not_removed_by_x9) {
Some(idx) => levels[idx],
None => para_level,
};
Expand All @@ -118,7 +116,7 @@ pub fn isolating_run_sequences(
sos: max(level, pred_level).bidi_class(),
eos: max(level, succ_level).bidi_class(),
}
},
}
)
.collect();
}
Expand Down
Loading

0 comments on commit 36c0603

Please sign in to comment.