unicode-rs
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
(+8 -5)
diff --git a/benches/chars.rs b/benches/chars.rs
(+2 -3)
diff --git a/scripts/unicode_gen_breaktests.py b/scripts/unicode_gen_breaktests.py
(+4 -4)
diff --git a/src/grapheme.rs b/src/grapheme.rs
(+16 -16)
diff --git a/src/lib.rs b/src/lib.rs
(+9 -9)
diff --git a/src/sentence.rs b/src/sentence.rs
(+7 -9)
@@ -7,27 +7,30 @@ on:
     branches: [ master ]
 
 env:
+  CARGO_INCREMENTAL: 0
   CARGO_TERM_COLOR: always
+  RUST_BACKTRACE: 1
+  RUSTFLAGS: -D warnings
+  RUSTDOCFLAGS: -D warnings
 
 jobs:
   build:
-
     runs-on: ubuntu-latest
-
     steps:
     - uses: actions/checkout@v2
     - name: Build
       run: cargo build --verbose
     - name: Run tests
       run: cargo test --verbose
-  fmt:
+    - name: Run clippy
+      run: cargo clippy --all-targets --all --verbose
 
+  fmt:
     runs-on: ubuntu-latest
-
     steps:
     - uses: actions/checkout@v2
     - name: Rustfmt
-      run: cargo fmt --check
+      run: cargo fmt --all --check
     - name: Verify regenerated files
       run: ./scripts/unicode.py && diff tables.rs src/tables.rs
     - name: Verify regenerated tests
 
@@ -6,7 +6,6 @@
 //! is how much slower full unicode handling is.
 
 use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
-use unicode_segmentation;
 
 use std::fs;
 use unicode_segmentation::UnicodeSegmentation;
@@ -24,14 +23,14 @@ const FILES: &[&str] = &[
 
 #[inline(always)]
 fn grapheme(text: &str) {
-    for c in UnicodeSegmentation::graphemes(black_box(&*text), true) {
+    for c in UnicodeSegmentation::graphemes(black_box(text), true) {
         black_box(c);
     }
 }
 
 #[inline(always)]
 fn scalar(text: &str) {
-    for c in black_box(&*text).chars() {
+    for c in black_box(text).chars() {
         black_box(c);
     }
 }
 
@@ -169,8 +169,8 @@ def create_grapheme_data(f):
         else:
             test_diff.append((allchars, extgraphs, c))
 
-    stype = "&'static [(&'static str, &'static [&'static str])]"
-    dtype = "&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"
+    stype = "&[(&str, &[&str])]"
+    dtype = "&[(&str, &[&str], &[&str])]"
     f.write("    // official Unicode test data\n")
     f.write("    // http://www.unicode.org/Public/%s/ucd/auxiliary/GraphemeBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
     unicode.emit_table(f, "TEST_SAME", test_same, stype, True, showfun, True)
@@ -185,7 +185,7 @@ def create_words_data(f):
         allchars = [cn for s in c for cn in s]
         test.append((allchars, c))
 
-    wtype = "&'static [(&'static str, &'static [&'static str])]"
+    wtype = "&[(&str, &[&str])]"
     f.write("    // official Unicode test data\n")
     f.write("    // http://www.unicode.org/Public/%s/ucd/auxiliary/WordBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
     unicode.emit_table(f, "TEST_WORD", test, wtype, True, showfun, True)
@@ -199,7 +199,7 @@ def create_sentence_data(f):
         allchars = [cn for s in c for cn in s]
         test.append((allchars, c))
 
-    wtype = "&'static [(&'static str, &'static [&'static str])]"
+    wtype = "&[(&str, &[&str])]"
     f.write("    // official Unicode test data\n")
     f.write("    // http://www.unicode.org/Public/%s/ucd/auxiliary/SentenceBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
     unicode.emit_table(f, "TEST_SENTENCE", test, wtype, True, showfun, True)
 
@@ -140,7 +140,7 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
 }
 
 #[inline]
-pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
+pub fn new_graphemes(s: &str, is_extended: bool) -> Graphemes<'_> {
     let len = s.len();
     Graphemes {
         string: s,
@@ -150,7 +150,7 @@ pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
 }
 
 #[inline]
-pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> {
+pub fn new_grapheme_indices(s: &str, is_extended: bool) -> GraphemeIndices<'_> {
     GraphemeIndices {
         start_offset: s.as_ptr() as usize,
         iter: new_graphemes(s, is_extended),
@@ -296,10 +296,10 @@ impl GraphemeCursor {
             GraphemeState::Unknown
         };
         GraphemeCursor {
-            offset: offset,
-            len: len,
-            state: state,
-            is_extended: is_extended,
+            offset,
+            len,
+            state,
+            is_extended,
             cat_before: None,
             cat_after: None,
             pre_context_offset: None,
@@ -406,7 +406,7 @@ impl GraphemeCursor {
         assert!(chunk_start + chunk.len() == self.pre_context_offset.unwrap());
         self.pre_context_offset = None;
         if self.is_extended && chunk_start + chunk.len() == self.offset {
-            let ch = chunk.chars().rev().next().unwrap();
+            let ch = chunk.chars().next_back().unwrap();
             if self.grapheme_category(ch) == gr::GC_Prepend {
                 self.decide(false); // GB9b
                 return;
@@ -417,7 +417,7 @@ impl GraphemeCursor {
             GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start),
             _ => {
                 if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start {
-                    let ch = chunk.chars().rev().next().unwrap();
+                    let ch = chunk.chars().next_back().unwrap();
                     self.cat_before = Some(self.grapheme_category(ch));
                 }
             }
@@ -540,10 +540,10 @@ impl GraphemeCursor {
         if self.state == GraphemeState::NotBreak {
             return Ok(false);
         }
-        if self.offset < chunk_start || self.offset >= chunk_start + chunk.len() {
-            if self.offset > chunk_start + chunk.len() || self.cat_after.is_none() {
-                return Err(GraphemeIncomplete::InvalidOffset);
-            }
+        if (self.offset < chunk_start || self.offset >= chunk_start + chunk.len())
+            && (self.offset > chunk_start + chunk.len() || self.cat_after.is_none())
+        {
+            return Err(GraphemeIncomplete::InvalidOffset);
         }
         if let Some(pre_context_offset) = self.pre_context_offset {
             return Err(GraphemeIncomplete::PreContext(pre_context_offset));
@@ -566,15 +566,15 @@ impl GraphemeCursor {
             }
         }
         if self.cat_before.is_none() {
-            let ch = chunk[..offset_in_chunk].chars().rev().next().unwrap();
+            let ch = chunk[..offset_in_chunk].chars().next_back().unwrap();
             self.cat_before = Some(self.grapheme_category(ch));
         }
         match check_pair(self.cat_before.unwrap(), self.cat_after.unwrap()) {
-            PairResult::NotBreak => return self.decision(false),
-            PairResult::Break => return self.decision(true),
+            PairResult::NotBreak => self.decision(false),
+            PairResult::Break => self.decision(true),
             PairResult::Extended => {
                 let is_extended = self.is_extended;
-                return self.decision(!is_extended);
+                self.decision(!is_extended)
             }
             PairResult::Regional => {
                 if let Some(ris_count) = self.ris_count {
 
@@ -96,7 +96,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&gr2[..], b);
     /// ```
-    fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
+    fn graphemes(&self, is_extended: bool) -> Graphemes<'_>;
 
     /// Returns an iterator over the grapheme clusters of `self` and their
     /// byte offsets. See `graphemes()` for more information.
@@ -111,7 +111,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&gr_inds[..], b);
     /// ```
-    fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
+    fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices<'_>;
 
     /// Returns an iterator over the words of `self`, separated on
     /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries).
@@ -133,7 +133,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&uw1[..], b);
     /// ```
-    fn unicode_words<'a>(&'a self) -> UnicodeWords<'a>;
+    fn unicode_words(&self) -> UnicodeWords<'_>;
 
     /// Returns an iterator over the words of `self`, separated on
     /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries), and their
@@ -157,7 +157,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&uwi1[..], b);
     /// ```
-    fn unicode_word_indices<'a>(&'a self) -> UnicodeWordIndices<'a>;
+    fn unicode_word_indices(&self) -> UnicodeWordIndices<'_>;
 
     /// Returns an iterator over substrings of `self` separated on
     /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries).
@@ -173,7 +173,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&swu1[..], b);
     /// ```
-    fn split_word_bounds<'a>(&'a self) -> UWordBounds<'a>;
+    fn split_word_bounds(&self) -> UWordBounds<'_>;
 
     /// Returns an iterator over substrings of `self`, split on UAX#29 word boundaries,
     /// and their offsets. See `split_word_bounds()` for more information.
@@ -188,7 +188,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&swi1[..], b);
     /// ```
-    fn split_word_bound_indices<'a>(&'a self) -> UWordBoundIndices<'a>;
+    fn split_word_bound_indices(&self) -> UWordBoundIndices<'_>;
 
     /// Returns an iterator over substrings of `self` separated on
     /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
@@ -210,7 +210,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&us1[..], b);
     /// ```
-    fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>;
+    fn unicode_sentences(&self) -> UnicodeSentences<'_>;
 
     /// Returns an iterator over substrings of `self` separated on
     /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
@@ -227,7 +227,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&ssb1[..], b);
     /// ```
-    fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>;
+    fn split_sentence_bounds(&self) -> USentenceBounds<'_>;
 
     /// Returns an iterator over substrings of `self`, split on UAX#29 sentence boundaries,
     /// and their offsets. See `split_sentence_bounds()` for more information.
@@ -243,7 +243,7 @@ pub trait UnicodeSegmentation {
     ///
     /// assert_eq!(&ssi1[..], b);
     /// ```
-    fn split_sentence_bound_indices<'a>(&'a self) -> USentenceBoundIndices<'a>;
+    fn split_sentence_bound_indices(&self) -> USentenceBoundIndices<'_>;
 }
 
 impl UnicodeSegmentation for str {
 
@@ -264,9 +264,7 @@ mod fwd {
             }
 
             // SB2 https://unicode.org/reports/tr29/#SB2
-            if self.state.match1(StatePart::Sot) {
-                None
-            } else if self.state.match1(StatePart::Eot) {
+            if self.state.match1(StatePart::Sot) || self.state.match1(StatePart::Eot) {
                 None
             } else {
                 self.state = self.state.end();
@@ -275,7 +273,7 @@ mod fwd {
         }
     }
 
-    pub fn new_sentence_breaks<'a>(source: &'a str) -> SentenceBreaks<'a> {
+    pub fn new_sentence_breaks(source: &str) -> SentenceBreaks<'_> {
         SentenceBreaks {
             string: source,
             pos: 0,
@@ -329,28 +327,28 @@ pub struct USentenceBoundIndices<'a> {
 }
 
 #[inline]
-pub fn new_sentence_bounds<'a>(source: &'a str) -> USentenceBounds<'a> {
+pub fn new_sentence_bounds(source: &str) -> USentenceBounds<'_> {
     USentenceBounds {
         iter: fwd::new_sentence_breaks(source),
         sentence_start: None,
     }
 }
 
 #[inline]
-pub fn new_sentence_bound_indices<'a>(source: &'a str) -> USentenceBoundIndices<'a> {
+pub fn new_sentence_bound_indices(source: &str) -> USentenceBoundIndices<'_> {
     USentenceBoundIndices {
         start_offset: source.as_ptr() as usize,
         iter: new_sentence_bounds(source),
     }
 }
 
 #[inline]
-pub fn new_unicode_sentences<'b>(s: &'b str) -> UnicodeSentences<'b> {
+pub fn new_unicode_sentences(s: &str) -> UnicodeSentences<'_> {
     use super::UnicodeSegmentation;
     use crate::tables::util::is_alphanumeric;
 
     fn has_alphanumeric(s: &&str) -> bool {
-        s.chars().any(|c| is_alphanumeric(c))
+        s.chars().any(is_alphanumeric)
     }
     let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer
 
@@ -384,7 +382,7 @@ impl<'a> Iterator for USentenceBounds<'a> {
 
     #[inline]
     fn next(&mut self) -> Option<&'a str> {
-        if self.sentence_start == None {
+        if self.sentence_start.is_none() {
             if let Some(start_pos) = self.iter.next() {
                 self.sentence_start = Some(start_pos)
             } else {
Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,6 @@`
`6`	`6`	`//! is how much slower full unicode handling is.`
`7`	`7`
`8`	`8`	`use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};`
`9`		`-use unicode_segmentation;`
`10`	`9`
`11`	`10`	`use std::fs;`
`12`	`11`	`use unicode_segmentation::UnicodeSegmentation;`
`@@ -24,14 +23,14 @@ const FILES: &[&str] = &[`
`24`	`23`
`25`	`24`	`#[inline(always)]`
`26`	`25`	`fn grapheme(text: &str) {`
`27`		`- for c in UnicodeSegmentation::graphemes(black_box(&*text), true) {`
	`26`	`+ for c in UnicodeSegmentation::graphemes(black_box(text), true) {`
`28`	`27`	`black_box(c);`
`29`	`28`	`}`
`30`	`29`	`}`
`31`	`30`
`32`	`31`	`#[inline(always)]`
`33`	`32`	`fn scalar(text: &str) {`
`34`		`- for c in black_box(&*text).chars() {`
	`33`	`+ for c in black_box(text).chars() {`
`35`	`34`	`black_box(c);`
`36`	`35`	`}`
`37`	`36`	`}`