Skip to content

Commit

Permalink
refactor fast field query (#2452)
Browse files Browse the repository at this point in the history
As preparation of #2023 and #1709

* Use Term to pass parameters
* merge u64 and ip fast field range query

Side note: I did not rename range_query_u64_fastfield, because then git can't track the changes.
  • Loading branch information
PSeitz authored Jul 15, 2024
1 parent eab6608 commit 1b40766
Show file tree
Hide file tree
Showing 14 changed files with 680 additions and 949 deletions.
9 changes: 7 additions & 2 deletions examples/integer_range_search.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
use std::ops::Bound;

// # Searching a range on an indexed int field.
//
// Below is an example of creating an indexed integer field in your schema
// You can use RangeQuery to get a Count of all occurrences in a given range.
use tantivy::collector::Count;
use tantivy::query::RangeQuery;
use tantivy::schema::{Schema, INDEXED};
use tantivy::{doc, Index, IndexWriter, Result};
use tantivy::{doc, Index, IndexWriter, Result, Term};

fn main() -> Result<()> {
// For the sake of simplicity, this schema will only have 1 field
Expand All @@ -27,7 +29,10 @@ fn main() -> Result<()> {
reader.reload()?;
let searcher = reader.searcher();
// The end is excluded i.e. here we are searching up to 1969
let docs_in_the_sixties = RangeQuery::new_u64("year".to_string(), 1960..1970);
let docs_in_the_sixties = RangeQuery::new(
Bound::Included(Term::from_field_u64(year_field, 1960)),
Bound::Excluded(Term::from_field_u64(year_field, 1970)),
);
// Uses a Count collector to sum the total number of docs in the range
let num_60s_books = searcher.search(&docs_in_the_sixties, &Count)?;
assert_eq!(num_60s_books, 10);
Expand Down
2 changes: 1 addition & 1 deletion src/indexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ mod tests_mmap {
Type::Str,
),
(format!("{field_name_out_internal}a"), Type::Str),
(format!("{field_name_out_internal}"), Type::Str),
(field_name_out_internal.to_string(), Type::Str),
(format!("num{field_name_out_internal}"), Type::I64),
];
expected_fields.sort();
Expand Down
12 changes: 8 additions & 4 deletions src/query/all_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@ pub struct AllWeight;

impl Weight for AllWeight {
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
let all_scorer = AllScorer {
doc: 0u32,
max_doc: reader.max_doc(),
};
let all_scorer = AllScorer::new(reader.max_doc());
Ok(Box::new(BoostScorer::new(all_scorer, boost)))
}

Expand All @@ -43,6 +40,13 @@ pub struct AllScorer {
max_doc: DocId,
}

impl AllScorer {
/// Creates a new AllScorer with `max_doc` docs.
pub fn new(max_doc: DocId) -> AllScorer {
AllScorer { doc: 0u32, max_doc }
}
}

impl DocSet for AllScorer {
#[inline(always)]
fn advance(&mut self) -> DocId {
Expand Down
2 changes: 1 addition & 1 deletion src/query/disjunction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ mod tests {
.cloned()
.map(VecDocSet::from)
.map(|d| ConstScorer::new(d, 1.0)),
DoNothingCombiner::default(),
DoNothingCombiner,
min_match,
)
};
Expand Down
14 changes: 6 additions & 8 deletions src/query/exist_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ mod tests {
use crate::query::exist_query::ExistsQuery;
use crate::query::{BooleanQuery, RangeQuery};
use crate::schema::{Facet, FacetOptions, Schema, FAST, INDEXED, STRING, TEXT};
use crate::{Index, Searcher};
use crate::{Index, Searcher, Term};

#[test]
fn test_exists_query_simple() -> crate::Result<()> {
Expand Down Expand Up @@ -188,20 +188,18 @@ mod tests {

// exercise seek
let query = BooleanQuery::intersection(vec![
Box::new(RangeQuery::new_u64_bounds(
"all".to_string(),
Bound::Included(50),
Box::new(RangeQuery::new(
Bound::Included(Term::from_field_u64(all_field, 50)),
Bound::Unbounded,
)),
Box::new(ExistsQuery::new_exists_query("even".to_string())),
]);
assert_eq!(searcher.search(&query, &Count)?, 25);

let query = BooleanQuery::intersection(vec![
Box::new(RangeQuery::new_u64_bounds(
"all".to_string(),
Bound::Included(0),
Bound::Excluded(50),
Box::new(RangeQuery::new(
Bound::Included(Term::from_field_u64(all_field, 0)),
Bound::Included(Term::from_field_u64(all_field, 50)),
)),
Box::new(ExistsQuery::new_exists_query("odd".to_string())),
]);
Expand Down
2 changes: 1 addition & 1 deletion src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub use self::phrase_prefix_query::PhrasePrefixQuery;
pub use self::phrase_query::PhraseQuery;
pub use self::query::{EnableScoring, Query, QueryClone};
pub use self::query_parser::{QueryParser, QueryParserError};
pub use self::range_query::{FastFieldRangeWeight, IPFastFieldRangeWeight, RangeQuery};
pub use self::range_query::{FastFieldRangeWeight, RangeQuery};
pub use self::regex_query::RegexQuery;
pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::score_combiner::{
Expand Down
10 changes: 1 addition & 9 deletions src/query/phrase_prefix_query/phrase_prefix_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,7 @@ impl Query for PhrasePrefixQuery {
Bound::Unbounded
};

let mut range_query = RangeQuery::new_term_bounds(
enable_scoring
.schema()
.get_field_name(self.field)
.to_owned(),
self.prefix.1.typ(),
&Bound::Included(self.prefix.1.clone()),
&end_term,
);
let mut range_query = RangeQuery::new(Bound::Included(self.prefix.1.clone()), end_term);
range_query.limit(self.max_expansions as u64);
range_query.weight(enable_scoring)
}
Expand Down
4 changes: 1 addition & 3 deletions src/query/query_parser/logical_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::fmt;
use std::ops::Bound;

use crate::query::Occur;
use crate::schema::{Term, Type};
use crate::schema::Term;
use crate::Score;

#[derive(Clone)]
Expand All @@ -14,8 +14,6 @@ pub enum LogicalLiteral {
prefix: bool,
},
Range {
field: String,
value_type: Type,
lower: Bound<Term>,
upper: Bound<Term>,
},
Expand Down
23 changes: 5 additions & 18 deletions src/query/query_parser/query_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,8 +790,6 @@ impl QueryParser {
let (field, json_path) = try_tuple!(self
.split_full_path(&full_path)
.ok_or_else(|| QueryParserError::FieldDoesNotExist(full_path.clone())));
let field_entry = self.schema.get_field_entry(field);
let value_type = field_entry.field_type().value_type();
let mut errors = Vec::new();
let lower = match self.resolve_bound(field, json_path, &lower) {
Ok(bound) => bound,
Expand All @@ -812,12 +810,8 @@ impl QueryParser {
// we failed to parse something. Either way, there is no point emiting it
return (None, errors);
}
let logical_ast = LogicalAst::Leaf(Box::new(LogicalLiteral::Range {
field: self.schema.get_field_name(field).to_string(),
value_type,
lower,
upper,
}));
let logical_ast =
LogicalAst::Leaf(Box::new(LogicalLiteral::Range { lower, upper }));
(Some(logical_ast), errors)
}
UserInputLeaf::Set {
Expand Down Expand Up @@ -884,14 +878,7 @@ fn convert_literal_to_query(
Box::new(PhraseQuery::new_with_offset_and_slop(terms, slop))
}
}
LogicalLiteral::Range {
field,
value_type,
lower,
upper,
} => Box::new(RangeQuery::new_term_bounds(
field, value_type, &lower, &upper,
)),
LogicalLiteral::Range { lower, upper } => Box::new(RangeQuery::new(lower, upper)),
LogicalLiteral::Set { elements, .. } => Box::new(TermSetQuery::new(elements)),
LogicalLiteral::All => Box::new(AllQuery),
}
Expand Down Expand Up @@ -1136,8 +1123,8 @@ mod test {
let query = make_query_parser().parse_query("title:[A TO B]").unwrap();
assert_eq!(
format!("{query:?}"),
"RangeQuery { field: \"title\", value_type: Str, lower_bound: Included([97]), \
upper_bound: Included([98]), limit: None }"
"RangeQuery { lower_bound: Included(Term(field=0, type=Str, \"a\")), upper_bound: \
Included(Term(field=0, type=Str, \"b\")), limit: None }"
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,12 @@ impl<T: Send + Sync + PartialOrd + Copy + Debug + 'static> DocSet for RangeDocSe

#[cfg(test)]
mod tests {
use std::ops::Bound;

use crate::collector::Count;
use crate::directory::RamDirectory;
use crate::query::RangeQuery;
use crate::{schema, IndexBuilder, TantivyDocument};
use crate::{schema, IndexBuilder, TantivyDocument, Term};

#[test]
fn range_query_fast_optional_field_minimum() {
Expand Down Expand Up @@ -218,10 +220,9 @@ mod tests {
let reader = index.reader().unwrap();
let searcher = reader.searcher();

let query = RangeQuery::new_u64_bounds(
"score".to_string(),
std::ops::Bound::Included(70),
std::ops::Bound::Unbounded,
let query = RangeQuery::new(
Bound::Included(Term::from_field_u64(score_field, 70)),
Bound::Unbounded,
);

let count = searcher.search(&query, &Count).unwrap();
Expand Down
4 changes: 1 addition & 3 deletions src/query/range_query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@ use std::ops::Bound;

use crate::schema::Type;

mod fast_field_range_query;
mod fast_field_range_doc_set;
mod range_query;
mod range_query_ip_fastfield;
mod range_query_u64_fastfield;

pub use self::range_query::RangeQuery;
pub use self::range_query_ip_fastfield::IPFastFieldRangeWeight;
pub use self::range_query_u64_fastfield::FastFieldRangeWeight;

// TODO is this correct?
Expand Down
Loading

0 comments on commit 1b40766

Please sign in to comment.