From a355cffc239f55831ae016acb5efd4fdb6d4f933 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 3 Sep 2024 20:25:06 -0700 Subject: [PATCH] Updated exact scores to be consistent with approximate scores --- lib/searchkick/query.rb | 7 ++++--- test/knn_test.rb | 14 +++----------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/lib/searchkick/query.rb b/lib/searchkick/query.rb index 02c7efca..17622384 100644 --- a/lib/searchkick/query.rb +++ b/lib/searchkick/query.rb @@ -927,7 +927,8 @@ def set_knn(payload, knn, per_page, offset) query_value: vector, space_type: space_type } - } + }, + boost: distance == "cosine" ? 0.5 : 1.0 } } else @@ -947,9 +948,9 @@ def set_knn(payload, knn, per_page, offset) source = case distance when "cosine" - "cosineSimilarity(params.query_vector, params.field) + 1.0" + "(cosineSimilarity(params.query_vector, params.field) + 1.0) * 0.5" when "euclidean" - "1 / (1 + l2norm(params.query_vector, params.field))" + "double l2 = l2norm(params.query_vector, params.field); 1 / (1 + l2 * l2)" else raise ArgumentError, "Unknown distance: #{distance}" end diff --git a/test/knn_test.rb b/test/knn_test.rb index c3ed506e..fc284c45 100644 --- a/test/knn_test.rb +++ b/test/knn_test.rb @@ -20,8 +20,7 @@ def test_basic_exact assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], exact: true} scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3], exact: true}).hits.map { |v| v["_score"] } - # TODO match approximate - assert_in_delta 2, scores[0] + assert_in_delta 1, scores[0] assert_in_delta 0, scores[1] end @@ -72,7 +71,6 @@ def test_euclidean assert_order "*", ["A", "B"], knn: {field: :factors, vector: [1, 2, 3]} scores = Product.search(knn: {field: :factors, vector: [1, 2, 3]}).hits.map { |v| v["_score"] } - # TODO return distance assert_in_delta 1.0 / (1 + 0), scores[0] assert_in_delta 1.0 / (1 + 5**2), scores[1] end @@ -82,13 +80,8 @@ def test_euclidean_exact assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], distance: "euclidean"} scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3], distance: "euclidean"}).hits.map { |v| v["_score"] } - # TODO return distance assert_in_delta 1.0 / (1 + 0), scores[0] - if Searchkick.opensearch? - assert_in_delta 1.0 / (1 + 5**2), scores[1] - else - assert_in_delta 1.0 / (1 + 5), scores[1] - end + assert_in_delta 1.0 / (1 + 5**2), scores[1] end def test_unindexed @@ -98,8 +91,7 @@ def test_unindexed assert_order "*", ["A", "B"], knn: {field: :vector, vector: [1, 2, 3], distance: "cosine"} scores = Product.search(knn: {field: :vector, vector: [1, 2, 3], distance: "cosine"}).hits.map { |v| v["_score"] } - # TODO match approximate - assert_in_delta 2, scores[0] + assert_in_delta 1, scores[0] assert_in_delta 0, scores[1] error = assert_raises(ArgumentError) do