Skip to content

Commit

Permalink
Now supports cosine distance metric [API]
Browse files Browse the repository at this point in the history
Added type C4VectorMetric and field C4VectorIndexOptions::metric.
Default value should be kC4VectorMetricEuclidean.
  • Loading branch information
snej committed Jan 11, 2024
1 parent e3cef4c commit 3c8ae9a
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 7 deletions.
10 changes: 9 additions & 1 deletion C/include/c4IndexTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,16 @@ typedef C4_ENUM(uint32_t, C4IndexType){
kC4FullTextIndex, ///< Full-text index
kC4ArrayIndex, ///< Index of array values, for use with UNNEST
kC4PredictiveIndex, ///< Index of prediction() results (Enterprise Edition only)
kC4VectorIndex, ///< Index of ML vector similarity.
kC4VectorIndex, ///< Index of ML vector similarity (Enterprise Edition only)
};

/** Distance function a vector index uses to compare vectors.
    The numeric values are spelled out because they have to stay identical to
    IndexSpec::VectorOptions::Metric. */
typedef C4_ENUM(uint32_t, C4VectorMetric){
    kC4VectorMetricDefault   = 0,  ///< Use default metric, Euclidean
    kC4VectorMetricEuclidean = 1,  ///< Euclidean distance (squared)
    kC4VectorMetricCosine    = 2,  ///< Cosine distance (1.0 - cosine similarity)
};  // Values must match IndexSpec::VectorOptions::Metric

/** Types of encoding (compression) to use in vector indexes. */
typedef C4_ENUM(uint32_t, C4VectorEncoding){
kC4VectorEncodingDefault, ///< Use default encoding, which is currently SQ
Expand All @@ -40,6 +47,7 @@ typedef C4_ENUM(uint32_t, C4VectorEncoding){
/** Options for vector indexes.
    `metric` and `encoding` each have a `...Default` enumerator listed first in their enum
    (kC4VectorMetricDefault, kC4VectorEncodingDefault), which selects the default behavior. */
typedef struct C4VectorIndexOptions {
unsigned numCentroids; ///< Number of buckets to partition the vectors between
C4VectorMetric metric; ///< Distance metric used to compare vectors (see C4VectorMetric)
C4VectorEncoding encoding; ///< Vector compression type (see C4VectorEncoding)
} C4VectorIndexOptions;

Expand Down
2 changes: 1 addition & 1 deletion LiteCore/Database/CollectionImpl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ namespace litecore {
ftsOpt.ignoreDiacritics = indexOptions->ignoreDiacritics;
ftsOpt.disableStemming = indexOptions->disableStemming;
ftsOpt.stopWords = indexOptions->stopWords;
options = ftsOpt;
options = ftsOpt;
break;
}
case kC4VectorIndex:
Expand Down
7 changes: 7 additions & 0 deletions LiteCore/Query/IndexSpec.hh
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,20 @@ namespace litecore {

/// Configuration of a vector index.
struct VectorOptions {
    /// Distance function used to compare vectors.
    /// Note: values must match C4VectorMetric in c4IndexTypes.h
    enum Metric {
        DefaultMetric = 0,  ///< Use default metric, Euclidean
        Euclidean     = 1,  ///< Euclidean distance (squared)
        Cosine        = 2,  ///< Cosine distance (1.0 - cosine similarity)
    };

    /// How stored vectors are encoded (compressed).
    /// Note: values must match C4VectorEncoding in c4IndexTypes.h
    enum Encoding {
        DefaultEncoding = 0,  ///< Use default encoding, which is currently SQ
        NoEncoding      = 1,  ///< No encoding; 4 bytes per dimension, no data loss
        SQEncoding      = 2,  ///< Scalar Quantizer; 1 byte per dimension (recommended)
    };

    unsigned numCentroids = 2048;             ///< Number of centroids/buckets to divide the index into
    Metric   metric       = DefaultMetric;    ///< Distance metric
    Encoding encoding     = DefaultEncoding;  ///< Vector encoding/compression
};

Expand Down
2 changes: 1 addition & 1 deletion LiteCore/Query/QueryParser.hh
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ namespace litecore {
string predictiveIdentifier(const Value*) const;
string predictiveTableName(const Value*) const;

/// Translates the JSON-parsed Value to blob-format vector for use by sqlite-vss.
/// Translates the JSON-parsed Value to blob-format vector for use by vectorsearch.
string vectorExpressionSQL(const Value*);
string vectorIndexTableName(const Value* matchExpr);

Expand Down
10 changes: 7 additions & 3 deletions LiteCore/Query/SQLiteKeyStore+VectorIndex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ using namespace fleece::impl;

namespace litecore {

// Vector search index for ML / predictive query, using the sqlite-vss extension.
// sqlite-vss documentation: https://github.com/asg017/sqlite-vss/blob/main/docs.md
// Vector search index for ML / predictive query, using the vectorsearch extension.
// https://github.com/couchbaselabs/mobile-vector-search/blob/main/README_Extension.md

// Value written after "encoding=" in the index-creation statement, indexed by
// IndexSpec::VectorOptions::Encoding (DefaultEncoding, NoEncoding, SQEncoding).
// Slot 0 is null because the default encoding is never written out explicitly.
// The table must have exactly one entry per enumerator, in enum order: a stray "PQ"
// entry previously sat at index 2, so SQEncoding was emitted as "PQ".
static constexpr const char* kVectorEncodingNames[] = {nullptr, "none", "SQ"};

// Value written after "metric=" in the index-creation statement, indexed by
// IndexSpec::VectorOptions::Metric (DefaultMetric, Euclidean, Cosine).
// Slot 0 is null because the default metric is never written out explicitly.
static constexpr const char* kVectorMetricNames[] = {nullptr, "euclidean2", "cosine"};

// Creates a vector-similarity index using the sqlite-vss extension.
// Creates a vector-similarity index.
bool SQLiteKeyStore::createVectorIndex(const IndexSpec& spec) {
auto vectorTableName = db().auxiliaryTableName(tableName(), KeyStore::kVectorSeparator, spec.name);

Expand All @@ -53,6 +54,9 @@ namespace litecore {
IndexSpec::VectorOptions options;
if ( IndexSpec::VectorOptions const* o = spec.vectorOptions() ) { options = *o; }
createStmt << "centroids=" << options.numCentroids << ",minToTrain=" << options.numCentroids * 25;
if ( options.metric != IndexSpec::VectorOptions::DefaultMetric ) {
createStmt << ",metric=" << kVectorMetricNames[options.metric];
}
if ( options.encoding != IndexSpec::VectorOptions::DefaultEncoding ) {
createStmt << ",encoding=" << kVectorEncodingNames[options.encoding];
}
Expand Down
2 changes: 1 addition & 1 deletion vendor/mobile-vector-search

0 comments on commit 3c8ae9a

Please sign in to comment.