Skip to content

Commit

Permalink
moves searchVector into 'search' function
Browse files Browse the repository at this point in the history
  • Loading branch information
micheleriva committed Dec 18, 2023
1 parent e9527ac commit 53e674a
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 61 deletions.
65 changes: 6 additions & 59 deletions packages/orama/src/methods/search-vector.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import { findSimilarVectors } from '../components/cosine-similarity.js'
import { getInternalDocumentId } from '../components/internal-document-id-store.js'
import { createError } from '../errors.js'
import type { AnyOrama, Result, Results, TypedDocument } from '../types.js'
import { formatNanoseconds, getNanosecondsTime } from '../utils.js'
import type { AnyOrama, Results, SearchParamsVector, TypedDocument } from '../types.js'
import { searchVector as searchVectorFn } from './search.js'

export type SearchVectorParams = {
vector: number[] | Float32Array
Expand All @@ -13,58 +10,8 @@ export type SearchVectorParams = {
includeVectors?: boolean
}

/**
 * Runs a vector similarity search against the vector index registered under `params.property`.
 *
 * The whole operation is timed; the elapsed time is reported in the `elapsed` field of the result.
 *
 * @param orama - Database instance whose vector index and document store are read.
 * @param params - Query vector and target property, plus optional `limit` (default 10),
 *   `offset` (default 0), `similarity` and `includeVectors` (default false).
 * @returns `count` (length of the list returned by `findSimilarVectors`), the paged `hits`,
 *   and the `elapsed` timing in raw and formatted form.
 * @throws The error built by `createError('INVALID_INPUT_VECTOR', ...)` when the query vector
 *   length differs from the size recorded on the index.
 */
export async function searchVector<T extends AnyOrama, ResultDocument = TypedDocument<T>>(orama: T, params: SearchVectorParams): Promise<Results<ResultDocument>> {
const timeStart = await getNanosecondsTime()
let { vector } = params
const { property, limit = 10, offset = 0, includeVectors = false } = params
const vectorIndex = orama.data.index.vectorIndexes[property]
const vectorSize = vectorIndex.size
const vectors = vectorIndex.vectors

// The query vector must have exactly the length recorded on the index.
if (vector.length !== vectorSize) {
throw createError('INVALID_INPUT_VECTOR', property, vectorSize, vector.length)
}

// Plain number arrays are converted to Float32Array before being passed to findSimilarVectors.
if (!(vector instanceof Float32Array)) {
vector = new Float32Array(vector)
}

const results = findSimilarVectors(vector, vectors, vectorSize, params.similarity)

// Pre-sized to `limit`; slots that are never filled are dropped by filter(Boolean) in the return value.
const docs: Result<ResultDocument>[] = Array.from({ length: limit })

for (let i = 0; i < limit; i++) {
const result = results[i + offset]
// Stop as soon as the requested window runs past the available results.
if (!result) {
break
}

const originalID = getInternalDocumentId(orama.internalDocumentIDStore, result.id)
const doc = orama.data.docs.docs[originalID]

if (doc) {
if (!includeVectors) {
// NOTE(review): `doc` is read directly from orama.data.docs.docs, so this assignment
// appears to overwrite the field on the stored document itself — confirm whether a copy is intended.
doc[property] = null
}

const newDoc: Result<ResultDocument> = {
id: result.id,
score: result.score,
document: doc
}
docs[i] = newDoc
}
}

const timeEnd = await getNanosecondsTime()
const elapsedTime = timeEnd - timeStart

return {
// `count` is the total number of similarity results, not the number of hits on this page.
count: results.length,
hits: docs.filter(Boolean),
elapsed: {
raw: Number(elapsedTime),
formatted: await formatNanoseconds(elapsedTime)
}
}
/**
 * Backward-compatible entry point for vector search.
 *
 * Every call prints two notices through `console.warn` pointing callers at `search`,
 * then forwards both arguments unchanged to the `searchVector` implementation
 * imported from `./search.js` and returns its promise.
 *
 * @param orama - Database instance to search.
 * @param params - Vector search parameters, passed through as-is.
 * @returns Whatever the delegated implementation resolves to.
 */
export async function searchVector<T extends AnyOrama, ResultDocument = TypedDocument<T>>(
  orama: T,
  params: SearchParamsVector<T, ResultDocument>
): Promise<Results<ResultDocument>> {
  const notices = [
    `"searchVector" function is now part of "search" function, and will be deprecated soon. Please use "search" instead.`,
    'Read more at https://docs.oramasearch.com/open-source/usage/search/vector-search.html'
  ]

  for (const notice of notices) {
    console.warn(notice)
  }

  return searchVectorFn(orama, params)
}
61 changes: 59 additions & 2 deletions packages/orama/src/methods/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import {
getInternalDocumentId
} from '../components/internal-document-id-store.js'
import { createError } from '../errors.js'
import { getNanosecondsTime, getNested, sortTokenScorePredicate, safeArrayPush } from '../utils.js'
import { getNanosecondsTime, getNested, sortTokenScorePredicate, safeArrayPush, formatNanoseconds } from '../utils.js'
import type {
AnyOrama,
BM25Params,
Expand All @@ -30,7 +30,7 @@ import type {
TypedDocument
} from '../types.js'
import { MODE_FULLTEXT_SEARCH, MODE_VECTOR_SEARCH } from '../constants.js'
import { searchVector } from './search-vector.js'
import { findSimilarVectors } from '../components/cosine-similarity.js'

const defaultBM25Params: BM25Params = {
k: 1.2,
Expand Down Expand Up @@ -120,6 +120,63 @@ export async function search<T extends AnyOrama, ResultDocument = TypedDocument<
throw ('No other search modes are supported yet')
}

/**
 * Runs a vector similarity search against the vector index registered under `params.property`.
 *
 * The whole operation is timed; the elapsed time is reported in the `elapsed` field of the result.
 *
 * @param orama - Database instance whose vector index and document store are read.
 * @param params - Query vector and target property, plus optional `limit` (default 10),
 *   `offset` (default 0), `similarity` and `includeVectors` (default false).
 * @returns `count` (length of the list returned by `findSimilarVectors`), the paged `hits`,
 *   and the `elapsed` timing in raw and formatted form.
 * @throws The error built by `createError('INVALID_INPUT_VECTOR', ...)` when the query vector
 *   length differs from the size recorded on the index.
 */
export async function searchVector<T extends AnyOrama, ResultDocument = TypedDocument<T>>(orama: T, params: SearchParamsVector<T, ResultDocument>): Promise<Results<ResultDocument>> {
  const timeStart = await getNanosecondsTime()
  let { vector } = params
  const { property, limit = 10, offset = 0, includeVectors = false } = params
  const vectorIndex = orama.data.index.vectorIndexes[property]
  const vectorSize = vectorIndex.size
  const vectors = vectorIndex.vectors

  // The query vector must have exactly the length recorded on the index.
  if (vector.length !== vectorSize) {
    throw createError('INVALID_INPUT_VECTOR', property, vectorSize, vector.length)
  }

  // Plain number arrays are converted to Float32Array before being passed to findSimilarVectors.
  if (!(vector instanceof Float32Array)) {
    vector = new Float32Array(vector)
  }

  const results = findSimilarVectors(vector, vectors, vectorSize, params.similarity)

  const docs: Result<ResultDocument>[] = []

  for (let i = 0; i < limit; i++) {
    const result = results[i + offset]
    // Stop as soon as the requested window runs past the available results.
    if (!result) {
      break
    }

    const originalID = getInternalDocumentId(orama.internalDocumentIDStore, result.id)
    const doc = orama.data.docs.docs[originalID]

    // Results whose document cannot be found in the store are left out of the hits.
    if (!doc) {
      continue
    }

    docs.push({
      id: result.id,
      score: result.score,
      // `doc` is the object held by the document store. Blank the vector field on a
      // shallow copy so the stored document keeps its vector for later reads.
      document: includeVectors ? doc : { ...doc, [property]: null }
    })
  }

  const timeEnd = await getNanosecondsTime()
  const elapsedTime = timeEnd - timeStart

  return {
    // `count` is the total number of similarity results, not the number of hits on this page.
    count: results.length,
    hits: docs,
    elapsed: {
      raw: Number(elapsedTime),
      formatted: await formatNanoseconds(elapsedTime)
    }
  }
}


async function fullTextSearch<T extends AnyOrama, ResultDocument = TypedDocument<T>>(orama: T, params: SearchParamsFullText<T, ResultDocument>, language?: string): Promise<Results<ResultDocument>> {
const timeStart = await getNanosecondsTime()
params.relevance = Object.assign(params.relevance ?? {}, defaultBM25Params)
Expand Down

0 comments on commit 53e674a

Please sign in to comment.