Skip to content

Commit

Permalink
Merge pull request #167 from ashvardanian/main
Browse files Browse the repository at this point in the history
Disaggregating vector storage from the HNSW index
  • Loading branch information
ashvardanian authored Jul 30, 2023
2 parents ade032f + 368d853 commit b00c06e
Show file tree
Hide file tree
Showing 62 changed files with 5,039 additions and 4,592 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/prerelease.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ permissions:

jobs:

build_test:
name: Test USearch
test_cpp:
name: Test C++
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -163,7 +163,7 @@ jobs:
- name: Build USearch by Emscripten
run: |
./emsdk/emsdk activate latest && source ./emsdk/emsdk_env.sh
emcmake cmake -B ./build -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -s TOTAL_MEMORY=64MB" && emmake make -C ./build
emcmake cmake -DUSEARCH_BUILD_BENCHMARK=0 -DUSEARCH_BUILD_WASM=1 -B ./build -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -s TOTAL_MEMORY=64MB" && emmake make -C ./build
- name: Run tests
run: node ./build/test.js

Expand Down
20 changes: 10 additions & 10 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@
],
"stopAtEntry": false,
"linux": {
"preLaunchTask": "Linux Build C++ Bench Debug",
"preLaunchTask": "Linux Build C++ Debug",
"MIMode": "gdb"
},
"osx": {
"preLaunchTask": "MacOS Build C++ Bench Debug",
"preLaunchTask": "MacOS Build C++ Debug",
"MIMode": "lldb"
}
},
{
"name": "Debug C++ Bench on Wiki 1M @Cos",
"name": "Debug C++ on Wiki 1M @Cos",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build_debug/bench",
Expand All @@ -53,7 +53,7 @@
],
"stopAtEntry": false,
"linux": {
"preLaunchTask": "Linux Build C++ Bench Debug",
"preLaunchTask": "Linux Build C++ Debug",
"MIMode": "gdb",
"environment": [
{
Expand All @@ -63,12 +63,12 @@
]
},
"osx": {
"preLaunchTask": "MacOS Build C++ Bench Debug",
"preLaunchTask": "MacOS Build C++ Debug",
"MIMode": "lldb"
}
},
{
"name": "Debug C++ Bench on Text-to-Image 1B @Cos",
"name": "Debug C++ on Text-to-Image 1B @Cos",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build_debug/bench",
Expand All @@ -90,16 +90,16 @@
],
"stopAtEntry": false,
"linux": {
"preLaunchTask": "Linux Build C++ Bench Debug",
"preLaunchTask": "Linux Build C++ Debug",
"MIMode": "gdb"
},
"osx": {
"preLaunchTask": "MacOS Build C++ Bench Debug",
"preLaunchTask": "MacOS Build C++ Debug",
"MIMode": "lldb"
}
},
{
"name": "Run C++ Bench on Text-to-Image 1M @Cos",
"name": "Run C++ on Text-to-Image 1M @Cos",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build_release/bench",
Expand All @@ -113,7 +113,7 @@
"datasets/t2i_1M/groundtruth.public.100K.ibin",
],
"stopAtEntry": false,
"preLaunchTask": "Linux Build C++ Bench Release",
"preLaunchTask": "Linux Build C++ Release",
"MIMode": "gdb"
},
{
Expand Down
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@
"python.testing.pytestArgs": [
"python"
],
"python.analysis.diagnosticSeverityOverrides": {
"reportMissingImports": "none"
},
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
8 changes: 4 additions & 4 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"version": "2.0.0",
"tasks": [
{
"label": "Linux Build C++ Bench Debug",
"label": "Linux Build C++ Debug",
"command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug",
"args": [],
"type": "shell",
Expand All @@ -11,7 +11,7 @@
]
},
{
"label": "Linux Build C++ Bench Release",
"label": "Linux Build C++ Release",
"command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release",
"args": [],
"type": "shell",
Expand All @@ -20,13 +20,13 @@
]
},
{
"label": "MacOS Build C++ Bench Debug",
"label": "MacOS Build C++ Debug",
"command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug",
"args": [],
"type": "shell",
},
{
"label": "MacOS Build C++ Bench Release",
"label": "MacOS Build C++ Release",
"command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -B ./build_release && make -C ./build_release",
"args": [],
"type": "shell"
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "usearch"
version = "0.22.2"
version = "0.22.3"
authors = ["Ash Vardanian <[email protected]>"]
description = "Smaller & Faster Single-File Vector Search Engine from Unum"
edition = "2021"
Expand Down
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Linux • MacOS • Windows • Docker • WebAssembly
-[View large indexes from disk](#disk-based-indexes) without loading into RAM.
- ✅ Space-efficient point-clouds with `uint40_t`, accommodating 4B+ size.
- ✅ Compatible with OpenMP and custom "executors", for fine-grained control over CPU utilization.
-On-the-fly deletions.
-Heterogeneous lookups, renaming/relabeling, and on-the-fly deletions.
-[Semantic Search](#usearch--ai--multi-modal-semantic-search) and [Joins](#joins).

[usearch-header]: https://github.com/unum-cloud/usearch/blob/main/include/usearch/index.hpp
Expand Down Expand Up @@ -91,7 +91,7 @@ matches: Matches = index.search(vector, 10)

assert len(index) == 1
assert len(matches) == 1
assert matches[0].label == 42
assert matches[0].key == 42
assert matches[0].distance <= 0.001
assert np.allclose(index[42], vector)
```
Expand Down Expand Up @@ -214,17 +214,17 @@ model = uform.get_model('unum-cloud/uform-vl-multilingual')
index = usearch.index.Index(ndim=256)

@server
def add(label: int, photo: pil.Image.Image):
def add(key: int, photo: pil.Image.Image):
image = model.preprocess_image(photo)
vector = model.encode_image(image).detach().numpy()
index.add(label, vector.flatten(), copy=True)
index.add(key, vector.flatten(), copy=True)

@server
def search(query: str) -> np.ndarray:
tokens = model.preprocess_text(query)
vector = model.encode_text(tokens).detach().numpy()
matches = index.search(vector.flatten(), 3)
return matches.labels
return matches.keys

server.run()
```
Expand Down Expand Up @@ -268,9 +268,9 @@ fingerprints = np.vstack([encoder.GetFingerprint(x) for x in molecules])
fingerprints = np.packbits(fingerprints, axis=1)

index = Index(ndim=2048, metric=MetricKind.Tanimoto)
labels = np.arange(len(molecules))
keys = np.arange(len(molecules))

index.add(labels, fingerprints)
index.add(keys, fingerprints)
matches = index.search(fingerprints, 10)
```

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.22.2
0.22.3
4 changes: 2 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fn main() {
println!("cargo:rerun-if-changed=rust/lib.rs");
println!("cargo:rerun-if-changed=rust/lib.cpp");
println!("cargo:rerun-if-changed=rust/lib.hpp");
println!("cargo:rerun-if-changed=include/index_punned_helpers.hpp");
println!("cargo:rerun-if-changed=include/index_punned_dense.hpp");
println!("cargo:rerun-if-changed=include/index_plugins.hpp");
println!("cargo:rerun-if-changed=include/index_dense.hpp");
println!("cargo:rerun-if-changed=include/usearch/index.hpp");
}
Loading

0 comments on commit b00c06e

Please sign in to comment.