Skip to main content

Work with Descriptors (Embeddings)

Open In Colab Download View source on GitHub

ApertureDB supports multimodal vector indexing, search, and classification, so we can search for matching recipes or matching images of food dishes.

Connect to ApertureDB

Option A: ApertureDB Cloud (recommended)
Sign up for a free 30-day trial. Get your key from Connect > Generate API Key, and add it to a .env file in this directory:

APERTUREDB_KEY=your_key_here

Option B: Community Edition (local Docker)
Run this in a terminal before starting the notebook:

docker run -d --name aperturedb \
-p 55555:55555 -e ADB_MASTER_KEY=admin -e ADB_FORCE_SSL=false \
aperturedata/aperturedb-community
# Install the ApertureDB SDK and dotenv support (quietly, into the current kernel).
%pip install --upgrade --quiet aperturedb python-dotenv

# Option A: ApertureDB Cloud
# Read the .env file created above so the SDK can find the key.
from dotenv import load_dotenv
load_dotenv() # loads APERTUREDB_KEY from .env into the environment
True
# Option B: Community Edition (local Docker) — create an active profile first:
# !adb config create localdb --active \
# --host localhost --port 55555 \
# --username admin --password admin \
# --no-use-ssl --no-interactive

from aperturedb.CommonLibrary import create_connector

# Build a client from whichever configuration is active
# (APERTUREDB_KEY from the environment, or an adb profile).
client = create_connector()

# Sanity check: ask the server for its status and print the raw reply.
status_query = [{"GetStatus": {}}]
response, _ = client.query(status_query)
client.print_last_response()

[
{
"GetStatus": {
"info": "OK",
"status": 0,
"system": "ApertureDB",
"version": "0.19.6"
}
}
]

Define the search space for recipes

This defines the search space used to find descriptors (embeddings) similar to a given input embedding.

descriptorset_name = "recipe_search"

# Metadata stored on the set itself, alongside the index configuration.
set_properties = {
    "year_created": 2023,
    "source": "ApertureDB cookbook dataset",
    "model": "embed-english-v3.0",
    "provider": "cohere",
}

# A new descriptor set / collection is created only if the name doesn't exist.
q = [{
    "AddDescriptorSet": {
        "name": descriptorset_name,
        "dimensions": 1024,
        "engine": "Flat",  # other engines — or even several at once — can be chosen
        "metric": "L2",    # other distance metrics — or even several — can be chosen
        "properties": set_properties,
    }
}]

responses, blobs = client.query(q)

print(client.get_last_response_str())
[
{
"AddDescriptorSet": {
"status": 0
}
}
]

Prepare the embedding to be added

Embeddings, if precomputed, can come from a numpy file OR can be generated by calling a relevant embedding model.

import numpy as np

# Download the sample file
! mkdir -p data; cd data; wget https://github.com/aperture-data/Cookbook/blob/e333f6c59070b9165033d9ddd5af852a6b9624ba/notebooks/simple/data/embeddings.npy; cd -

# File can contain multiple descriptors of matching dimensions. So you can load them
# by passing the right index
embedding_npy_array = np.load("data/embeddings.npy")
index = 0 # We currently have only one embedding in the file
embedding = embedding_npy_array[index]

# expected byte array format for adding descriptor in ApertureDB
embedding_bytes = embedding.astype('float32').tobytes()
--2026-04-03 07:36:46--  https://github.com/aperture-data/Cookbook/blob/e333f6c59070b9165033d9ddd5af852a6b9624ba/notebooks/simple/data/embeddings.npy
Resolving github.com (github.com)... 140.82.116.4
Connecting to github.com (github.com)|140.82.116.4|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘embeddings.npy.1’

embeddings.npy.1 [ <=> ] 220.55K --.-KB/s in 0.03s

2026-04-03 07:36:49 (6.73 MB/s) - ‘embeddings.npy.1’ saved [225840]

/mnt/persistent/notebooks/Cookbook/notebooks/simple

Add a Recipe Embedding for Similarity Search Later

For bulk additions, we recommend using the Python SDK loaders

recipe_id = 75

# Descriptors / embeddings are always added to a set / collection.
# "if_not_found" makes the add conditional, so re-running this cell
# will not insert a duplicate for the same id.
q = [{
    "AddDescriptor": {
        "set": descriptorset_name,
        "label": "dinner",
        "properties": {
            "id": recipe_id,
            "year_created": 2023,
            "source": "Cookbook recipe",
            "model": "embed-english-v3.0",
            "provider": "cohere",
        },
        "if_not_found": {  # conditional add
            "id": ["==", recipe_id],
        },
    }
}]

responses, blobs = client.query(q, [embedding_bytes])

print(client.get_last_response_str())
[
{
"AddDescriptor": {
"status": 0
}
}
]

K-NN Search For Matching Embedding

Since we have only added one and we are searching that same one, we expect one matching embedding

# K-NN search: compare the query blob against the set and return up to six
# nearest neighbours with their distances, labels, raw blobs, and properties.
knn_options = {
    "set": descriptorset_name,  # the descriptor set in which to search
    "k_neighbors": 6,
    "distances": True,
    "labels": True,
    "blobs": True,
    "results": {"all_properties": True},
}
q = [{"FindDescriptor": knn_options}]

responses, blobs = client.query(q, [embedding_bytes])

print(client.get_last_response_str())
[
{
"FindDescriptor": {
"blobs_start": 0,
"entities": [
{
"_blob_index": 0,
"_distance": 0.0,
"_label": "dinner",
"_set_name": "recipe_search",
"_uniqueid": "3.192.487880",
"id": 75,
"model": "embed-english-v3.0",
"provider": "cohere",
"source": "Cookbook recipe",
"year_created": 2023
}
],
"returned": 1,
"status": 0
}
}
]

Remove Extra Properties with Update

# Drop the "year_created" property from the descriptor whose id is 75.
q = [{
    "UpdateDescriptor": {
        "constraints": {"id": ["==", 75]},
        "remove_props": ["year_created"],
    }
}]

responses, blobs = client.query(q)

print(client.get_last_response_str())
[
{
"UpdateDescriptor": {
"count": 1,
"status": 0
}
}
]

Double Check if Update Worked

# Fetch the descriptor back by id with every remaining property;
# "year_created" should no longer appear in the result.
q = [{
    "FindDescriptor": {
        "set": descriptorset_name,  # the descriptor set in which to search
        "constraints": {"id": ["==", 75]},
        "results": {"all_properties": True},
    }
}]

responses, blobs = client.query(q)

print(client.get_last_response_str())
[
{
"FindDescriptor": {
"entities": [
{
"_set_name": "recipe_search",
"_uniqueid": "3.192.487880",
"id": 75,
"model": "embed-english-v3.0",
"provider": "cohere",
"source": "Cookbook recipe"
}
],
"returned": 1,
"status": 0
}
}
]

Remove the Embedding

# Delete every descriptor whose "provider" property equals "cohere".
q = [{
    "DeleteDescriptor": {
        "constraints": {"provider": ["==", "cohere"]},
    }
}]

responses, blobs = client.query(q)

print(client.get_last_response_str())
[
{
"DeleteDescriptor": {
"count": 1,
"status": 0
}
}
]

Verify Deletion

# Search for the deleted descriptor again; the query should now return nothing.
q = [{
    "FindDescriptor": {
        "set": descriptorset_name,  # the descriptor set in which to search
        "constraints": {"id": ["==", 75]},
        "results": {"all_properties": True},
    }
}]

responses, blobs = client.query(q)

print(client.get_last_response_str())
[
{
"FindDescriptor": {
"returned": 0,
"status": 0
}
}
]

Remove the Descriptor Set

# Clean up: remove the descriptor set (and its index) created by this notebook.
q = [{
    "DeleteDescriptorSet": {
        "with_name": descriptorset_name,
    }
}]

responses, blobs = client.query(q)

print(client.get_last_response_str())
[
{
"DeleteDescriptorSet": {
"count": 1,
"status": 0
}
}
]

What's next?