Skip to main content

Work with Descriptors (Embeddings)

Open In Colab Download View source on GitHub

ApertureDB supports multimodal vector indexing, search, and classification, so we can search for matching recipes or matching images of food dishes.

Connect to the database

If you haven't already set up or configured the database, check out our quick start guide.

# Install the required client packages if needed
%pip install --upgrade --quiet pip
%pip install --upgrade --quiet aperturedb
from aperturedb.CommonLibrary import create_connector

# Create the connector for ApertureDB.
# No arguments are passed, so this presumably relies on the connection
# settings configured beforehand (see the quick start guide) — confirm.
client = create_connector()
# Simple query to see how the database is doing
# https://docs.aperturedata.io/query_language/Reference/db_commands/GetStatus
# A query is a list of commands; GetStatus takes no parameters here.
query = [{
"GetStatus": {
}
}]

# Execute the query to get back a JSON response for GetStatus.
# client.query returns a (response, blobs) pair; neither value is used
# directly below.
response, blobs = client.query(query)

# Print the response of the query that was just executed.
client.print_last_response()
[
{
"GetStatus": {
"info": "OK",
"status": 0,
"system": "ApertureDB",
"version": "0.17.23"
}
}
]

Define the search space for recipes

This defines the search space used to find descriptors (embeddings) similar to a given input embedding.

# Name of the descriptor set; the later cells refer to the set through this
# variable.
descriptorset_name = "recipe_search"
q = [{
"AddDescriptorSet": { # a new descriptor set / collection is added only if the name doesn't exist
"name": descriptorset_name,
# NOTE(review): presumably must equal the length of every embedding added
# to this set — confirm against the embedding file used below.
"dimensions": 1024,
"engine": "Flat", # It's possible to choose different engines or even multiple
"metric": "L2", # It's possible to choose different distance metrics or even multiple
# Free-form metadata stored on the set itself.
"properties": {
"year_created": 2023,
"source": "ApertureDB cookbook dataset",
"model": "embed-english-v3.0",
"provider": "cohere"
}
}
}]

# No blobs are sent with this command, so only the query is passed.
responses, blobs = client.query(q)

# Print the response of the query that was just executed.
print(client.get_last_response_str())
[
{
"AddDescriptorSet": {
"status": 0
}
}
]

Prepare the embedding to be added

Embeddings, if precomputed, can be loaded from a NumPy file, or they can be generated by calling a relevant embedding model.

import numpy as np

# Download the sample file
! mkdir -p data; cd data; wget https://github.com/aperture-data/Cookbook/blob/e333f6c59070b9165033d9ddd5af852a6b9624ba/notebooks/simple/data/embeddings.npy; cd -

# File can contain multiple descriptors of matching dimensions. So you can load them
# by passing the right index
embedding_npy_array = np.load("data/embeddings.npy")
index = 0 # We currently have only one embedding in the file
embedding = embedding_npy_array[index]

# expected byte array format for adding descriptor in ApertureDB
embedding_bytes = embedding.astype('float32').tobytes()

Add a Recipe Embedding for Similarity Search Later

For bulk additions, we recommend using the Python SDK loaders

# Add one descriptor, with a label and properties, to the descriptor set.
q = [{
"AddDescriptor": {
"set": descriptorset_name, # descriptors / embeddings are always added to a set / collection
"label": "dinner",
"properties": {
"id": 75,
"year_created": 2023,
"source": "Cookbook recipe",
"model": "embed-english-v3.0",
"provider": "cohere"
},
"if_not_found": { # conditional add
# Same id as in "properties" above, so re-running the cell is expected
# not to add a second copy.
"id": ["==", 75]
}
}
}]

# The embedding bytes travel as a blob alongside the JSON query.
responses, blobs = client.query(q, [embedding_bytes])

# Print the response of the query that was just executed.
print(client.get_last_response_str())
[
{
"AddDescriptor": {
"status": 0
}
}
]

K-NN Search For Matching Embedding

Since we have added only one embedding and are searching with that same embedding, we expect exactly one match (at distance 0), even though k_neighbors is set to 6.

# Similarity search: the query vector is sent as a blob with the query.
q = [{
"FindDescriptor": {
# Specify the descriptor set in which to search.
"set": descriptorset_name,
# Ask for up to 6 neighbours; the set holds a single descriptor at this
# point, so one entity comes back.
"k_neighbors": 6,
# These three flags correspond to the _distance, _label and _blob_index
# fields in the response entities.
"distances": True,
"labels": True,
"blobs": True,
"results": {
"all_properties": True
}
}
}]

# Search with the same embedding bytes that were added earlier.
responses, blobs = client.query(q, [embedding_bytes])

# Print the response of the query that was just executed.
print(client.get_last_response_str())
[
{
"FindDescriptor": {
"blobs_start": 0,
"entities": [
{
"_blob_index": 0,
"_distance": 0.0,
"_label": "dinner",
"_set_name": "recipe_search",
"_uniqueid": "3.174639.224220",
"id": 75,
"model": "embed-english-v3.0",
"provider": "cohere",
"source": "Cookbook recipe",
"year_created": 2023
}
],
"returned": 1,
"status": 0
}
}
]

Remove Extra Properties with Update

# Remove the "year_created" property from the descriptor whose id is 75.
# NOTE(review): no "set" is given, so the constraint presumably applies to
# descriptors in every set — confirm, and scope to descriptorset_name if the
# database is shared.
q = [{
"UpdateDescriptor": {
"constraints": {
"id": ["==", 75]
},
"remove_props": ["year_created"]
}
}]


responses, blobs = client.query(q)

# Print the response of the query that was just executed.
print(client.get_last_response_str())
[
{
"UpdateDescriptor": {
"count": 1,
"status": 0
}
}
]

Double Check if Update Worked

# Look the descriptor up by property constraint (no query vector is sent) and
# return all of its properties, to confirm "year_created" is gone.
q = [{
"FindDescriptor": {
# Specify the descriptor set in which to search.
"set": descriptorset_name,
"constraints": {
"id": ["==", 75]
},
"results": {
"all_properties": True
}
}
}]

responses, blobs = client.query(q)

# Print the response of the query that was just executed.
print(client.get_last_response_str())
[
{
"FindDescriptor": {
"entities": [
{
"_set_name": "recipe_search",
"_uniqueid": "3.174639.224220",
"id": 75,
"model": "embed-english-v3.0",
"provider": "cohere",
"source": "Cookbook recipe"
}
],
"returned": 1,
"status": 0
}
}
]

Remove the Embedding

# Delete descriptors whose "provider" property equals "cohere".
# NOTE(review): the constraint is on provider only and no "set" is given, so
# this presumably deletes every matching descriptor in the database, not just
# the one added above — confirm before running against a shared database.
q = [{
"DeleteDescriptor": {
"constraints": {
"provider": ["==", "cohere"]
}
}
}]

responses, blobs = client.query(q)

# Print the response of the query that was just executed.
print(client.get_last_response_str())
[
{
"DeleteDescriptor": {
"count": 1,
"status": 0
}
}
]

Verify Deletion

# Repeat the constraint-based lookup for id 75; after the deletion it is
# expected to return nothing.
q = [{
"FindDescriptor": {
# Specify the descriptor set in which to search.
"set": descriptorset_name,
"constraints": {
"id": ["==", 75]
},
"results": {
"all_properties": True
}
}
}]

responses, blobs = client.query(q)

# Print the response of the query that was just executed.
print(client.get_last_response_str())
[
{
"FindDescriptor": {
"returned": 0,
"status": 0
}
}
]

Remove the Descriptor Set

# Clean up: delete the descriptor set created at the start, selected by name.
q = [{
"DeleteDescriptorSet": {
"with_name": descriptorset_name
}
}]

responses, blobs = client.query(q)

# Print the response of the query that was just executed.
print(client.get_last_response_str())
[
{
"DeleteDescriptorSet": {
"count": 1,
"status": 0
}
}
]

What's next?