Work with Descriptors (Embeddings)
ApertureDB supports multimodal vector indexing, search, and classification, so we can search for matching recipes or matching images of food dishes.
Connect to the database
If you haven't already set up or configured the database, check out our quick start guide.
# Install the required client packages if needed
%pip install --upgrade --quiet pip
%pip install --upgrade --quiet aperturedb
from aperturedb.CommonLibrary import create_connector
# Open a connection to ApertureDB through the connector factory imported above.
client = create_connector()

# Health check: a single GetStatus command that takes no parameters.
# Reference: https://docs.aperturedata.io/query_language/Reference/db_commands/GetStatus
status_command = {"GetStatus": {}}
query = [status_command]

# Run the query; the call hands back the JSON response together with any blobs.
response, blobs = client.query(query)
# Print the response of the query that was just executed.
client.print_last_response()
[
{
"GetStatus": {
"info": "OK",
"status": 0,
"system": "ApertureDB",
"version": "0.17.23"
}
}
]
Define the search space for recipes
This defines the search space in which to find descriptors (embeddings) similar to a given input embedding.
descriptorset_name = "recipe_search"

# Metadata attached to the descriptor set as properties.
set_properties = {
    "year_created": 2023,
    "source": "ApertureDB cookbook dataset",
    "model": "embed-english-v3.0",
    "provider": "cohere",
}

# A new descriptor set / collection is added only if the name doesn't exist.
add_set_command = {
    "name": descriptorset_name,
    "dimensions": 1024,
    "engine": "Flat",  # different engines (or even multiple) can be chosen
    "metric": "L2",  # different distance metrics (or even multiple) can be chosen
    "properties": set_properties,
}
q = [{"AddDescriptorSet": add_set_command}]

responses, blobs = client.query(q)
print(client.get_last_response_str())
[
{
"AddDescriptorSet": {
"status": 0
}
}
]
Prepare the embedding to be added
Embeddings, if precomputed, can come from a numpy file OR can be generated by calling a relevant embedding model.
import numpy as np
# Download the sample file
! mkdir -p data; cd data; wget https://github.com/aperture-data/Cookbook/blob/e333f6c59070b9165033d9ddd5af852a6b9624ba/notebooks/simple/data/embeddings.npy; cd -
# File can contain multiple descriptors of matching dimensions. So you can load them
# by passing the right index
embedding_npy_array = np.load("data/embeddings.npy")
index = 0 # We currently have only one embedding in the file
embedding = embedding_npy_array[index]
# expected byte array format for adding descriptor in ApertureDB
embedding_bytes = embedding.astype('float32').tobytes()
Add a Recipe Embedding for Similarity Search Later
For bulk additions, we recommend using the Python SDK loaders
# Properties stored alongside the embedding.
descriptor_properties = {
    "id": 75,
    "year_created": 2023,
    "source": "Cookbook recipe",
    "model": "embed-english-v3.0",
    "provider": "cohere",
}

add_descriptor_command = {
    # descriptors / embeddings are always added to a set / collection
    "set": descriptorset_name,
    "label": "dinner",
    "properties": descriptor_properties,
    # conditional add
    "if_not_found": {"id": ["==", 75]},
}
q = [{"AddDescriptor": add_descriptor_command}]

# The embedding bytes are passed as the blob list (second argument).
responses, blobs = client.query(q, [embedding_bytes])
print(client.get_last_response_str())
[
{
"AddDescriptor": {
"status": 0
}
}
]
K-NN Search For Matching Embedding
Since we have only added one and we are searching that same one, we expect one matching embedding
# Nearest-neighbour search request; asks for distances, labels and blobs
# in addition to every property of each match.
knn_command = {
    # Specify the descriptor set in which to search.
    "set": descriptorset_name,
    "k_neighbors": 6,
    "distances": True,
    "labels": True,
    "blobs": True,
    "results": {"all_properties": True},
}
q = [{"FindDescriptor": knn_command}]

# The query embedding is passed as the blob list (second argument).
responses, blobs = client.query(q, [embedding_bytes])
print(client.get_last_response_str())
[
{
"FindDescriptor": {
"blobs_start": 0,
"entities": [
{
"_blob_index": 0,
"_distance": 0.0,
"_label": "dinner",
"_set_name": "recipe_search",
"_uniqueid": "3.174639.224220",
"id": 75,
"model": "embed-english-v3.0",
"provider": "cohere",
"source": "Cookbook recipe",
"year_created": 2023
}
],
"returned": 1,
"status": 0
}
}
]
Remove Extra Properties with Update
# Select the descriptor by its "id" property and drop "year_created" from it.
# NOTE(review): no "set" is given in this command, so selection relies on the
# id constraint alone - confirm it cannot match descriptors in other sets.
update_command = {
    "constraints": {"id": ["==", 75]},
    "remove_props": ["year_created"],
}
q = [{"UpdateDescriptor": update_command}]

responses, blobs = client.query(q)
print(client.get_last_response_str())
[
{
"UpdateDescriptor": {
"count": 1,
"status": 0
}
}
]
Double Check if Update Worked
# Look the descriptor up again by id and return all of its properties,
# so the effect of the update can be inspected.
lookup_command = {
    # Specify the descriptor set in which to search.
    "set": descriptorset_name,
    "constraints": {"id": ["==", 75]},
    "results": {"all_properties": True},
}
q = [{"FindDescriptor": lookup_command}]

responses, blobs = client.query(q)
print(client.get_last_response_str())
[
{
"FindDescriptor": {
"entities": [
{
"_set_name": "recipe_search",
"_uniqueid": "3.174639.224220",
"id": 75,
"model": "embed-english-v3.0",
"provider": "cohere",
"source": "Cookbook recipe"
}
],
"returned": 1,
"status": 0
}
}
]
Remove the Embedding
# Delete descriptors whose "provider" property equals "cohere".
# NOTE(review): no "set" is given in this command, so the match relies on the
# provider constraint alone - confirm it cannot delete descriptors in other sets.
delete_command = {
    "constraints": {"provider": ["==", "cohere"]},
}
q = [{"DeleteDescriptor": delete_command}]

responses, blobs = client.query(q)
print(client.get_last_response_str())
[
{
"DeleteDescriptor": {
"count": 1,
"status": 0
}
}
]
Verify Deletion
# Search the set for the descriptor with id 75 again; the printed response
# shows how many entities are still returned.
lookup_command = {
    # Specify the descriptor set in which to search.
    "set": descriptorset_name,
    "constraints": {"id": ["==", 75]},
    "results": {"all_properties": True},
}
q = [{"FindDescriptor": lookup_command}]

responses, blobs = client.query(q)
print(client.get_last_response_str())
[
{
"FindDescriptor": {
"returned": 0,
"status": 0
}
}
]
Remove the Descriptor Set
# Remove the descriptor set, selected by its name.
delete_set_command = {"with_name": descriptorset_name}
q = [{"DeleteDescriptorSet": delete_set_command}]

responses, blobs = client.query(q)
print(client.get_last_response_str())
[
{
"DeleteDescriptorSet": {
"count": 1,
"status": 0
}
}
]
What's next?