Skip to main content

Image Vector Search

Open in Colab · Download · View source on GitHub

This notebook embeds food images from the Cookbook dataset using a CLIP model and runs text-to-image search against ApertureDB.

Connect to ApertureDB

Option A: ApertureDB Cloud (recommended)
Sign up for a free 30-day trial. Get your key from Connect → Generate API Key, and add it to a .env file in this directory:

APERTUREDB_KEY=your_key_here

Option B: Community Edition (local Docker)
Run this in a terminal before starting the notebook:

docker run -d --name aperturedb \
-p 55555:55555 -e ADB_MASTER_KEY=admin -e ADB_FORCE_SSL=false \
aperturedata/aperturedb-community

See client configuration options for all connection methods and server setup options for deployment choices.

# Install dependencies (IPython magic; runs pip in the notebook kernel).
%pip install aperturedb sentence-transformers Pillow requests pandas python-dotenv
from dotenv import load_dotenv
# Load APERTUREDB_KEY (and any other settings) from the .env file in this
# directory into the process environment; returns True when a file was loaded.
load_dotenv()
True
# Option B alternative: create a local CLI profile for the Docker instance
# (uncomment and run once if you are not using ApertureDB Cloud).
# !adb config create localdb --active \
# --host localhost --port 55555 \
# --username admin --password admin \
# --no-use-ssl --no-interactive
from aperturedb.CommonLibrary import create_connector

# create_connector() resolves credentials from the active config or environment
# (e.g. APERTUREDB_KEY loaded above).
client = create_connector()
# Round-trip sanity check: GetStatus reports server info and status 0 on success
# (see the printed response below).
response, _ = client.query([{"GetStatus": {}}])
client.print_last_response()
[
{
"GetStatus": {
"info": "OK",
"status": 0,
"system": "ApertureDB",
"version": "0.19.6"
}
}
]

Step: Load the CLIP model

from sentence_transformers import SentenceTransformer

# CLIP embeds images and text into one shared vector space, which is what
# makes text-to-image search possible later in this notebook.
MODEL_NAME = "clip-ViT-B-32"
model = SentenceTransformer(MODEL_NAME)
dims = model.get_sentence_embedding_dimension()
print(f"Embedding dimensions: {dims}")

Step: Load image URLs from Cookbook dataset

import pandas as pd

# Image manifest for the Cookbook dataset (dish name, image URL, cuisine tag).
CSV_URL = (
    "https://raw.githubusercontent.com/aperture-data/Cookbook"
    "/refs/heads/main/images.adb.csv"
)

# Keep only the first 10 rows so the demo stays quick.
dishes = pd.read_csv(CSV_URL).head(10)

print(f"Loaded {len(dishes)} dishes")
print(dishes[["dish_name", "url", "food_tags"]].to_string(index=False))
Loaded 10 dishes
dish_name url food_tags
rajma chawal https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/001%20Large.jpeg Indian
paneer bhurji https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/002%20Large.jpeg Indian
moong dal https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/003%20Large.jpeg Indian
Butter chicken https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/004%20Large.jpeg Indian
porridge https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/IMG_0898.jpeg Scottish
baked potato https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/IMG_5728.jpeg Scottish
haggis bonbons and steak and ale pie https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/IMG_5743.jpeg Scottish
butter chicken with special fried rice and assorted naan breads https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/IMG_5786.jpeg Indian
duck breast and rump lamb https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/IMG_5812.jpeg British
traditional scottish and continental breakfast https://raw.githubusercontent.com/aperture-data/Cookbook/refs/heads/main/images/IMG_5815.jpeg Scottish

Step: Create a DescriptorSet for image embeddings

SET_NAME = "dish_image_search"

# A DescriptorSet is ApertureDB's container for one family of embeddings;
# dimensions and metric must match the model that produces the vectors.
add_set = {
    "AddDescriptorSet": {
        "name": SET_NAME,
        "dimensions": 512,  # CLIP ViT-B/32 output size
        "engine": "HNSW",   # approximate nearest neighbor for larger sets
        "metric": "CS",     # cosine similarity
    }
}

client.query([add_set])
client.print_last_response()
[
{
"AddDescriptorSet": {
"status": 0
}
}
]

Step: Add images with embeddings in one transaction

Each AddImage + AddDescriptor pair is a single atomic transaction. The descriptor is linked to the image via connect. The CLIP model encodes the PIL image directly.

import requests
from PIL import Image
from io import BytesIO
import numpy as np

def fix_url(url):
    """Normalize a Cookbook image URL.

    Rewrites the ``refs/heads/main`` path segment to ``main`` and turns both
    percent-encoded (``%20``) and literal spaces into underscores.
    """
    substitutions = (
        ("refs/heads/main", "main"),
        ("%20", "_"),
        (" ", "_"),
    )
    for old, new in substitutions:
        url = url.replace(old, new)
    return url

# Ingest each dish: download the image, embed it with CLIP, and store the
# image + descriptor as one atomic transaction, linked via `connect`.
for _, row in dishes.iterrows():
    img_url = fix_url(row["url"])
    resp = requests.get(img_url, timeout=10)
    resp.raise_for_status()  # fail fast on a bad URL instead of storing junk
    img_bytes = resp.content

    # CLIP's encoder accepts a PIL image directly; normalize so cosine
    # similarity is well-behaved, and use float32 to match the set's storage.
    img = Image.open(BytesIO(img_bytes)).convert("RGB")
    emb = model.encode(img, normalize_embeddings=True).astype("float32")

    q = [
        {
            "AddImage": {
                "_ref": 1,
                "properties": {
                    "dish_name": row["dish_name"],
                    "cuisine": row["food_tags"],
                },
            }
        },
        {
            "AddDescriptor": {
                "set": SET_NAME,
                # BUG FIX: the original dict literal listed "connect" twice;
                # Python silently keeps only the last duplicate key. One
                # entry is sufficient to link the descriptor to the image.
                "connect": {"ref": 1, "class": "has_embedding"},
                "properties": {
                    "dish_name": row["dish_name"],
                    "cuisine": row["food_tags"],
                },
            }
        },
    ]
    # Blobs align positionally with the query list:
    # blob[0] -> AddImage (raw image bytes), blob[1] -> AddDescriptor (embedding)
    client.query(q, [img_bytes, emb.tobytes()])

print("Done adding images and embeddings")

Done adding images and embeddings

CLIP encodes text and images into the same embedding space, so a text query returns the visually closest images.

query_text = "creamy curry with rice"

# CLIP maps the sentence into the same vector space as the stored image
# embeddings, so nearest neighbors are the visually closest dishes.
query_emb = model.encode(query_text, normalize_embeddings=True).astype("float32")

# Two-step query: k-NN over descriptors, then traverse the has_embedding
# connection back to the linked images.
search = [
    {
        "FindDescriptor": {
            "set": SET_NAME,
            "k_neighbors": 3,
            "distances": True,
            "_ref": 1,
            "results": {"all_properties": True},
        }
    },
    {
        "FindImage": {
            "is_connected_to": {"ref": 1, "connection_class": "has_embedding"},
            "blobs": False,
            "results": {"all_properties": True},
        }
    },
]

response, _ = client.query(search, [query_emb.tobytes()])

succeeded = (
    isinstance(response, list)
    and response[0].get("FindDescriptor", {}).get("status", 0) == 0
)
if not succeeded:
    print("Query failed — check the response:")
    client.print_last_response()
else:
    hits = response[0]["FindDescriptor"].get("entities", [])
    matches = response[1]["FindImage"].get("entities", [])

    print(f'Query: "{query_text}"\n')
    # NOTE(review): zipping assumes FindImage returns entities in the same
    # order as the descriptor neighbors — confirm against ApertureDB docs.
    for desc, img in zip(hits, matches):
        print(f" {img['dish_name']:<25} cuisine={img['cuisine']:<15} distance={desc['_distance']:.4f}")
Query: "creamy curry with rice"

rajma chawal cuisine=Indian distance=0.3054
moong dal cuisine=Indian distance=0.2955
Butter chicken cuisine=Indian distance=0.2759

Step: Cleanup (optional)

# Drop the descriptor set created above.
client.query([{"DeleteDescriptorSet": {"with_name": SET_NAME}}])
# WARNING: DeleteImage with no constraints deletes EVERY image in the
# database, not just the 10 added by this notebook — fine for a throwaway
# demo instance, destructive anywhere else.
client.query([{"DeleteImage": {}}])
client.print_last_response()
[
{
"DeleteImage": {
"count": 10,
"status": 0
}
}
]