Skip to main content

Dataset Loading

Create an input CSV file with the video metadata to be loaded into ApertureDB

from video_helpers import videos

# Source list of video URLs from the YFCC100m dataset, and the local
# directory the clips will be downloaded into.
url_list_file = "sample_data/yfcc100m_dataset_100_videos_urls.txt"
download_dir = "sample_data/videos"

# Produce a CSV in the format the ApertureDB loaders expect.
videos.generate_video_csv(url_list_file, download_dir)

Download videos locally before we can store them in ApertureDB

from video_helpers import videos

# For pre-existing data this step is unnecessary — the CSV would simply
# point at the files' current location. Occasionally some URLs are dead,
# so failed downloads are recorded in the error file below.
videos.download_videos("sample_data/videos/videos.adb.csv", "results/err_download.txt")

Load videos and metadata using ApertureDB loaders

Metadata schema for video queries


Create a connection to ApertureDB

from aperturedb import Connector
from aperturedb import Utils
import json

# ApertureDB server info for establishing the connection
# (assumes a local installation as provided).
DB_HOST = "aperturedb.local"
DB_USER = "admin"  # the server requires authentication
# Use the password provided for the instance, or "admin" by default.
db_password = input("Please insert your password: ")

db = Connector.Connector(DB_HOST, user=DB_USER, password=db_password)
print("Connected to ApertureDB server...")

# Utility wrapper around the connection, used for server/status queries.
utils = Utils.Utils(db)

Creating an index

# Just like any database, ApertureDB supports indexes on the
# combination of <object class, property key> that will be used
# frequently to search. These indexes can be added before or after
# loading data although it's better to create them before.
# See the ApertureDB "CreateIndex" query documentation for details.
#
# NOTE(review): the original snippet had the two CreateIndex commands
# collapsed into a single malformed dict (missing braces and a comma);
# each command must be its own entry in the query list.
query = [{
    "CreateIndex": {
        "index_type": "entity",     # index on an entity, not a connection
        "class": "_Video",          # special entity type natively recognized by ApertureDB
        "property_key": "guid",
        "property_type": "string"
    }
}, {
    "CreateIndex": {
        "index_type": "entity",     # index on an entity, not a connection
        "class": "Camera",          # application-defined entity class
        "property_key": "id",
        "property_type": "integer"
    }
}]

# Execute the CreateIndex commands as one transaction; `response` carries the
# per-command status and `blobs` any returned binary data (none expected here).
response, blobs = db.query(query)


Use a helper function to load all the videos and metadata shown in the schema above

from video_helpers import videos

# Configure error logging.
import logging
logger = logging.getLogger("aperturedb")
# Aperturedb has 2 log handlers: the first writes to a file, the second is a
# stream handler (for stderr). Log only CRITICAL or higher on the console;
# errors remain available in the error.<timestamp>.log file for postmortem.
# NOTE(review): the original snippet described this setup but never applied
# it (the code was truncated) — raise the console handler's level here.
for handler in logger.handlers:
    if isinstance(handler, logging.StreamHandler) and not isinstance(handler, logging.FileHandler):
        handler.setLevel(logging.CRITICAL)

# Directory containing the downloaded videos and generated metadata CSVs.
input_file_path = "sample_data"

# ApertureDB loader parameters for transaction size and parallelism.
batch_size = 1     # entities per transaction
num_threads = 10   # parallel loader threads

# Run the loaders for video and metadata; formatting errors are written
# to the file given as the last argument.
videos.load_all(db, input_file_path, batch_size, num_threads, "results/err_format.txt")