I’m trying to run the example file “hello_milvus.py” and it is very slow.
Even querying for the number of entities takes ~0.5s.
I’m using Milvus 2.0 RC8 with docker-compose.
Here is my docker compose:
# Compose file for a single-node ("standalone") Milvus with its two
# dependencies: etcd and MinIO.
version: '3.5'

services:
  etcd:
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.0
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000
      - ETCD_QUOTA_BACKEND_BYTES=4294967296
    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd

  minio:
    container_name: milvus-minio
    image: minio/minio:RELEASE.2020-12-03T00-03-10Z
    environment:
      MINIO_ACCESS_KEY: minioadmin
      MINIO_SECRET_KEY: minioadmin
    command: minio server /minio_data
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3

  standalone:
    container_name: milvus-standalone
    image: milvusdb/daily-build:master-20211216-5864e5e
    command: ["milvus", "run", "standalone"]
    environment:
      # Service names resolve on the shared "milvus" network declared below.
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    ports:
      # host:container -- clients on the host connect to port 30100.
      - 30100:19530
    depends_on:
      - "etcd"
      - "minio"

networks:
  default:
    name: milvus
``
And here is the Python script (with very few modifications relative to the official example):
`
#!venv/bin/python3
import math
import time
import random
from pymilvus import (
connections, list_collections,
FieldSchema, CollectionSchema, DataType,
Collection
)
class Timer:
    """Context manager that times a ``with`` block and prints the result.

    On exit it prints ``-- timer -- <message> -- <elapsed seconds>``.

    ``init_time`` and ``end_time`` are readings of ``time.perf_counter()``,
    so they are only meaningful relative to each other (not as wall-clock
    timestamps). ``init_time``, ``end_time`` and ``elapsed`` stay ``None``
    until the block is entered / exited.
    """

    def __init__(self, message):
        """Store the label printed with the timing; nothing is timed yet."""
        self.message = message
        self.init_time = None
        self.end_time = None
        self.elapsed = None

    def __enter__(self):
        """Start the clock and return the timer so callers can use ``as``."""
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments while the block runs.
        self.init_time = time.perf_counter()
        return self

    def current(self):
        """Return the seconds elapsed since the block was entered.

        Raises:
            TypeError: if called before ``__enter__`` (``init_time`` is
                still ``None``).
        """
        return time.perf_counter() - self.init_time

    def __exit__(self, *args):
        """Record the end time and elapsed seconds, then print the timing.

        Returns ``None``, so an exception raised inside the block is not
        suppressed.
        """
        self.end_time = time.perf_counter()
        self.elapsed = self.end_time - self.init_time
        print(f"-- timer -- {self.message} -- {self.elapsed}")
def hello_milvus():
    """Run a timed insert / index / load / count / search round trip.

    Connects to Milvus on ``localhost:30100``, creates the collection
    ``hello_milvus`` (an INT64 primary key ``count`` plus a 128-dimensional
    float vector), inserts 5000 random vectors, builds an IVF_FLAT index,
    loads the collection, then times reading ``num_entities`` and a 2-query
    search. The collection is dropped at the end, including when one of the
    intermediate steps raises.
    """
    # create connection
    connections.connect(host="localhost", port=30100)

    # create collection
    dim = 128
    default_fields = [
        FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
        FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim),
    ]
    default_schema = CollectionSchema(fields=default_fields,
                                      description="test collection")

    print("\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=default_schema)

    # Everything after creation is guarded so that a failure part-way through
    # does not leave the collection (and its rows) behind for the next run.
    try:
        print("\nList collections...")
        print(list_collections())

        # insert data: primary keys 0..nb-1 alongside random vectors
        nb = 5000
        vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
        collection.insert([list(range(nb)), vectors])

        # nlist is the largest power of 2 not exceeding sqrt(nb)*4
        nlist = 2 ** math.trunc(math.log2(math.sqrt(nb) * 4))
        print(f"create index with nlist={nlist}")
        default_index = {"index_type": "IVF_FLAT",
                         "params": {"nlist": nlist},
                         "metric_type": "L2"}
        collection.create_index(field_name="float_vector",
                                index_params=default_index)

        print("\nload collection...")
        collection.load()

        # NOTE(review): pymilvus 2.0.x appears to flush the collection before
        # reading num_entities, which would make this call slow regardless of
        # the collection size -- confirm against the pymilvus documentation.
        with Timer("Get collection entities..."):
            print(collection.num_entities)

        # search for the last two inserted vectors, one nearest hit each
        search_params = {"metric_type": "L2", "params": {"nprobe": 100}}
        with Timer("Search"):
            res = collection.search(
                vectors[-2:], "float_vector", search_params, limit=1,
                output_fields=["count"])

        # show result
        for hits in res:
            for hit in hits:
                print(hit)
    finally:
        collection.drop()
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    hello_milvus()
`
- the call to collection.num_entities takes 0.5s
- the collection.search takes 0.3s
The timings are almost unchanged whether I search among 100 vectors or 100,000 vectors.
I'm pretty sure I'm doing something wrong in my setup (CPU cache, Docker, ...).