Skip to main content

Overview

VectorStore provides vector storage and similarity search using Milvus. It handles collection management, indexing, search, and hybrid search operations.

Constructor

from mini.store import VectorStore

vector_store = VectorStore(
    uri: str,
    token: str,
    collection_name: str,
    dimension: int,
    metric_type: str = "IP",
    index_type: str = "IVF_FLAT",
    nlist: int = 128
)

Parameters

uri
str
required
Milvus server URI
token
str
required
Authentication token
collection_name
str
required
Name of the collection to use or create
dimension
int
required
Dimension of embedding vectors
metric_type
str
default:"IP"
Distance metric: "IP" (inner product), "L2" (Euclidean distance), or "COSINE" (cosine similarity)
index_type
str
default:"IVF_FLAT"
Index algorithm: "IVF_FLAT", "IVF_SQ8", or "HNSW"
nlist
int
default:"128"
Number of cluster units for IVF indexes

Example

import os
from mini.store import VectorStore

vector_store = VectorStore(
    uri=os.getenv("MILVUS_URI"),
    token=os.getenv("MILVUS_TOKEN"),
    collection_name="my_documents",
    dimension=1536,
    metric_type="IP",
    index_type="IVF_FLAT"
)

Methods

insert

Insert vectors with their texts and metadata.
def insert(
    self,
    embeddings: List[List[float]],
    texts: List[str],
    metadata: Optional[List[Dict[str, Any]]] = None
) -> List[int]

Parameters

embeddings
List[List[float]]
required
List of embedding vectors
texts
List[str]
required
List of text strings corresponding to embeddings
metadata
List[Dict[str, Any]]
Optional list of metadata dictionaries

Returns

ids
List[int]
List of assigned IDs for the inserted vectors

Example

# Insert vectors
ids = vector_store.insert(
    embeddings=embeddings,
    texts=["Text 1", "Text 2", "Text 3"],
    metadata=[
        {"source": "doc1.pdf", "page": 1},
        {"source": "doc1.pdf", "page": 2},
        {"source": "doc2.pdf", "page": 1}
    ]
)

print(f"Inserted {len(ids)} vectors")

search

Search for similar vectors.
def search(
    self,
    query_embedding: List[float],
    top_k: int = 5,
    filter_expr: Optional[str] = None,
    output_fields: Optional[List[str]] = None
) -> List[Dict[str, Any]]

Parameters

query_embedding
List[float]
required
Query vector
top_k
int
default:"5"
Number of results to return
filter_expr
str
Optional filter expression
output_fields
List[str]
Fields to return in results

Returns

results
List[Dict[str, Any]]
List of search results with scores and metadata

Example

# Search
results = vector_store.search(
    query_embedding=query_embedding,
    top_k=5,
    filter_expr='metadata["source"] == "doc1.pdf"',
    output_fields=["text", "metadata"]
)

for result in results:
    print(f"Score: {result['score']:.3f}")
    print(f"Text: {result['text'][:100]}...")
    print(f"Metadata: {result['metadata']}")

hybrid_search

Perform hybrid search combining semantic and BM25 keyword search.
def hybrid_search(
    self,
    query: str,
    query_embedding: List[float],
    top_k: int = 5,
    filter_expr: Optional[str] = None,
    output_fields: Optional[List[str]] = None
) -> List[Dict[str, Any]]

Parameters

query
str
required
Query text for BM25 search
query_embedding
List[float]
required
Query vector for semantic search
top_k
int
default:"5"
Number of results to return
filter_expr
str
Optional filter expression
output_fields
List[str]
Fields to return in results

Returns

results
List[Dict[str, Any]]
Semantic and BM25 results combined using Reciprocal Rank Fusion (RRF)

count

Get the number of vectors in the collection.
def count(self) -> int

Returns

count
int
Number of vectors in the collection

Example

count = vector_store.count()
print(f"Total vectors: {count}")

delete

Delete vectors matching a filter expression.
def delete(self, expr: str) -> int

Parameters

expr
str
required
Filter expression for deletion

Returns

deleted_count
int
Number of vectors deleted

Example

# Delete by metadata
deleted = vector_store.delete('metadata["source"] == "old_doc.pdf"')
print(f"Deleted {deleted} vectors")

drop_collection

Delete the entire collection.
def drop_collection(self)
Warning: This permanently deletes all data in the collection.

disconnect

Close the connection to Milvus.
def disconnect(self)

Complete Example

import os
from mini import EmbeddingModel, VectorStore

# Initialize
embedding_model = EmbeddingModel()
vector_store = VectorStore(
    uri=os.getenv("MILVUS_URI"),
    token=os.getenv("MILVUS_TOKEN"),
    collection_name="documents",
    dimension=1536
)

# Prepare data
texts = [
    "Machine learning is a subset of AI.",
    "Deep learning uses neural networks.",
    "NLP processes human language."
]

# Generate embeddings
embeddings = embedding_model.embed_chunks(texts)

# Insert
ids = vector_store.insert(
    embeddings=embeddings,
    texts=texts,
    metadata=[
        {"topic": "ML", "page": 1},
        {"topic": "DL", "page": 2},
        {"topic": "NLP", "page": 3}
    ]
)

# Search
query = "What is machine learning?"
query_embedding = embedding_model.embed_query(query)

results = vector_store.search(
    query_embedding=query_embedding,
    top_k=3
)

for result in results:
    print(f"{result['score']:.3f}: {result['text']}")

# Get count
print(f"Total: {vector_store.count()}")

# Cleanup
vector_store.disconnect()

See Also