Overview
VectorStore provides vector storage and similarity search using Milvus. It handles collection management, indexing, search, and hybrid search operations.
Constructor
from mini.store import VectorStore
vector_store = VectorStore(
uri: str,
token: str,
collection_name: str,
dimension: int,
metric_type: str = "IP",
index_type: str = "IVF_FLAT",
nlist: int = 128
)
Parameters
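uri (str): URI of the Milvus instance to connect to
token (str): Access token for the Milvus instance
collection_name (str): Name of the collection to use or create
dimension (int): Dimension of embedding vectors
metric_type (str, default "IP"): Distance metric: "IP" (inner product), "L2", or "COSINE"
index_type (str, default "IVF_FLAT"): Index algorithm: "IVF_FLAT", "IVF_SQ8", or "HNSW"
nlist (int, default 128): Number of cluster units for IVF indexes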
Example
import os
from mini.store import VectorStore
vector_store = VectorStore(
uri=os.getenv("MILVUS_URI"),
token=os.getenv("MILVUS_TOKEN"),
collection_name="my_documents",
dimension=1536,
metric_type="IP",
index_type="IVF_FLAT"
)
Methods
insert
Insert vectors with their texts and metadata.
def insert(
self,
embeddings: List[List[float]],
texts: List[str],
metadata: Optional[List[Dict[str, Any]]] = None
) -> List[int]
Parameters
embeddings (List[List[float]], required): List of embedding vectors
texts (List[str], required): List of text strings corresponding to embeddings
metadata (Optional[List[Dict[str, Any]]], default None): Optional list of metadata dictionaries
Returns
List of assigned IDs for the inserted vectors
Example
# Insert vectors
ids = vector_store.insert(
embeddings=embeddings,
texts=["Text 1", "Text 2", "Text 3"],
metadata=[
{"source": "doc1.pdf", "page": 1},
{"source": "doc1.pdf", "page": 2},
{"source": "doc2.pdf", "page": 1}
]
)
print(f"Inserted {len(ids)} vectors")
search
Search for similar vectors.
def search(
self,
query_embedding: List[float],
top_k: int = 5,
filter_expr: Optional[str] = None,
output_fields: Optional[List[str]] = None
) -> List[Dict[str, Any]]
Parameters
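query_embedding (List[float], required): Query vector to find similar vectors for
top_k (int, default 5): Number of results to return
filter_expr (Optional[str], default None): Optional filter expression
output_fields (Optional[List[str]], default None): Fields to return in results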
Returns
List of search results with scores and metadata
Example
# Search
results = vector_store.search(
query_embedding=query_embedding,
top_k=5,
filter_expr='metadata["source"] == "doc1.pdf"',
output_fields=["text", "metadata"]
)
for result in results:
print(f"Score: {result['score']:.3f}")
print(f"Text: {result['text'][:100]}...")
print(f"Metadata: {result['metadata']}")
hybrid_search
Perform hybrid search combining semantic and BM25 keyword search.
def hybrid_search(
self,
query: str,
query_embedding: List[float],
top_k: int = 5,
filter_expr: Optional[str] = None,
output_fields: Optional[List[str]] = None
) -> List[Dict[str, Any]]
Parameters
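query (str, required): Query text for BM25 search
query_embedding (List[float], required): Query vector for semantic search
top_k (int, default 5): Number of results to return
filter_expr (Optional[str], default None): Optional filter expression
output_fields (Optional[List[str]], default None): Fields to return in results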
Returns
Semantic and BM25 keyword results combined using Reciprocal Rank Fusion (RRF)
count
Get the number of vectors in the collection.
Returns
Number of vectors in the collection
Example
count = vector_store.count()
print(f"Total vectors: {count}")
delete
Delete vectors matching a filter expression.
def delete(self, expr: str) -> int
Parameters
expr (str, required): Filter expression for deletion
Returns
Number of vectors deleted
Example
# Delete by metadata
deleted = vector_store.delete('metadata["source"] == "old_doc.pdf"')
print(f"Deleted {deleted} vectors")
drop_collection
Delete the entire collection.
def drop_collection(self)
Warning: This permanently deletes all data in the collection.
disconnect
Close the connection to Milvus.
Complete Example
import os
from mini import EmbeddingModel, VectorStore
# Initialize
embedding_model = EmbeddingModel()
vector_store = VectorStore(
uri=os.getenv("MILVUS_URI"),
token=os.getenv("MILVUS_TOKEN"),
collection_name="documents",
dimension=1536
)
# Prepare data
texts = [
"Machine learning is a subset of AI.",
"Deep learning uses neural networks.",
"NLP processes human language."
]
# Generate embeddings
embeddings = embedding_model.embed_chunks(texts)
# Insert
ids = vector_store.insert(
embeddings=embeddings,
texts=texts,
metadata=[
{"topic": "ML", "page": 1},
{"topic": "DL", "page": 2},
{"topic": "NLP", "page": 3}
]
)
# Search
query = "What is machine learning?"
query_embedding = embedding_model.embed_query(query)
results = vector_store.search(
query_embedding=query_embedding,
top_k=3
)
for result in results:
print(f"{result['score']:.3f}: {result['text']}")
# Get count
print(f"Total: {vector_store.count()}")
# Cleanup
vector_store.disconnect()
See Also