Overview

AgenticRAG is the main class that orchestrates the complete RAG pipeline. It handles document indexing, query processing, retrieval, reranking, and answer generation.

Constructor

class AgenticRAG:
    def __init__(
        self,
        vector_store: VectorStore,
        embedding_model: EmbeddingModel,
        llm_config: Optional[LLMConfig] = None,
        retrieval_config: Optional[RetrievalConfig] = None,
        reranker_config: Optional[RerankerConfig] = None,
        observability_config: Optional[ObservabilityConfig] = None
    )

Parameters

vector_store (VectorStore, required): Vector store instance for document storage and retrieval
embedding_model (EmbeddingModel, required): Embedding model for generating vector representations
llm_config (LLMConfig, default LLMConfig()): Configuration for the language model
retrieval_config (RetrievalConfig, default RetrievalConfig()): Configuration for retrieval behavior
reranker_config (RerankerConfig, default RerankerConfig()): Configuration for reranking strategy
observability_config (ObservabilityConfig, default ObservabilityConfig()): Configuration for observability and monitoring

Example

import os
from mini import (
    AgenticRAG,
    LLMConfig,
    RetrievalConfig,
    EmbeddingModel,
    VectorStore
)

# Initialize components
embedding_model = EmbeddingModel()
vector_store = VectorStore(
    uri=os.getenv("MILVUS_URI"),
    token=os.getenv("MILVUS_TOKEN"),
    collection_name="documents",
    dimension=1536
)

# Create RAG instance
rag = AgenticRAG(
    vector_store=vector_store,
    embedding_model=embedding_model,
    llm_config=LLMConfig(
        model="gpt-4o-mini",
        temperature=0.7
    ),
    retrieval_config=RetrievalConfig(
        top_k=10,
        rerank_top_k=3,
        use_query_rewriting=True,
        use_reranking=True
    )
)
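
All four configuration parameters are optional, so a minimal setup can rely entirely on the documented defaults:

# Minimal setup: llm_config, retrieval_config, reranker_config, and
# observability_config all fall back to their default instances
rag = AgenticRAG(
    vector_store=vector_store,
    embedding_model=embedding_model
)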

Methods

query

Query the RAG system and get an answer.
def query(
    self,
    query: str,
    top_k: Optional[int] = None,
    rerank_top_k: Optional[int] = None,
    return_sources: bool = True
) -> RAGResponse

Parameters

query (str, required): The question to ask
top_k (int, optional): Number of chunks to retrieve (overrides the config default)
rerank_top_k (int, optional): Number of chunks to keep after reranking (overrides the config default)
return_sources (bool, default True): Whether to return source chunks

Returns

RAGResponse (object): Response object containing the generated answer, the retrieved source chunks, and query metadata. See RAGResponse below.

Example

# Basic query
response = rag.query("What is the budget for education?")
print(response.answer)

# With custom parameters
response = rag.query(
    query="What are the key findings?",
    top_k=15,
    rerank_top_k=5,
    return_sources=True
)

# Access response details
print(f"Answer: {response.answer}")
print(f"Sources: {len(response.retrieved_chunks)}")
print(f"Query variations: {response.rewritten_queries}")

index_document

Index a single document into the vector store.
def index_document(
    self,
    document_path: str,
    metadata: Optional[Dict[str, Any]] = None
) -> int

Parameters

document_path (str, required): Path to the document file
metadata (Dict[str, Any], optional): Metadata to attach to all chunks from this document

Returns

num_chunks (int): Number of chunks created and indexed

Example

# Basic indexing
num_chunks = rag.index_document("document.pdf")
print(f"Indexed {num_chunks} chunks")

# With metadata
num_chunks = rag.index_document(
    "research_paper.pdf",
    metadata={
        "category": "research",
        "year": 2024,
        "author": "John Doe",
        "department": "AI Research"
    }
)
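
To attach different metadata to each file, index_document can simply be called in a loop. The file names and metadata values below are illustrative:

# Per-document metadata via repeated index_document calls
# (paths and metadata values are hypothetical)
papers = {
    "budget_2024.pdf": {"category": "finance", "year": 2024},
    "survey_results.pdf": {"category": "research", "year": 2023},
}
for path, meta in papers.items():
    num_chunks = rag.index_document(path, metadata=meta)
    print(f"{path}: {num_chunks} chunks")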

index_documents

Index multiple documents at once.
def index_documents(
    self,
    document_paths: List[str],
    metadata: Optional[Dict[str, Any]] = None
) -> int

Parameters

document_paths (List[str], required): List of paths to document files
metadata (Dict[str, Any], optional): Metadata to attach to all chunks from all documents

Returns

total_chunks (int): Total number of chunks indexed across all documents

Example

documents = [
    "doc1.pdf",
    "doc2.docx",
    "doc3.txt"
]

total_chunks = rag.index_documents(documents)
print(f"Indexed {total_chunks} chunks from {len(documents)} documents")

get_stats

Get statistics about the RAG system.
def get_stats(self) -> Dict[str, Any]

Returns

stats (Dict[str, Any]): Dictionary of system statistics, including keys such as total_documents (the number of indexed chunks) and collection_name (the active collection), as used in the example below

Example

stats = rag.get_stats()
print(f"Total chunks: {stats['total_documents']}")
print(f"Collection: {stats['collection_name']}")

RAGResponse

The response object returned by the query method.

Attributes

answer (str): The generated answer to the query
retrieved_chunks (List[Chunk]): List of source chunks used to generate the answer
original_query (str): The original query string
rewritten_queries (List[str]): Query variations generated by query rewriting (if enabled)
metadata (Dict[str, Any]): Additional metadata about the query execution

Example

response = rag.query("What is the main topic?")

# Access answer
print(response.answer)

# Access sources
for chunk in response.retrieved_chunks:
    print(f"Score: {chunk.reranked_score or chunk.score}")
    print(f"Text: {chunk.text[:100]}...")
    print(f"Metadata: {chunk.metadata}")

# Access query variations
print(f"Original: {response.original_query}")
print(f"Variations: {response.rewritten_queries}")

Complete Example

import os
from mini import (
    AgenticRAG,
    LLMConfig,
    RetrievalConfig,
    RerankerConfig,
    ObservabilityConfig,
    EmbeddingModel,
    VectorStore
)
from dotenv import load_dotenv

load_dotenv()

# Initialize components
embedding_model = EmbeddingModel()
vector_store = VectorStore(
    uri=os.getenv("MILVUS_URI"),
    token=os.getenv("MILVUS_TOKEN"),
    collection_name="knowledge_base",
    dimension=1536
)

# Create RAG with all configurations
rag = AgenticRAG(
    vector_store=vector_store,
    embedding_model=embedding_model,
    llm_config=LLMConfig(
        model="gpt-4o-mini",
        temperature=0.7,
        timeout=60.0
    ),
    retrieval_config=RetrievalConfig(
        top_k=10,
        rerank_top_k=3,
        use_query_rewriting=True,
        use_reranking=True,
        use_hybrid_search=True
    ),
    reranker_config=RerankerConfig(
        type="cohere",
        kwargs={"model": "rerank-english-v3.0"}
    ),
    observability_config=ObservabilityConfig(
        enabled=True
    )
)

# Index documents
documents = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]
total_chunks = rag.index_documents(documents)
print(f"Indexed {total_chunks} chunks")

# Query the system
response = rag.query(
    "What are the key findings?",
    top_k=15,
    rerank_top_k=5
)

print(f"Answer: {response.answer}")
print(f"Used {len(response.retrieved_chunks)} sources")

# Get statistics
stats = rag.get_stats()
print(f"Total chunks: {stats['total_documents']}")
