Document Q&A System

Overview

This example shows how to build a complete document question-answering system using Mini RAG. Users ask questions and receive answers grounded in your document collection.

Complete Example

import os
from mini import (
    AgenticRAG,
    LLMConfig,
    RetrievalConfig,
    EmbeddingModel,
    VectorStore
)
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def setup_rag():
    """Build and return the AgenticRAG instance used by this example.

    The vector store connection is configured from the ``MILVUS_URI`` and
    ``MILVUS_TOKEN`` environment variables.
    """
    embedder = EmbeddingModel()

    # Vector store backing the "company_docs" collection.
    # NOTE(review): dimension=1536 presumably has to match the output size of
    # the default EmbeddingModel — confirm.
    store = VectorStore(
        uri=os.getenv("MILVUS_URI"),
        token=os.getenv("MILVUS_TOKEN"),
        collection_name="company_docs",
        dimension=1536,
    )

    # Generation settings.
    llm_settings = LLMConfig(model="gpt-4o-mini", temperature=0.7)

    # Retrieval settings: query rewriting and re-ranking are both enabled,
    # with top_k=10 and rerank_top_k=3.
    retrieval_settings = RetrievalConfig(
        top_k=10,
        rerank_top_k=3,
        use_query_rewriting=True,
        use_reranking=True,
    )

    return AgenticRAG(
        vector_store=store,
        embedding_model=embedder,
        llm_config=llm_settings,
        retrieval_config=retrieval_settings,
    )

def index_documents(rag, document_paths):
    """Index every path in *document_paths* and return the total chunk count.

    Each document is indexed independently through ``rag.index_document``.
    A failure on one document is reported on stdout and does not stop the
    remaining documents from being processed.
    """
    print("📄 Indexing documents...")

    indexed_so_far = 0
    for path in document_paths:
        try:
            added = rag.index_document(path)
            indexed_so_far += added
            print(f"  ✓ {path}: {added} chunks")
        except Exception as failure:
            # Best effort: report the problem and move on to the next file.
            print(f"  ✗ {path}: Error - {failure}")

    print(f"\n✅ Total: {indexed_so_far} chunks indexed\n")
    return indexed_so_far

def _display_score(chunk):
    """Return the score to show for *chunk*, preferring the reranked score."""
    reranked = chunk.reranked_score
    # Compare against None explicitly: a reranked score of 0.0 is a real
    # value and must not fall back to the raw retrieval score.
    return chunk.score if reranked is None else reranked


def interactive_qa(rag):
    """Run an interactive Q&A loop on standard input/output.

    Reads questions until the user types ``quit``, ``exit`` or ``q``, or
    until input ends (EOF) or is interrupted (Ctrl-C). Each question is sent
    to ``rag.query``; the answer and the sources of the retrieved chunks are
    printed. Errors raised while answering a question are reported and the
    loop continues with the next question.

    Args:
        rag: Object exposing ``query(question)`` whose result has ``answer``
            and ``retrieved_chunks`` attributes.
    """
    print("=== Document Q&A System ===")
    print("Ask questions about your documents (type 'quit' to exit)\n")

    while True:
        try:
            question = input("❓ Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed or piped stdin, or Ctrl-C at the prompt, ends the
            # session cleanly instead of surfacing a traceback.
            print("\nGoodbye! 👋")
            break

        if question.lower() in ('quit', 'exit', 'q'):
            print("\nGoodbye! 👋")
            break

        # Ignore empty input and prompt again.
        if not question:
            continue

        try:
            response = rag.query(question)

            print(f"\n💬 Answer:\n{response.answer}\n")

            # Show sources (optional)
            if response.retrieved_chunks:
                print(f"📚 Sources ({len(response.retrieved_chunks)} chunks):")
                for i, chunk in enumerate(response.retrieved_chunks, 1):
                    score = _display_score(chunk)
                    source = chunk.metadata.get('source', 'Unknown')
                    print(f"  {i}. [Score: {score:.3f}] {source}")
                print()

        except Exception as e:
            # Keep the session alive when a single question fails.
            print(f"\n❌ Error: {e}\n")

def main():
    """Set up the RAG system, index the sample PDFs if needed, then start Q&A."""
    rag = setup_rag()

    # Sample documents that are indexed when the store reports no documents.
    documents = [
        "./docs/employee_handbook.pdf",
        "./docs/company_policies.pdf",
        "./docs/benefits_guide.pdf",
        "./docs/faq.pdf"
    ]

    # Indexing is skipped when the store already reports existing content.
    stats = rag.get_stats()
    already_indexed = stats['total_documents']
    if already_indexed != 0:
        print(f"📊 Using existing index: {already_indexed} chunks\n")
    else:
        index_documents(rag, documents)

    # Hand control to the interactive prompt.
    interactive_qa(rag)


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()

Running the Example

Save the Code

Save the code above as document_qa.py

Prepare Documents

Place your documents in a ./docs/ folder

Configure Environment

Ensure your .env file defines the credentials the example reads, including MILVUS_URI and MILVUS_TOKEN for the vector store

Run

python document_qa.py

Example Session

📄 Indexing documents...
  ✓ ./docs/employee_handbook.pdf: 45 chunks
  ✓ ./docs/company_policies.pdf: 32 chunks
  ✓ ./docs/benefits_guide.pdf: 28 chunks
  ✓ ./docs/faq.pdf: 15 chunks

✅ Total: 120 chunks indexed

=== Document Q&A System ===
Ask questions about your documents (type 'quit' to exit)

❓ Question: What is the vacation policy?

💬 Answer:
The company provides 15 days of paid vacation per year for full-time 
employees. Vacation days accrue monthly and can be used after completing
3 months of employment. Unused vacation days can be carried over to the
next year, up to a maximum of 5 days.

📚 Sources (3 chunks):
  1. [Score: 0.945] employee_handbook.pdf
  2. [Score: 0.892] company_policies.pdf
  3. [Score: 0.856] benefits_guide.pdf

❓ Question: How do I reset my password?

💬 Answer:
To reset your password, go to the company portal login page and click
"Forgot Password". Enter your email address and you'll receive a reset
link. The link is valid for 24 hours. If you don't receive the email,
check your spam folder or contact IT support.

📚 Sources (3 chunks):
  1. [Score: 0.923] faq.pdf
  2. [Score: 0.901] employee_handbook.pdf
  3. [Score: 0.878] faq.pdf

❓ Question: quit

Goodbye! 👋

Features Demonstrated

Document Indexing

Load and index multiple documents from different formats

Query Processing

Automatic query rewriting and optimized retrieval

Re-ranking

Re-rank retrieved results to improve answer quality

Source Attribution

Show which documents were used to generate answers

Enhancements

Add Metadata

# Index with metadata for better filtering
# The metadata dict is passed together with the document path; the keys and
# values below are example entries for an HR handbook.
# NOTE(review): assumes `rag` is the AgenticRAG instance returned by
# setup_rag(); this snippet does not define it — confirm before running it
# on its own.
rag.index_document(
    "employee_handbook.pdf",
    metadata={
        "category": "hr",
        "department": "human_resources",
        "last_updated": "2024-01-15",
        "version": "2.0"
    }
)

Add Citations

def format_answer_with_citations(response):
    """Print the answer followed by a numbered list of its source chunks.

    For every retrieved chunk the list shows the ``source`` and ``page``
    metadata entries (``Unknown`` / ``N/A`` when absent) and the first 100
    characters of the chunk text. Nothing is returned.
    """
    print(f"\n💬 Answer:\n{response.answer}\n")
    print("📚 References:")

    for number, chunk in enumerate(response.retrieved_chunks, start=1):
        meta = chunk.metadata
        origin = meta.get('source', 'Unknown')
        page_label = meta.get('page', 'N/A')
        print(f"  [{number}] {origin} (page {page_label})")

        # Short preview of the chunk text; the ellipsis is always appended.
        preview = chunk.text[:100]
        print(f"      {preview}...")

    print()

Add History

class QASession:
    """Q&A session that records every question asked through it."""

    def __init__(self, rag):
        """Keep a reference to *rag* and start with an empty history."""
        self.history = []
        self.rag = rag

    def ask(self, question):
        """Query the RAG system, log the exchange, and return the response.

        The history entry stores the question, the answer text, and the
        number of retrieved chunks.
        """
        response = self.rag.query(question)
        entry = {
            "question": question,
            "answer": response.answer,
            "sources": len(response.retrieved_chunks),
        }
        self.history.append(entry)
        return response

    def show_history(self):
        """Print every recorded exchange, with answers cut to 100 characters."""
        print("\n📜 Session History:")
        for number, entry in enumerate(self.history, start=1):
            print(f"{number}. Q: {entry['question']}")
            answer_preview = entry['answer'][:100]
            print(f"   A: {answer_preview}...")
            print(f"   Sources: {entry['sources']}\n")

Add Filters

# Allow filtering by category
def ask_with_filter(rag, question, category=None):
    """Ask *question* through *rag* and return its response.

    ``category`` is accepted as a placeholder for a future filter: it is not
    forwarded to ``rag.query`` yet, so it has no effect on the result.
    """
    # Note: Filtering would be implemented in the search call
    return rag.query(question)

# Usage
# NOTE(review): ask_with_filter() only calls rag.query(question), so the
# category passed here does not change the results yet.
response = ask_with_filter(rag, "What is the policy?", category="hr")

Production Enhancements

Add Observability

from mini import ObservabilityConfig

# Build the RAG system with an ObservabilityConfig whose `enabled` flag is True.
# NOTE(review): assumes vector_store and embedding_model were created as in
# setup_rag(). llm_config and retrieval_config are not passed here, so
# whatever defaults AgenticRAG applies are used — confirm those defaults.
rag = AgenticRAG(
    vector_store=vector_store,
    embedding_model=embedding_model,
    observability_config=ObservabilityConfig(enabled=True)
)

Add Caching

from functools import lru_cache

@lru_cache(maxsize=100)
def cached_query(question: str):
    """Return the RAG response for *question*, memoized per exact string.

    Up to 100 distinct questions are kept; the least recently used entry is
    evicted first. The query goes through the module-level ``rag`` instance.
    """
    response = rag.query(question)
    return response

Add Error Handling

def robust_query(rag, question, max_retries=3, backoff=1.0):
    """Query the RAG system, retrying failed attempts with a linear backoff.

    Args:
        rag: Object exposing ``query(question)``.
        question: Question text forwarded to ``rag.query``.
        max_retries: Total number of attempts; must be at least 1.
        backoff: Base delay in seconds. The wait after the n-th failed
            attempt (1-based) is ``backoff * n``.

    Returns:
        Whatever ``rag.query(question)`` returns on the first successful
        attempt.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        Exception: The error raised by the final attempt, re-raised unchanged.
    """
    # Imported locally so the snippet is self-contained: it calls time.sleep,
    # and nothing else in the snippet imports time.
    import time

    # Without this check a non-positive value would skip the loop and return
    # None, which looks like a successful but empty response.
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(1, max_retries + 1):
        try:
            return rag.query(question)
        except Exception as e:
            # Out of attempts: let the caller see the original error.
            if attempt == max_retries:
                raise
            print(f"Retry {attempt}/{max_retries} after error: {e}")
            time.sleep(backoff * attempt)

Next Steps

Chatbot

Build a chatbot with conversation history

FastAPI Integration

Deploy as a REST API

Research Assistant

Analyze research papers

Production Guide

Deploy to production

Getting Started

Core Concepts

Features

Guides

Examples

Document Q&A System

Overview

Complete Example

Running the Example

Example Session

Features Demonstrated

Document Indexing

Query Processing

Re-ranking

Source Attribution

Enhancements

Add Metadata

Add Citations

Add History

Add Filters

Production Enhancements

Add Observability

Add Caching

Add Error Handling

Next Steps

Chatbot

FastAPI Integration

Research Assistant

Production Guide

Getting Started

Core Concepts

Features

Guides

Examples

Overview

Complete Example

Running the Example

Example Session

Features Demonstrated

Document Indexing

Query Processing

Re-ranking

Source Attribution

Enhancements

Add Metadata

Add Citations

Add History

Add Filters

Production Enhancements

Add Observability

Add Caching

Add Error Handling

Next Steps

Chatbot

FastAPI Integration

Research Assistant

Production Guide

Overview

Complete Example

Running the Example

Example Session

Features Demonstrated

Enhancements

Add Metadata

Add Citations

Add History

Add Filters

Production Enhancements

Add Observability

Add Caching

Add Error Handling

Next Steps