Platform Integration

This guide shows how to integrate ragit with popular frameworks and tools.

Flask Integration

Basic Flask Application

from flask import Flask, request, jsonify
from ragit import RAGAssistant

app = Flask(__name__)

# Create assistant at startup (NOT thread-safe, see below)
assistant = RAGAssistant("docs/")

@app.route("/ask", methods=["POST"])
def ask():
    data = request.json
    question = data.get("question", "")

    if not question:
        return jsonify({"error": "No question provided"}), 400

    answer = assistant.ask(question)
    return jsonify({"answer": answer})

@app.route("/health")
def health():
    return jsonify({"status": "ok"})

if __name__ == "__main__":
    app.run(debug=True)

Thread-Safe Flask Application

For production with multiple workers, use a factory pattern:

from flask import Flask, request, jsonify, g
from ragit import RAGAssistant

app = Flask(__name__)

def get_assistant():
    """Get or create assistant for current request context."""
    if "assistant" not in g:
        g.assistant = RAGAssistant(
            "docs/",
            chunk_size=512,
            chunk_overlap=50
        )
    return g.assistant

@app.teardown_appcontext
def teardown_assistant(exception):
    """Clean up assistant after request."""
    g.pop("assistant", None)

@app.route("/ask", methods=["POST"])
def ask():
    data = request.json
    question = data.get("question", "")

    if not question:
        return jsonify({"error": "No question provided"}), 400

    assistant = get_assistant()
    answer = assistant.ask(question)

    return jsonify({
        "question": question,
        "answer": answer
    })

@app.route("/retrieve", methods=["POST"])
def retrieve():
    data = request.json
    question = data.get("question", "")
    top_k = data.get("top_k", 3)

    assistant = get_assistant()
    results = assistant.retrieve(question, top_k=top_k)

    return jsonify({
        "question": question,
        "results": [
            {
                "content": chunk.content,
                "doc_id": chunk.doc_id,
                "score": score
            }
            for chunk, score in results
        ]
    })

if __name__ == "__main__":
    app.run(debug=True, threaded=True)

Using with curl:

# Ask a question
curl -X POST http://localhost:5000/ask \
     -H "Content-Type: application/json" \
     -d '{"question": "How do I install ragit?"}'

# Retrieve context
curl -X POST http://localhost:5000/retrieve \
     -H "Content-Type: application/json" \
     -d '{"question": "What is RAG?", "top_k": 5}'

FastAPI Integration

Basic FastAPI Application

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from ragit import RAGAssistant

app = FastAPI(title="RAG API", version="1.0.0")

# Global assistant (for simple deployments)
assistant = RAGAssistant("docs/")

class QuestionRequest(BaseModel):
    question: str
    top_k: int = 3

class AnswerResponse(BaseModel):
    question: str
    answer: str

class RetrieveResponse(BaseModel):
    question: str
    results: list[dict]

@app.post("/ask", response_model=AnswerResponse)
async def ask(request: QuestionRequest):
    if not request.question.strip():
        raise HTTPException(status_code=400, detail="Question cannot be empty")

    answer = assistant.ask(request.question)
    return AnswerResponse(question=request.question, answer=answer)

@app.post("/retrieve", response_model=RetrieveResponse)
async def retrieve(request: QuestionRequest):
    results = assistant.retrieve(request.question, top_k=request.top_k)

    return RetrieveResponse(
        question=request.question,
        results=[
            {
                "content": chunk.content,
                "doc_id": chunk.doc_id,
                "score": float(score)
            }
            for chunk, score in results
        ]
    )

@app.get("/health")
async def health():
    return {"status": "ok"}

Production FastAPI with Dependency Injection

from contextlib import asynccontextmanager
from fastapi import FastAPI, Depends, HTTPException
from pydantic import BaseModel
from ragit import RAGAssistant
from ragit.providers import OllamaProvider
import threading

# Thread-local storage for assistants
_local = threading.local()

def get_assistant() -> RAGAssistant:
    """Get thread-local assistant instance."""
    if not hasattr(_local, "assistant"):
        _local.assistant = RAGAssistant(
            "docs/",
            chunk_size=512,
            chunk_overlap=50
        )
    return _local.assistant

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: verify Ollama is available
    provider = OllamaProvider()
    if not provider.is_available():
        raise RuntimeError("Ollama server not available")
    yield
    # Shutdown: cleanup if needed

app = FastAPI(
    title="RAG API",
    version="1.0.0",
    lifespan=lifespan
)

class QuestionRequest(BaseModel):
    question: str
    top_k: int = 3

class AnswerResponse(BaseModel):
    question: str
    answer: str
    sources: list[dict] | None = None

@app.post("/ask", response_model=AnswerResponse)
async def ask(
    request: QuestionRequest,
    assistant: RAGAssistant = Depends(get_assistant)
):
    if not request.question.strip():
        raise HTTPException(status_code=400, detail="Question cannot be empty")

    # Get answer with sources
    context = assistant.retrieve(request.question, top_k=request.top_k)
    answer = assistant.ask(request.question)

    return AnswerResponse(
        question=request.question,
        answer=answer,
        sources=[
            {"doc_id": chunk.doc_id, "score": float(score)}
            for chunk, score in context
        ]
    )

@app.get("/models")
async def list_models():
    """List available models."""
    provider = OllamaProvider()
    return {
        "llm_models": ["llama3", "mistral", "codellama"],
        "embedding_models": ["mxbai-embed-large", "nomic-embed-text"]
    }

Running FastAPI:

# Install uvicorn
pip install uvicorn

# Run the server
uvicorn app:app --reload --host 0.0.0.0 --port 8000

# With multiple workers (production)
uvicorn app:app --workers 4 --host 0.0.0.0 --port 8000

Command Line Interface

Basic CLI Tool

#!/usr/bin/env python3
"""Simple RAG CLI tool."""

import argparse
import sys
from ragit import RAGAssistant
from ragit.providers import OllamaProvider

def main():
    parser = argparse.ArgumentParser(
        description="RAG-powered Q&A from documents"
    )
    parser.add_argument(
        "docs_path",
        help="Path to documents directory"
    )
    parser.add_argument(
        "-q", "--question",
        help="Question to ask (interactive if not provided)"
    )
    parser.add_argument(
        "--chunk-size",
        type=int,
        default=512,
        help="Chunk size (default: 512)"
    )
    parser.add_argument(
        "--top-k",
        type=int,
        default=3,
        help="Number of chunks to retrieve (default: 3)"
    )
    parser.add_argument(
        "--show-sources",
        action="store_true",
        help="Show source chunks with answer"
    )

    args = parser.parse_args()

    # Check Ollama availability
    provider = OllamaProvider()
    if not provider.is_available():
        print("Error: Ollama server not available", file=sys.stderr)
        print("Start with: ollama serve", file=sys.stderr)
        sys.exit(1)

    # Create assistant
    print(f"Loading documents from {args.docs_path}...")
    assistant = RAGAssistant(
        args.docs_path,
        chunk_size=args.chunk_size
    )
    print("Ready!\n")

    def ask_question(question: str):
        if args.show_sources:
            # Get sources first
            results = assistant.retrieve(question, top_k=args.top_k)
            print("\nSources:")
            for i, (chunk, score) in enumerate(results, 1):
                print(f"  {i}. [{chunk.doc_id}] (score: {score:.3f})")
                print(f"     {chunk.content[:100]}...")
            print()

        answer = assistant.ask(question)
        print(f"Answer: {answer}\n")

    if args.question:
        # Single question mode
        ask_question(args.question)
    else:
        # Interactive mode
        print("Enter questions (Ctrl+C to exit):\n")
        try:
            while True:
                question = input("Q: ").strip()
                if question:
                    ask_question(question)
        except KeyboardInterrupt:
            print("\nGoodbye!")

if __name__ == "__main__":
    main()

Save as rag_cli.py and use:

# Single question
python rag_cli.py docs/ -q "How do I install?"

# Interactive mode with sources
python rag_cli.py docs/ --show-sources

# Custom settings
python rag_cli.py docs/ --chunk-size 1024 --top-k 5

Advanced CLI with Click

#!/usr/bin/env python3
"""Advanced RAG CLI with Click."""

import click
import json
from ragit import RAGAssistant, RagitExperiment, Document, BenchmarkQuestion
from ragit.providers import OllamaProvider

@click.group()
def cli():
    """RAG-powered document Q&A tool."""
    pass

@cli.command()
@click.argument("docs_path")
@click.option("-q", "--question", help="Question to ask")
@click.option("--chunk-size", default=512, help="Chunk size")
@click.option("--top-k", default=3, help="Number of chunks")
@click.option("--json-output", is_flag=True, help="Output as JSON")
def ask(docs_path, question, chunk_size, top_k, json_output):
    """Ask questions about documents."""
    assistant = RAGAssistant(docs_path, chunk_size=chunk_size)

    if question:
        answer = assistant.ask(question)
        if json_output:
            click.echo(json.dumps({"question": question, "answer": answer}))
        else:
            click.echo(f"Answer: {answer}")
    else:
        # Interactive mode
        while True:
            try:
                q = click.prompt("Question", default="", show_default=False)
                if not q:
                    continue
                answer = assistant.ask(q)
                click.echo(f"Answer: {answer}\n")
            except click.Abort:
                break

@cli.command()
@click.argument("docs_path")
@click.argument("query")
@click.option("--top-k", default=5, help="Number of results")
def search(docs_path, query, top_k):
    """Search for relevant document chunks."""
    assistant = RAGAssistant(docs_path)
    results = assistant.retrieve(query, top_k=top_k)

    for i, (chunk, score) in enumerate(results, 1):
        click.echo(f"\n{i}. Score: {score:.3f} | Source: {chunk.doc_id}")
        click.echo(f"   {chunk.content[:200]}...")

@cli.command()
def check():
    """Check if Ollama is available."""
    provider = OllamaProvider()
    if provider.is_available():
        click.echo("Ollama is available")
        click.echo(f"URL: {provider.base_url}")
    else:
        click.echo("Ollama is NOT available", err=True)
        raise SystemExit(1)

if __name__ == "__main__":
    cli()

Usage:

# Check Ollama
python rag_cli.py check

# Ask question
python rag_cli.py ask docs/ -q "What is ragit?"

# Search documents
python rag_cli.py search docs/ "installation instructions"

# Interactive mode
python rag_cli.py ask docs/

Jupyter Notebook Integration

Using ragit in Jupyter notebooks:

# Cell 1: Setup
from ragit import RAGAssistant, load_directory
from ragit.providers import OllamaProvider

# Check Ollama
provider = OllamaProvider()
print(f"Ollama available: {provider.is_available()}")

# Cell 2: Create assistant
assistant = RAGAssistant(
    "docs/",
    chunk_size=512,
    chunk_overlap=50
)
print(f"Loaded {len(assistant._chunks)} chunks")

# Cell 3: Interactive Q&A
def ask(question: str):
    """Helper function for notebook Q&A."""
    print(f"Q: {question}\n")

    # Show sources
    results = assistant.retrieve(question, top_k=3)
    print("Sources:")
    for chunk, score in results:
        print(f"  - {chunk.doc_id} (score: {score:.3f})")

    # Generate answer
    answer = assistant.ask(question)
    print(f"\nA: {answer}")

# Usage
ask("How do I configure the database?")

# Cell 4: Visualization (optional)
import matplotlib.pyplot as plt

# Visualize chunk scores for a query
results = assistant.retrieve("What is RAG?", top_k=10)
scores = [score for _, score in results]
labels = [f"Chunk {i}" for i in range(len(scores))]

plt.figure(figsize=(10, 5))
plt.bar(labels, scores)
plt.xlabel("Chunks")
plt.ylabel("Similarity Score")
plt.title("Chunk Relevance Scores")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

Docker Deployment

Dockerfile for ragit application:

FROM python:3.12-slim

WORKDIR /app

# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY app.py .
COPY docs/ ./docs/

# Environment variables
ENV OLLAMA_BASE_URL=http://ollama:11434
ENV RAGIT_DEFAULT_LLM_MODEL=llama3

EXPOSE 8000

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

Docker Compose with Ollama:

version: '3.8'

services:
  ollama:
    image: ollama/ollama
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama

  rag-api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - OLLAMA_BASE_URL=http://ollama:11434
    depends_on:
      - ollama

volumes:
  ollama_data:

Running:

# Start services
docker-compose up -d

# Pull models in Ollama container
docker-compose exec ollama ollama pull llama3
docker-compose exec ollama ollama pull mxbai-embed-large

# Test the API
curl http://localhost:8000/ask \
     -H "Content-Type: application/json" \
     -d '{"question": "What is ragit?"}'