Platform Integration
This guide shows how to integrate ragit with popular frameworks and tools.
Flask Integration
Basic Flask Application
from flask import Flask, request, jsonify
from ragit import RAGAssistant
# Application object; the routes below are registered on it.
app = Flask(__name__)
# Create a single module-level assistant at startup and share it across all
# requests (NOT thread-safe, see below)
assistant = RAGAssistant("docs/")
@app.route("/ask", methods=["POST"])
def ask():
    """Answer a question posted as JSON: ``{"question": "..."}``.

    Returns:
        A JSON object ``{"answer": ...}`` on success, or a 400 response with
        an ``error`` message when no usable question is supplied.
    """
    # silent=True yields None instead of raising for a missing, malformed or
    # non-JSON body, so every bad payload ends up in the same 400 below
    # rather than a 415 or an AttributeError on ``None.get``.
    data = request.get_json(silent=True)
    question = data.get("question", "") if isinstance(data, dict) else ""
    if not isinstance(question, str) or not question.strip():
        return jsonify({"error": "No question provided"}), 400

    answer = assistant.ask(question)
    return jsonify({"answer": answer})
@app.route("/health")
def health():
    """Liveness probe: always reports a fixed ``ok`` status."""
    payload = {"status": "ok"}
    return jsonify(payload)
# Start Flask's built-in server (debug mode on) only when run as a script.
if __name__ == "__main__":
    app.run(debug=True)
Thread-Safe Flask Application
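For production with multiple workers, do not share one global assistant; instead create it lazily per application context (stored on flask.g) and discard it on teardown: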
from flask import Flask, request, jsonify, g
from ragit import RAGAssistant
# Application object; the routes and teardown hook below are registered on it.
app = Flask(__name__)
def get_assistant():
    """Return the assistant cached on ``g``, building it on first use.

    The instance is stored under ``g.assistant`` so repeated calls within the
    same application context reuse it.
    """
    if "assistant" in g:
        return g.assistant

    g.assistant = RAGAssistant("docs/", chunk_size=512, chunk_overlap=50)
    return g.assistant
@app.teardown_appcontext
def teardown_assistant(exception):
    """Drop the cached assistant from ``g`` when the app context ends.

    ``exception`` is accepted because the teardown hook passes it; it is not
    inspected here.
    """
    # A default of None makes this a no-op when no assistant was created.
    g.pop("assistant", None)
@app.route("/ask", methods=["POST"])
def ask():
    """Answer a question posted as JSON: ``{"question": "..."}``.

    Returns:
        A JSON object echoing the question together with the answer, or a
        400 response with an ``error`` message when no usable question is
        supplied.
    """
    # silent=True yields None instead of raising for a missing, malformed or
    # non-JSON body, so every bad payload ends up in the same 400 below
    # rather than a 415 or an AttributeError on ``None.get``.
    data = request.get_json(silent=True)
    question = data.get("question", "") if isinstance(data, dict) else ""
    if not isinstance(question, str) or not question.strip():
        return jsonify({"error": "No question provided"}), 400

    answer = get_assistant().ask(question)
    return jsonify({
        "question": question,
        "answer": answer,
    })
@app.route("/retrieve", methods=["POST"])
def retrieve():
    """Return the top-scoring chunks for a question posted as JSON.

    Expected body: ``{"question": "...", "top_k": 3}`` (``top_k`` optional).

    Returns:
        A JSON object with the question and a ``results`` list of
        ``content`` / ``doc_id`` / ``score`` entries, or a 400 response with
        an ``error`` message for an unusable question or ``top_k``.
    """
    # silent=True yields None instead of raising for a missing, malformed or
    # non-JSON body; treat anything that is not a JSON object as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    # Same validation as the /ask route so both endpoints agree.
    question = data.get("question", "")
    if not isinstance(question, str) or not question.strip():
        return jsonify({"error": "No question provided"}), 400

    top_k = data.get("top_k", 3)
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 1:
        return jsonify({"error": "top_k must be a positive integer"}), 400

    results = get_assistant().retrieve(question, top_k=top_k)
    return jsonify({
        "question": question,
        "results": [
            {
                "content": chunk.content,
                "doc_id": chunk.doc_id,
                # Coerce to a plain float, as the FastAPI example does;
                # presumably scores may be non-builtin numeric types that
                # the JSON encoder rejects — TODO confirm.
                "score": float(score),
            }
            for chunk, score in results
        ],
    })
if __name__ == "__main__":
    # Debug mode is deliberately left off: this example targets production,
    # and the interactive debugger allows arbitrary code execution.
    app.run(threaded=True)
Using with curl:
# Ask a question
curl -X POST http://localhost:5000/ask \
-H "Content-Type: application/json" \
-d '{"question": "How do I install ragit?"}'
# Retrieve context
curl -X POST http://localhost:5000/retrieve \
-H "Content-Type: application/json" \
-d '{"question": "What is RAG?", "top_k": 5}'
FastAPI Integration
Basic FastAPI Application
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from ragit import RAGAssistant
# Application object; the endpoints below are registered on it.
app = FastAPI(title="RAG API", version="1.0.0")
# Global assistant, created once at import time and shared by every request
# (for simple deployments)
assistant = RAGAssistant("docs/")
# Request body shared by the /ask and /retrieve endpoints.
class QuestionRequest(BaseModel):
    question: str
    top_k: int = 3  # number of chunks to retrieve when the client omits it
# Response body of /ask: the question echoed back with its answer.
class AnswerResponse(BaseModel):
    question: str
    answer: str
# Response body of /retrieve: the question echoed back with one dict per
# retrieved chunk.
class RetrieveResponse(BaseModel):
    question: str
    results: list[dict]
@app.post("/ask", response_model=AnswerResponse)
async def ask(request: QuestionRequest):
    # Reject blank or whitespace-only questions before calling the assistant.
    question = request.question
    if question.strip() == "":
        raise HTTPException(status_code=400, detail="Question cannot be empty")

    return AnswerResponse(question=question, answer=assistant.ask(question))
@app.post("/retrieve", response_model=RetrieveResponse)
async def retrieve(request: QuestionRequest):
    # Apply the same blank-question check as /ask so both endpoints reject
    # empty queries with a 400.
    if not request.question.strip():
        raise HTTPException(status_code=400, detail="Question cannot be empty")

    results = assistant.retrieve(request.question, top_k=request.top_k)
    return RetrieveResponse(
        question=request.question,
        results=[
            {
                "content": chunk.content,
                "doc_id": chunk.doc_id,
                # Coerce to a plain float for the JSON response.
                "score": float(score),
            }
            for chunk, score in results
        ],
    )
@app.get("/health")
async def health():
    # Liveness probe: always reports a fixed status.
    status_payload = {"status": "ok"}
    return status_payload
Production FastAPI with Dependency Injection
from contextlib import asynccontextmanager
from fastapi import FastAPI, Depends, HTTPException
from pydantic import BaseModel
from ragit import RAGAssistant
from ragit.providers import OllamaProvider
import threading
# Thread-local storage for assistants: each thread that touches `_local`
# sees its own independent `assistant` attribute.
_local = threading.local()
def get_assistant() -> RAGAssistant:
    """Return the calling thread's assistant, creating it on first use."""
    # NOTE(review): the instance is bound to whichever thread runs this
    # function; if the framework resolves this dependency on a different
    # thread than the one running the endpoint, the assistant is used
    # outside the thread that created it — confirm against FastAPI's
    # dependency execution model.
    if hasattr(_local, "assistant"):
        return _local.assistant

    _local.assistant = RAGAssistant("docs/", chunk_size=512, chunk_overlap=50)
    return _local.assistant
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Fail application startup when the Ollama server is unreachable."""
    # Startup phase: everything before the yield runs first.
    ollama = OllamaProvider()
    reachable = ollama.is_available()
    if not reachable:
        raise RuntimeError("Ollama server not available")

    yield
    # Shutdown phase: nothing to clean up at the moment.
# Application object; `lifespan` runs the Ollama availability check at startup.
app = FastAPI(title="RAG API", version="1.0.0", lifespan=lifespan)
# Request body for the /ask endpoint.
class QuestionRequest(BaseModel):
    question: str
    top_k: int = 3  # number of chunks to retrieve when the client omits it
# Response body of /ask: question, answer, and optionally the chunks that
# were retrieved for it.
class AnswerResponse(BaseModel):
    question: str
    answer: str
    sources: list[dict] | None = None  # one dict per retrieved chunk
@app.post("/ask", response_model=AnswerResponse)
async def ask(
    request: QuestionRequest,
    assistant: RAGAssistant = Depends(get_assistant),
):
    # Reject blank or whitespace-only questions up front.
    question = request.question
    if question.strip() == "":
        raise HTTPException(status_code=400, detail="Question cannot be empty")

    # Retrieve the supporting chunks first, then generate the answer.
    # NOTE(review): retrieval and answering are two independent calls, so the
    # reported sources are not guaranteed to be the exact chunks the answer
    # was generated from — confirm against the RAGAssistant API.
    context = assistant.retrieve(question, top_k=request.top_k)
    answer = assistant.ask(question)

    return AnswerResponse(
        question=question,
        answer=answer,
        sources=[
            {"doc_id": chunk.doc_id, "score": float(score)}
            for chunk, score in context
        ],
    )
@app.get("/models")
async def list_models():
    """List available models."""
    # The lists are static; nothing is queried from Ollama here, so no
    # provider instance is created.
    return {
        "llm_models": ["llama3", "mistral", "codellama"],
        "embedding_models": ["mxbai-embed-large", "nomic-embed-text"],
    }
Running FastAPI (assuming the application above is saved as app.py):
# Install uvicorn
pip install uvicorn
# Run the server
uvicorn app:app --reload --host 0.0.0.0 --port 8000
# With multiple workers (production)
uvicorn app:app --workers 4 --host 0.0.0.0 --port 8000
Command Line Interface
Basic CLI Tool
#!/usr/bin/env python3
"""Simple RAG CLI tool."""
import argparse
import sys
from ragit import RAGAssistant
from ragit.providers import OllamaProvider
def main():
    """Run the RAG command-line tool.

    Parses the command-line arguments, verifies that the Ollama server is
    reachable (exiting with status 1 otherwise), loads the documents, and
    then either answers the single ``--question`` or enters an interactive
    prompt loop that ends on Ctrl+C or end-of-input.
    """
    parser = argparse.ArgumentParser(
        description="RAG-powered Q&A from documents"
    )
    parser.add_argument(
        "docs_path",
        help="Path to documents directory",
    )
    parser.add_argument(
        "-q", "--question",
        help="Question to ask (interactive if not provided)",
    )
    parser.add_argument(
        "--chunk-size",
        type=int,
        default=512,
        help="Chunk size (default: 512)",
    )
    parser.add_argument(
        "--top-k",
        type=int,
        default=3,
        help="Number of chunks to retrieve (default: 3)",
    )
    parser.add_argument(
        "--show-sources",
        action="store_true",
        help="Show source chunks with answer",
    )
    args = parser.parse_args()

    # Check Ollama availability before doing any expensive loading.
    provider = OllamaProvider()
    if not provider.is_available():
        print("Error: Ollama server not available", file=sys.stderr)
        print("Start with: ollama serve", file=sys.stderr)
        sys.exit(1)

    # Create assistant
    print(f"Loading documents from {args.docs_path}...")
    assistant = RAGAssistant(
        args.docs_path,
        chunk_size=args.chunk_size,
    )
    print("Ready!\n")

    def ask_question(question: str):
        """Print the answer to *question*, preceded by sources if requested."""
        if args.show_sources:
            # Get sources first
            results = assistant.retrieve(question, top_k=args.top_k)
            print("\nSources:")
            for i, (chunk, score) in enumerate(results, 1):
                print(f" {i}. [{chunk.doc_id}] (score: {score:.3f})")
                print(f" {chunk.content[:100]}...")
            print()
        answer = assistant.ask(question)
        print(f"Answer: {answer}\n")

    if args.question:
        # Single question mode
        ask_question(args.question)
    else:
        # Interactive mode
        print("Enter questions (Ctrl+C to exit):\n")
        try:
            while True:
                question = input("Q: ").strip()
                if question:
                    ask_question(question)
        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError on Ctrl+D or when piped stdin runs
            # out; treat that like Ctrl+C instead of crashing.
            print("\nGoodbye!")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Save as rag_cli.py and use:
# Single question
python rag_cli.py docs/ -q "How do I install?"
# Interactive mode with sources
python rag_cli.py docs/ --show-sources
# Custom settings
python rag_cli.py docs/ --chunk-size 1024 --top-k 5
Advanced CLI with Click
#!/usr/bin/env python3
"""Advanced RAG CLI with Click."""
import click
import json
from ragit import RAGAssistant, RagitExperiment, Document, BenchmarkQuestion
from ragit.providers import OllamaProvider
# Root command group; the subcommands below attach to it via @cli.command().
@click.group()
def cli():
    """RAG-powered document Q&A tool."""
@cli.command()
@click.argument("docs_path")
@click.option("-q", "--question", help="Question to ask")
@click.option("--chunk-size", default=512, help="Chunk size")
@click.option("--top-k", default=3, help="Number of chunks")
@click.option("--json-output", is_flag=True, help="Output as JSON")
def ask(docs_path, question, chunk_size, top_k, json_output):
    """Ask questions about documents."""
    # NOTE(review): `top_k` is accepted but never used by this command, and
    # `json_output` only affects single-question mode — confirm intent.
    assistant = RAGAssistant(docs_path, chunk_size=chunk_size)

    if question:
        # Single-question mode: answer once and finish.
        answer = assistant.ask(question)
        if json_output:
            rendered = json.dumps({"question": question, "answer": answer})
        else:
            rendered = f"Answer: {answer}"
        click.echo(rendered)
        return

    # Interactive mode: keep prompting until the prompt is aborted.
    while True:
        try:
            entered = click.prompt("Question", default="", show_default=False)
            if not entered:
                continue
            reply = assistant.ask(entered)
            click.echo(f"Answer: {reply}\n")
        except click.Abort:
            break
@cli.command()
@click.argument("docs_path")
@click.argument("query")
@click.option("--top-k", default=5, help="Number of results")
def search(docs_path, query, top_k):
    """Search for relevant document chunks."""
    hits = RAGAssistant(docs_path).retrieve(query, top_k=top_k)

    # Print a numbered header line, then the first 200 characters of the chunk.
    rank = 0
    for chunk, score in hits:
        rank += 1
        click.echo(f"\n{rank}. Score: {score:.3f} | Source: {chunk.doc_id}")
        click.echo(f" {chunk.content[:200]}...")
@cli.command()
def check():
    """Check if Ollama is available."""
    provider = OllamaProvider()

    # Failure path first: report on stderr and exit with status 1.
    if not provider.is_available():
        click.echo("Ollama is NOT available", err=True)
        raise SystemExit(1)

    click.echo("Ollama is available")
    click.echo(f"URL: {provider.base_url}")
# Dispatch to the click command group only when executed as a script.
if __name__ == "__main__":
    cli()
Usage (assuming the Click script above is saved as rag_cli.py):
# Check Ollama
python rag_cli.py check
# Ask question
python rag_cli.py ask docs/ -q "What is ragit?"
# Search documents
python rag_cli.py search docs/ "installation instructions"
# Interactive mode
python rag_cli.py ask docs/
Jupyter Notebook Integration
Using ragit in Jupyter notebooks:
# Cell 1: Setup
from ragit import RAGAssistant, load_directory
from ragit.providers import OllamaProvider
# Check Ollama
provider = OllamaProvider()
print(f"Ollama available: {provider.is_available()}")
# Cell 2: Create assistant
# Build the assistant over the docs/ directory with explicit chunking settings.
assistant = RAGAssistant(
"docs/",
chunk_size=512,
chunk_overlap=50
)
# NOTE(review): `_chunks` has a leading underscore, i.e. it is a private
# attribute; prefer a public accessor if ragit offers one — TODO confirm.
print(f"Loaded {len(assistant._chunks)} chunks")
# Cell 3: Interactive Q&A
def ask(question: str):
    """Print the question, its top-3 source chunks, and the generated answer."""
    print(f"Q: {question}\n")

    # List where the retrieved context comes from before answering.
    print("Sources:")
    for chunk, score in assistant.retrieve(question, top_k=3):
        print(f" - {chunk.doc_id} (score: {score:.3f})")

    # Generate and display the answer.
    print(f"\nA: {assistant.ask(question)}")
# Usage
ask("How do I configure the database?")
# Cell 4: Visualization (optional)
import matplotlib.pyplot as plt
# Bar chart of the similarity scores of the top 10 chunks for one query.
results = assistant.retrieve("What is RAG?", top_k=10)
scores = [relevance for _chunk, relevance in results]
labels = [f"Chunk {position}" for position, _ in enumerate(scores)]

plt.figure(figsize=(10, 5))
plt.bar(labels, scores)

# Axis labelling and layout.
plt.xlabel("Chunks")
plt.ylabel("Similarity Score")
plt.title("Chunk Relevance Scores")
plt.xticks(rotation=45)
plt.tight_layout()

plt.show()
Docker Deployment
Dockerfile for ragit application:
# Base image
FROM python:3.12-slim
# All following paths are relative to /app
WORKDIR /app
# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY app.py .
COPY docs/ ./docs/
# Environment variables
ENV OLLAMA_BASE_URL=http://ollama:11434
ENV RAGIT_DEFAULT_LLM_MODEL=llama3
# Port the uvicorn command below listens on
EXPOSE 8000
# Serve the `app` object from app.py with uvicorn on all interfaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
Docker Compose with Ollama:
version: '3.8'

services:
  # Ollama server; model data is kept in the named volume below.
  ollama:
    image: ollama/ollama
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama

  # The RAG API, built from the Dockerfile in this directory.
  rag-api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - OLLAMA_BASE_URL=http://ollama:11434
    depends_on:
      - ollama

volumes:
  ollama_data:
Running:
# Start services
docker-compose up -d
# Pull models in Ollama container
docker-compose exec ollama ollama pull llama3
docker-compose exec ollama ollama pull mxbai-embed-large
# Test the API
curl http://localhost:8000/ask \
-H "Content-Type: application/json" \
-d '{"question": "What is ragit?"}'