Platform Integration
====================

This guide shows how to integrate ragit with popular frameworks and tools.

Flask Integration
-----------------

Basic Flask Application
^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

   from flask import Flask, request, jsonify
   from ragit import RAGAssistant

   app = Flask(__name__)

   # Create assistant at startup (NOT thread-safe, see below)
   assistant = RAGAssistant("docs/")

   @app.route("/ask", methods=["POST"])
   def ask():
       """Answer the question sent in the JSON request body.

       Expects a JSON object with a ``question`` key and responds with
       ``{"answer": ...}``. A missing, malformed, or non-object body is
       treated like an empty question and yields a 400 response.
       """
       # silent=True makes get_json() return None instead of raising when the
       # body is absent or is not valid JSON.
       data = request.get_json(silent=True)
       if not isinstance(data, dict):
           data = {}
       question = data.get("question", "")

       if not question:
           return jsonify({"error": "No question provided"}), 400

       answer = assistant.ask(question)
       return jsonify({"answer": answer})

   @app.route("/health")
   def health():
       """Liveness probe: always reports ``{"status": "ok"}``."""
       payload = {"status": "ok"}
       return jsonify(payload)

   if __name__ == "__main__":
       app.run(debug=True)

Thread-Safe Flask Application
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

For production, where requests are served concurrently, store the assistant on Flask's ``g`` object so that each application context gets its own instance:

.. code-block:: python

   from flask import Flask, request, jsonify, g
   from ragit import RAGAssistant

   app = Flask(__name__)

   def get_assistant():
       """Return the assistant stored on ``g``, building it on first access."""
       if "assistant" in g:
           return g.assistant

       # First call in this application context: build and cache the instance.
       g.assistant = RAGAssistant("docs/", chunk_size=512, chunk_overlap=50)
       return g.assistant

   @app.teardown_appcontext
   def teardown_assistant(exception):
       """Drop the cached assistant when the application context ends."""
       if "assistant" in g:
           g.pop("assistant")

   @app.route("/ask", methods=["POST"])
   def ask():
       """Answer the question sent in the JSON request body.

       Expects a JSON object with a ``question`` key and responds with the
       question and its answer. A missing, malformed, or non-object body is
       treated like an empty question and yields a 400 response.
       """
       # silent=True makes get_json() return None instead of raising when the
       # body is absent or is not valid JSON.
       data = request.get_json(silent=True)
       if not isinstance(data, dict):
           data = {}
       question = data.get("question", "")

       if not question:
           return jsonify({"error": "No question provided"}), 400

       assistant = get_assistant()
       answer = assistant.ask(question)

       return jsonify({
           "question": question,
           "answer": answer
       })

   @app.route("/retrieve", methods=["POST"])
   def retrieve():
       """Return the chunks most relevant to the question, with their scores.

       Expects a JSON object with a ``question`` key and an optional
       ``top_k`` (positive integer, default 3). Invalid input yields a 400
       response.
       """
       # silent=True makes get_json() return None instead of raising when the
       # body is absent or is not valid JSON.
       data = request.get_json(silent=True)
       if not isinstance(data, dict):
           data = {}
       question = data.get("question", "")
       top_k = data.get("top_k", 3)

       # Same validation as /ask, so an empty query never reaches the retriever.
       if not question:
           return jsonify({"error": "No question provided"}), 400
       # bool is a subclass of int, so it has to be rejected explicitly.
       if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 1:
           return jsonify({"error": "top_k must be a positive integer"}), 400

       assistant = get_assistant()
       results = assistant.retrieve(question, top_k=top_k)

       return jsonify({
           "question": question,
           "results": [
               {
                   "content": chunk.content,
                   "doc_id": chunk.doc_id,
                   # Plain float so the score is always JSON-serializable.
                   "score": float(score)
               }
               for chunk, score in results
           ]
       })

   if __name__ == "__main__":
       # Local testing only. The interactive debugger is left off because this
       # example targets production; deploy behind a real WSGI server rather
       # than the built-in development server.
       app.run(threaded=True)

Using with curl:

.. code-block:: bash

   # Ask a question (POSTs a JSON body to the /ask endpoint on port 5000)
   curl -X POST http://localhost:5000/ask \
        -H "Content-Type: application/json" \
        -d '{"question": "How do I install ragit?"}'

   # Retrieve context (top_k sets how many chunks are requested)
   curl -X POST http://localhost:5000/retrieve \
        -H "Content-Type: application/json" \
        -d '{"question": "What is RAG?", "top_k": 5}'

FastAPI Integration
-------------------

Basic FastAPI Application
^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

   from fastapi import FastAPI, HTTPException
   from pydantic import BaseModel
   from ragit import RAGAssistant

   app = FastAPI(title="RAG API", version="1.0.0")

   # Global assistant (for simple deployments)
   assistant = RAGAssistant("docs/")

   # Request body shared by the /ask and /retrieve endpoints.
   class QuestionRequest(BaseModel):
       question: str
       top_k: int = 3  # read by /retrieve only; /ask does not use it

   # Response body of /ask: the question echoed back with its answer.
   class AnswerResponse(BaseModel):
       question: str
       answer: str

   # Response body of /retrieve: one dict (content, doc_id, score) per chunk.
   class RetrieveResponse(BaseModel):
       question: str
       results: list[dict]

   # Answer a question; blank or whitespace-only questions are rejected.
   @app.post("/ask", response_model=AnswerResponse)
   async def ask(request: QuestionRequest):
       question = request.question
       if question.strip() == "":
           raise HTTPException(status_code=400, detail="Question cannot be empty")

       # NOTE: assistant.ask() is a synchronous call made from an async handler.
       reply = assistant.ask(question)
       return AnswerResponse(question=question, answer=reply)

   # Return the top_k chunks most relevant to the question, with their scores.
   @app.post("/retrieve", response_model=RetrieveResponse)
   async def retrieve(request: QuestionRequest):
       # Same guard as /ask: never send a blank query to the retriever.
       if not request.question.strip():
           raise HTTPException(status_code=400, detail="Question cannot be empty")

       results = assistant.retrieve(request.question, top_k=request.top_k)

       return RetrieveResponse(
           question=request.question,
           results=[
               {
                   "content": chunk.content,
                   "doc_id": chunk.doc_id,
                   # Plain float so the score serializes cleanly to JSON.
                   "score": float(score)
               }
               for chunk, score in results
           ]
       )

   # Liveness probe: always reports {"status": "ok"}.
   @app.get("/health")
   async def health():
       status = {"status": "ok"}
       return status

Production FastAPI with Dependency Injection
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

   from contextlib import asynccontextmanager
   from fastapi import FastAPI, Depends, HTTPException
   from pydantic import BaseModel
   from ragit import RAGAssistant
   from ragit.providers import OllamaProvider
   import threading

   # Thread-local storage for assistants
   _local = threading.local()

   def get_assistant() -> RAGAssistant:
       """Return the calling thread's assistant, creating it on first use."""
       # NOTE(review): FastAPI runs plain ``def`` dependencies in a worker
       # thread while ``async def`` endpoints run on the event loop thread;
       # confirm the instance is only used on the thread that created it.
       instance = getattr(_local, "assistant", None)
       if instance is None:
           instance = RAGAssistant("docs/", chunk_size=512, chunk_overlap=50)
           _local.assistant = instance
       return instance

   @asynccontextmanager
   async def lifespan(app: FastAPI):
       """Refuse to start the application unless Ollama is reachable."""
       # Startup: fail fast when the Ollama server cannot be reached.
       if not OllamaProvider().is_available():
           raise RuntimeError("Ollama server not available")
       yield
       # Shutdown: nothing to release yet.

   app = FastAPI(
       title="RAG API",
       version="1.0.0",
       lifespan=lifespan
   )

   # Request body for /ask.
   class QuestionRequest(BaseModel):
       question: str
       top_k: int = 3  # number of chunks retrieved and reported as sources

   # Response body for /ask.
   class AnswerResponse(BaseModel):
       question: str
       answer: str
       # One {"doc_id", "score"} entry per retrieved chunk; optional.
       sources: list[dict] | None = None

   # Answer a question and report which chunks were retrieved for it.
   @app.post("/ask", response_model=AnswerResponse)
   async def ask(
       request: QuestionRequest,
       assistant: RAGAssistant = Depends(get_assistant)
   ):
       # Reject blank input before doing any retrieval work.
       question = request.question
       if question.strip() == "":
           raise HTTPException(status_code=400, detail="Question cannot be empty")

       # Retrieve the supporting chunks first, then generate the answer.
       # NOTE(review): top_k is only passed to retrieve(); confirm that ask()
       # draws on the same chunks that are reported as sources.
       context = assistant.retrieve(question, top_k=request.top_k)
       answer = assistant.ask(question)

       sources = [
           {"doc_id": chunk.doc_id, "score": float(score)}
           for chunk, score in context
       ]
       return AnswerResponse(question=question, answer=answer, sources=sources)

   @app.get("/models")
   async def list_models():
       """List the model names this example advertises.

       The lists are hard-coded; nothing is queried from the Ollama server.
       """
       return {
           "llm_models": ["llama3", "mistral", "codellama"],
           "embedding_models": ["mxbai-embed-large", "nomic-embed-text"]
       }

Running FastAPI:

.. code-block:: bash

   # Install uvicorn
   pip install uvicorn

   # Run the server (development: --reload restarts it when the code changes)
   uvicorn app:app --reload --host 0.0.0.0 --port 8000

   # With multiple workers (production); each worker is a separate process
   uvicorn app:app --workers 4 --host 0.0.0.0 --port 8000

Command Line Interface
----------------------

Basic CLI Tool
^^^^^^^^^^^^^^

.. code-block:: python

   #!/usr/bin/env python3
   """Simple RAG CLI tool."""

   import argparse
   import sys
   from ragit import RAGAssistant
   from ragit.providers import OllamaProvider

   def main():
       """Parse the command line, build the assistant, and answer questions.

       With ``-q/--question`` a single answer is printed. Otherwise questions
       are read interactively until Ctrl+C or end of input. Exits with
       status 1 when the Ollama server is not available.
       """
       parser = argparse.ArgumentParser(
           description="RAG-powered Q&A from documents"
       )
       parser.add_argument(
           "docs_path",
           help="Path to documents directory"
       )
       parser.add_argument(
           "-q", "--question",
           help="Question to ask (interactive if not provided)"
       )
       parser.add_argument(
           "--chunk-size",
           type=int,
           default=512,
           help="Chunk size (default: 512)"
       )
       # --top-k is only read when the sources are listed (see ask_question).
       parser.add_argument(
           "--top-k",
           type=int,
           default=3,
           help="Number of source chunks to list with --show-sources (default: 3)"
       )
       parser.add_argument(
           "--show-sources",
           action="store_true",
           help="Show source chunks with answer"
       )

       args = parser.parse_args()

       # Check Ollama availability before loading any documents
       provider = OllamaProvider()
       if not provider.is_available():
           print("Error: Ollama server not available", file=sys.stderr)
           print("Start with: ollama serve", file=sys.stderr)
           sys.exit(1)

       # Create assistant
       print(f"Loading documents from {args.docs_path}...")
       assistant = RAGAssistant(
           args.docs_path,
           chunk_size=args.chunk_size
       )
       print("Ready!\n")

       def ask_question(question: str):
           """Print the answer, preceded by its sources when requested."""
           if args.show_sources:
               # Get sources first
               results = assistant.retrieve(question, top_k=args.top_k)
               print("\nSources:")
               for i, (chunk, score) in enumerate(results, 1):
                   print(f"  {i}. [{chunk.doc_id}] (score: {score:.3f})")
                   print(f"     {chunk.content[:100]}...")
               print()

           answer = assistant.ask(question)
           print(f"Answer: {answer}\n")

       if args.question:
           # Single question mode
           ask_question(args.question)
       else:
           # Interactive mode
           print("Enter questions (Ctrl+C to exit):\n")
           try:
               while True:
                   question = input("Q: ").strip()
                   if question:
                       ask_question(question)
           except (KeyboardInterrupt, EOFError):
               # EOFError covers Ctrl+D and exhausted piped input, which
               # would otherwise end the session with a traceback.
               print("\nGoodbye!")

   if __name__ == "__main__":
       main()

Save as ``rag_cli.py`` and use:

.. code-block:: bash

   # Single question
   python rag_cli.py docs/ -q "How do I install?"

   # Interactive mode with sources
   python rag_cli.py docs/ --show-sources

   # Custom settings (--top-k only takes effect together with --show-sources)
   python rag_cli.py docs/ --chunk-size 1024 --top-k 5 --show-sources

Advanced CLI with Click
^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: python

   #!/usr/bin/env python3
   """Advanced RAG CLI with Click."""

   import click
   import json
   from ragit import RAGAssistant, RagitExperiment, Document, BenchmarkQuestion
   from ragit.providers import OllamaProvider

   # Root command group; the subcommands below attach to it.
   @click.group()
   def cli():
       """RAG-powered document Q&A tool."""

   @cli.command()
   @click.argument("docs_path")
   @click.option("-q", "--question", help="Question to ask")
   @click.option("--chunk-size", default=512, help="Chunk size")
   @click.option("--top-k", default=3, help="Number of chunks")
   @click.option("--json-output", is_flag=True, help="Output as JSON")
   def ask(docs_path, question, chunk_size, top_k, json_output):
       """Ask questions about documents."""
       # NOTE(review): top_k is accepted but not used anywhere in this command.
       assistant = RAGAssistant(docs_path, chunk_size=chunk_size)

       # One-shot mode: answer the supplied question and return.
       if question:
           answer = assistant.ask(question)
           if json_output:
               payload = {"question": question, "answer": answer}
               click.echo(json.dumps(payload))
           else:
               click.echo(f"Answer: {answer}")
           return

       # Interactive mode: keep prompting until the prompt is aborted.
       try:
           while True:
               q = click.prompt("Question", default="", show_default=False)
               if q:
                   reply = assistant.ask(q)
                   click.echo(f"Answer: {reply}\n")
       except click.Abort:
           pass

   @cli.command()
   @click.argument("docs_path")
   @click.argument("query")
   @click.option("--top-k", default=5, help="Number of results")
   def search(docs_path, query, top_k):
       """Search for relevant document chunks."""
       hits = RAGAssistant(docs_path).retrieve(query, top_k=top_k)

       # Number the hits from 1 and show a 200-character preview of each.
       for rank, (chunk, score) in enumerate(hits, start=1):
           click.echo(f"\n{rank}. Score: {score:.3f} | Source: {chunk.doc_id}")
           preview = chunk.content[:200]
           click.echo(f"   {preview}...")

   @cli.command()
   def check():
       """Check if Ollama is available."""
       provider = OllamaProvider()

       # Failure first: report on stderr and exit with status 1.
       if not provider.is_available():
           click.echo("Ollama is NOT available", err=True)
           raise SystemExit(1)

       click.echo("Ollama is available")
       click.echo(f"URL: {provider.base_url}")

   if __name__ == "__main__":
       cli()

Save as ``rag_cli.py`` and use:

.. code-block:: bash

   # Check that the Ollama server is reachable
   python rag_cli.py check

   # Ask a single question
   python rag_cli.py ask docs/ -q "What is ragit?"

   # Search documents for the most relevant chunks
   python rag_cli.py search docs/ "installation instructions"

   # Interactive mode (no -q: prompts for questions until aborted)
   python rag_cli.py ask docs/

Jupyter Notebook Integration
----------------------------

Using ragit in Jupyter notebooks:

.. code-block:: python

   # Cell 1: Setup
   from ragit import RAGAssistant, load_directory
   from ragit.providers import OllamaProvider

   # Check Ollama
   provider = OllamaProvider()
   print(f"Ollama available: {provider.is_available()}")

   # Cell 2: Create assistant
   assistant = RAGAssistant(
       "docs/",
       chunk_size=512,
       chunk_overlap=50
   )
   # NOTE(review): _chunks is a private attribute (leading underscore) and may
   # change between releases; prefer a public accessor if ragit provides one.
   print(f"Loaded {len(assistant._chunks)} chunks")

   # Cell 3: Interactive Q&A
   def ask(question: str):
       """Print the sources retrieved for *question*, then the answer."""
       print(f"Q: {question}\n")

       # List the three best-matching chunks with their scores.
       hits = assistant.retrieve(question, top_k=3)
       print("Sources:")
       for hit, similarity in hits:
           print(f"  - {hit.doc_id} (score: {similarity:.3f})")

       # Generate and show the answer.
       reply = assistant.ask(question)
       print(f"\nA: {reply}")

   # Usage
   ask("How do I configure the database?")

   # Cell 4: Visualization (optional)
   import matplotlib.pyplot as plt

   # Bar chart of the similarity score of each chunk retrieved for one query
   results = assistant.retrieve("What is RAG?", top_k=10)
   scores = [score for _, score in results]
   labels = [f"Chunk {i}" for i in range(len(scores))]

   # Same figure, drawn through matplotlib's object-oriented interface.
   fig, ax = plt.subplots(figsize=(10, 5))
   ax.bar(labels, scores)
   ax.set_xlabel("Chunks")
   ax.set_ylabel("Similarity Score")
   ax.set_title("Chunk Relevance Scores")
   ax.tick_params(axis="x", labelrotation=45)
   fig.tight_layout()
   plt.show()

Docker Deployment
-----------------

Dockerfile for a ragit application:

.. code-block:: dockerfile

   FROM python:3.12-slim

   WORKDIR /app

   # Install dependencies first so this layer is reused when only the
   # application code changes
   COPY requirements.txt .
   RUN pip install --no-cache-dir -r requirements.txt

   # Copy the application and the documents it serves
   COPY app.py .
   COPY docs/ ./docs/

   # Environment variables: "ollama" is the Compose service name of the
   # Ollama container
   ENV OLLAMA_BASE_URL=http://ollama:11434
   ENV RAGIT_DEFAULT_LLM_MODEL=llama3

   EXPOSE 8000

   # Serve app.py's "app" object on all interfaces, port 8000
   CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

Docker Compose with Ollama:

.. code-block:: yaml

   version: '3.8'

   services:
     # Ollama server; pulled models are kept in the ollama_data volume
     ollama:
       image: ollama/ollama
       ports:
         - "11434:11434"
       volumes:
         - ollama_data:/root/.ollama

     # The ragit API, built from the Dockerfile in this directory
     rag-api:
       build: .
       ports:
         - "8000:8000"
       environment:
         - OLLAMA_BASE_URL=http://ollama:11434
       # Controls start order only; it does not wait for Ollama to be ready
       depends_on:
         - ollama

   volumes:
     ollama_data:

Running:

.. code-block:: bash

   # Start services in the background
   docker-compose up -d

   # Pull the LLM and embedding models inside the Ollama container
   docker-compose exec ollama ollama pull llama3
   docker-compose exec ollama ollama pull mxbai-embed-large

   # Test the API (-d makes curl send a POST request)
   curl http://localhost:8000/ask \
        -H "Content-Type: application/json" \
        -d '{"question": "What is ragit?"}'