# Source code for ragit.core.experiment.results

#
# Copyright RODMENA LIMITED 2025
# SPDX-License-Identifier: Apache-2.0
#
"""
Ragit experiment results.
"""

from collections.abc import Iterator
from dataclasses import asdict, dataclass, field
from typing import Any



@dataclass
class EvaluationResult:
    """
    Result from evaluating a single RAG configuration.

    Parameters
    ----------
    pattern_name : str
        Name of the RAG pattern (e.g., "Pattern_1").
    indexing_params : dict[str, Any]
        Hyperparameters used during indexing (chunk_size, overlap, etc.).
    inference_params : dict[str, Any]
        Hyperparameters used during inference (num_chunks, llm_model, etc.).
    scores : dict[str, dict[str, float]]
        Evaluation scores (answer_correctness, context_relevance, faithfulness).
    execution_time : float
        Time taken for evaluation in seconds.
    final_score : float
        Combined score for optimization ranking.
    """

    pattern_name: str
    indexing_params: dict[str, Any]
    inference_params: dict[str, Any]
    scores: dict[str, dict[str, float]]
    execution_time: float
    final_score: float

    def to_dict(self) -> dict[str, Any]:
        """
        Convert to dictionary.

        Returns
        -------
        dict[str, Any]
            Field names mapped to their values, as built by
            ``dataclasses.asdict``. Nested dicts are rebuilt rather than
            shared, so mutating the returned mapping does not touch this
            instance.
        """
        return asdict(self)

    def __repr__(self) -> str:
        """
        Return a compact one-line summary.

        Only the pattern name, the final score (3 decimals) and the
        execution time (1 decimal, in seconds) are shown; the parameter and
        score dictionaries are omitted to keep the output short.
        """
        # An explicitly defined __repr__ takes precedence over the one
        # @dataclass would otherwise generate.
        return (
            f"EvaluationResult(name={self.pattern_name}, score={self.final_score:.3f}, time={self.execution_time:.1f}s)"
        )




@dataclass
class ExperimentResults:
    """
    Collection of evaluation results from an optimization experiment.

    The container supports ``len()``, iteration and truth testing, all of
    which delegate to the underlying ``evaluations`` list.

    Attributes
    ----------
    evaluations : list[EvaluationResult]
        All evaluation results, in insertion order.
    """

    evaluations: list[EvaluationResult] = field(default_factory=list)

    def __len__(self) -> int:
        """Return the number of stored evaluation results."""
        return len(self.evaluations)

    def __iter__(self) -> Iterator[EvaluationResult]:
        """Iterate over the stored results in insertion order."""
        return iter(self.evaluations)

    def __bool__(self) -> bool:
        """Return True when at least one result has been stored."""
        return bool(self.evaluations)

    def add(self, result: EvaluationResult) -> None:
        """
        Add an evaluation result.

        Parameters
        ----------
        result : EvaluationResult
            Result to append to the end of ``evaluations``.
        """
        self.evaluations.append(result)

    def is_cached(
        self,
        indexing_params: dict[str, Any],
        inference_params: dict[str, Any],
    ) -> float | None:
        """
        Check if this configuration was already evaluated.

        Both parameter dictionaries are compared by equality against every
        stored result; the first match wins.

        Parameters
        ----------
        indexing_params : dict[str, Any]
            Indexing hyperparameters of the configuration to look up.
        inference_params : dict[str, Any]
            Inference hyperparameters of the configuration to look up.

        Returns
        -------
        float or None
            Final score if cached, None otherwise.

        Notes
        -----
        A cached score of ``0.0`` is falsy, so callers should test the
        result with ``is not None`` rather than plain truthiness.
        """
        for ev in self.evaluations:
            if ev.indexing_params == indexing_params and ev.inference_params == inference_params:
                return ev.final_score
        return None

    @property
    def scores(self) -> list[float]:
        """All final scores, in insertion order."""
        return [ev.final_score for ev in self.evaluations]

    def sorted(self, reverse: bool = True) -> list[EvaluationResult]:
        """
        Get results sorted by final score.

        Parameters
        ----------
        reverse : bool
            If True (default), best scores first.

        Returns
        -------
        list[EvaluationResult]
            Sorted results, as a new list; ``evaluations`` is left untouched.
        """
        # Inside a method body the bare name ``sorted`` resolves to the
        # builtin, not to this method (class scope is not searched here).
        return sorted(self.evaluations, key=lambda x: x.final_score, reverse=reverse)

    def get_best(self, k: int = 1) -> list[EvaluationResult]:
        """
        Get k best results.

        Parameters
        ----------
        k : int
            Number of results to return. Values of zero or below yield an
            empty list.

        Returns
        -------
        list[EvaluationResult]
            Top k results by score (fewer if fewer results are stored).
        """
        # Guard explicitly: a negative k passed straight to the slice would
        # mean "everything except the last |k|", not "no results".
        if k <= 0:
            return []
        return self.sorted()[:k]