#
# Copyright RODMENA LIMITED 2025
# SPDX-License-Identifier: Apache-2.0
#
"""
Ragit experiment results.
"""
from collections.abc import Iterator
from dataclasses import asdict, dataclass, field
from typing import Any
[docs]
@dataclass
class EvaluationResult:
    """
    Outcome of scoring one RAG configuration.

    Parameters
    ----------
    pattern_name : str
        Label of the RAG pattern (e.g., "Pattern_1").
    indexing_params : dict[str, Any]
        Hyperparameters applied at indexing time (chunk_size, overlap, etc.).
    inference_params : dict[str, Any]
        Hyperparameters applied at inference time (num_chunks, llm_model, etc.).
    scores : dict[str, dict[str, float]]
        Evaluation scores (answer_correctness, context_relevance, faithfulness).
    execution_time : float
        Evaluation duration in seconds.
    final_score : float
        Combined score used to rank configurations during optimization.
    """

    pattern_name: str
    indexing_params: dict[str, Any]
    inference_params: dict[str, Any]
    scores: dict[str, dict[str, float]]
    execution_time: float
    final_score: float

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dictionary (nested containers are copied)."""
        as_mapping: dict[str, Any] = asdict(self)
        return as_mapping

    def __repr__(self) -> str:
        # Compact one-line summary instead of the full dataclass repr:
        # score is shown with 3 decimals, time with 1 decimal.
        name = self.pattern_name
        score = self.final_score
        elapsed = self.execution_time
        return f"EvaluationResult(name={name}, score={score:.3f}, time={elapsed:.1f}s)"
[docs]
@dataclass
class ExperimentResults:
    """
    Collection of evaluation results from an optimization experiment.

    The container supports ``len()``, iteration (in insertion order) and
    truthiness (``False`` while no result has been added).

    Attributes
    ----------
    evaluations : list[EvaluationResult]
        All evaluation results, in the order they were added.
    """

    evaluations: list[EvaluationResult] = field(default_factory=list)

    def __len__(self) -> int:
        """Return the number of stored evaluation results."""
        return len(self.evaluations)

    def __iter__(self) -> Iterator[EvaluationResult]:
        """Yield the stored results in insertion order."""
        yield from self.evaluations

    def __bool__(self) -> bool:
        """Return True when at least one result is stored."""
        return bool(self.evaluations)

    def add(self, result: EvaluationResult) -> None:
        """Add an evaluation result."""
        self.evaluations.append(result)

    def is_cached(
        self,
        indexing_params: dict[str, Any],
        inference_params: dict[str, Any],
    ) -> float | None:
        """
        Check if this configuration was already evaluated.

        Both parameter dictionaries must compare equal to those of a stored
        result; the first match in insertion order wins.

        Parameters
        ----------
        indexing_params : dict[str, Any]
            Indexing hyperparameters of the configuration to look up.
        inference_params : dict[str, Any]
            Inference hyperparameters of the configuration to look up.

        Returns
        -------
        float or None
            Final score if cached, None otherwise. A cached score of ``0.0``
            is falsy, so callers should test the result with ``is not None``.
        """
        for ev in self.evaluations:
            if ev.indexing_params == indexing_params and ev.inference_params == inference_params:
                return ev.final_score
        return None

    @property
    def scores(self) -> list[float]:
        """All final scores, in insertion order."""
        return [ev.final_score for ev in self.evaluations]

    def sorted(self, reverse: bool = True) -> list[EvaluationResult]:
        """
        Get results sorted by final score.

        The stored list is not modified; a new list is returned.

        Parameters
        ----------
        reverse : bool
            If True (default), best scores first.

        Returns
        -------
        list[EvaluationResult]
            Sorted results.
        """
        # Inside a method body the bare name ``sorted`` resolves to the
        # builtin, not to this method: class scope is not searched from
        # function bodies.
        return sorted(self.evaluations, key=lambda x: x.final_score, reverse=reverse)

    def get_best(self, k: int = 1) -> list[EvaluationResult]:
        """
        Get k best results.

        Parameters
        ----------
        k : int
            Number of results to return. Values larger than the number of
            stored results return all of them; zero or negative values
            return an empty list.

        Returns
        -------
        list[EvaluationResult]
            Top k results by score, best first.
        """
        # A negative k would otherwise slice as "everything except the worst
        # |k| results", which is not a "top k" answer.
        if k <= 0:
            return []
        return self.sorted()[:k]