from typing import Any, Dict, List, Tuple

import numpy as np
from sklearn.metrics import mean_squared_error, roc_auc_score


def _all_sentence_keys(
    docs_sentences: List[List[Tuple[str, str]]]
) -> List[str]:
    """Flatten the (key, sentence) pairs of every document into one key list."""
    keys: List[str] = []
    for doc in docs_sentences:
        for key, _ in doc:
            keys.append(key)
    return keys


def trace_from_attributes(
    attrs: Dict[str, Any],
    docs_sentences: List[List[Tuple[str, str]]],
) -> Dict[str, float]:
    """Turn annotated sentence keys into relevance, utilization,
    completeness, and adherence scores."""
    all_keys = _all_sentence_keys(docs_sentences)
    total = len(all_keys)
    if total == 0:
        return {
            "relevance": 0.0,
            "utilization": 0.0,
            "completeness": 0.0,
            "adherence": 0.0,
        }
    # Ignore annotated keys that do not occur in the retrieved documents.
    relevant = set(attrs.get("all_relevant_sentence_keys", [])) & set(all_keys)
    utilized = set(attrs.get("all_utilized_sentence_keys", [])) & set(all_keys)
    # The early return above guarantees total > 0 from here on.
    relevance = len(relevant) / total
    utilization = len(utilized) / total
    # Completeness: share of the relevant sentences the answer actually used.
    completeness = len(relevant & utilized) / len(relevant) if relevant else 0.0
    adherence = 1.0 if attrs.get("overall_supported", False) else 0.0
    return {
        "relevance": float(relevance),
        "utilization": float(utilization),
        "completeness": float(completeness),
        "adherence": float(adherence),
    }


def compute_rmse_auc(
    y_true_rel: List[float],
    y_pred_rel: List[float],
    y_true_util: List[float],
    y_pred_util: List[float],
    y_true_comp: List[float],
    y_pred_comp: List[float],
    y_true_adh: List[int],
    y_pred_adh: List[float],
) -> Dict[str, float]:
    """RMSE for the three continuous scores, AUROC for binary adherence."""
    # np.sqrt(MSE) works on every scikit-learn version; the `squared=False`
    # keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
    metrics = {
        "rmse_relevance": float(
            np.sqrt(mean_squared_error(y_true_rel, y_pred_rel))
        ),
        "rmse_utilization": float(
            np.sqrt(mean_squared_error(y_true_util, y_pred_util))
        ),
        "rmse_completeness": float(
            np.sqrt(mean_squared_error(y_true_comp, y_pred_comp))
        ),
    }
    if len(set(y_true_adh)) > 1:
        metrics["auroc_adherence"] = float(
            roc_auc_score(y_true_adh, y_pred_adh)
        )
    else:
        # AUROC is undefined when only one class is present; report the 0.5
        # chance level instead of NaN so downstream averaging stays numeric.
        metrics["auroc_adherence"] = 0.5
    return metrics
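

# --- Usage sketch (hypothetical data, purely illustrative) ------------------
# The dict shape below mirrors what trace_from_attributes consumes; the
# sentence keys, texts, and score vectors are made up for demonstration.
if __name__ == "__main__":
    docs = [
        [("0a", "Paris is the capital of France."),
         ("0b", "France is in Europe.")],
        [("1a", "The Eiffel Tower is in Paris.")],
    ]
    attrs = {
        "all_relevant_sentence_keys": ["0a", "1a"],
        "all_utilized_sentence_keys": ["0a"],
        "overall_supported": True,
    }
    print(trace_from_attributes(attrs, docs))
    # -> relevance 2/3, utilization 1/3, completeness 0.5, adherence 1.0

    print(compute_rmse_auc(
        y_true_rel=[0.7, 0.2], y_pred_rel=[0.6, 0.3],
        y_true_util=[0.3, 0.1], y_pred_util=[0.4, 0.2],
        y_true_comp=[0.5, 1.0], y_pred_comp=[0.5, 0.8],
        y_true_adh=[1, 0], y_pred_adh=[0.9, 0.2],
    ))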