Spaces:
Sleeping
Sleeping
New changes
Browse files
app.py
CHANGED
|
@@ -564,7 +564,19 @@ def evaluate_rag_pipeline(domain, q_indices):
|
|
| 564 |
gt_r = dataset[i].get('relevance_score')
|
| 565 |
gt_u = dataset[i].get('utilization_score')
|
| 566 |
gt_c = dataset[i].get('completeness_score')
|
| 567 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
|
| 569 |
safe_append(gt_relevance, pred_relevance, gt_r, predicted['Context Relevance'])
|
| 570 |
safe_append(gt_utilization, pred_utilization, gt_u, predicted['Context Utilization'])
|
|
|
|
| 564 |
gt_r = dataset[i].get('relevance_score')
|
| 565 |
gt_u = dataset[i].get('utilization_score')
|
| 566 |
gt_c = dataset[i].get('completeness_score')
|
| 567 |
+
|
| 568 |
+
adherence_raw = dataset[i].get('adherence_score')
|
| 569 |
+
if isinstance(adherence_raw, bool):
|
| 570 |
+
gt_a = 1 if adherence_raw is True else 0
|
| 571 |
+
elif isinstance(adherence_raw, str):
|
| 572 |
+
if adherence_raw.lower() == "true":
|
| 573 |
+
gt_a = 1
|
| 574 |
+
elif adherence_raw.lower() == "false":
|
| 575 |
+
gt_a = 0
|
| 576 |
+
else:
|
| 577 |
+
gt_a = None
|
| 578 |
+
else:
|
| 579 |
+
gt_a = None
|
| 580 |
|
| 581 |
safe_append(gt_relevance, pred_relevance, gt_r, predicted['Context Relevance'])
|
| 582 |
safe_append(gt_utilization, pred_utilization, gt_u, predicted['Context Utilization'])
|