Spaces:
Running
Add comparison report card feature to compare screen
Browse files
- Created generate_comparison_report_card() function in components/report_cards.py
- Side-by-side comparison with winner highlighting
- Green checkmarks for winning metrics
- Overall winner recommendation
- Black background with blue border matching other report cards
- Updated screens/compare.py
- Added Report Card tab with download button
- Moved download button inside tab (matching leaderboard pattern)
- Added comparison_card_html component with proper elem_id
- Updated app.py
- Added comparison_card_html to compare button outputs
- Wired up download_comparison_card_btn click handler
- Fixed CSS styling issues
- Escaped curly braces in f-string CSS
- Renamed inner div ID to avoid conflicts
- Added .tracemind-comparison-card to download fallback
- Fixed strong tag text color to white
- Added 3px solid #667eea border
- Matched padding, border-radius, and font to other cards
- app.py +8 -1
- components/report_cards.py +161 -2
- screens/compare.py +28 -7
|
@@ -1806,7 +1806,8 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
|
|
| 1806 |
compare_components['run_b_card'],
|
| 1807 |
compare_components['comparison_charts'],
|
| 1808 |
compare_components['winner_summary'],
|
| 1809 |
-
compare_components['radar_comparison_chart']
|
|
|
|
| 1810 |
]
|
| 1811 |
)
|
| 1812 |
|
|
@@ -1819,6 +1820,12 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
|
|
| 1819 |
]
|
| 1820 |
)
|
| 1821 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1822 |
leaderboard_table.select(
|
| 1823 |
fn=on_drilldown_select,
|
| 1824 |
inputs=[leaderboard_table], # Pass dataframe to handler (like MockTraceMind)
|
|
|
|
| 1806 |
compare_components['run_b_card'],
|
| 1807 |
compare_components['comparison_charts'],
|
| 1808 |
compare_components['winner_summary'],
|
| 1809 |
+
compare_components['radar_comparison_chart'],
|
| 1810 |
+
compare_components['comparison_card_html']
|
| 1811 |
]
|
| 1812 |
)
|
| 1813 |
|
|
|
|
| 1820 |
]
|
| 1821 |
)
|
| 1822 |
|
| 1823 |
+
# Download comparison report card as PNG
|
| 1824 |
+
compare_components['download_comparison_card_btn'].click(
|
| 1825 |
+
fn=None,
|
| 1826 |
+
js=download_card_as_png_js(element_id="comparison-card-html")
|
| 1827 |
+
)
|
| 1828 |
+
|
| 1829 |
leaderboard_table.select(
|
| 1830 |
fn=on_drilldown_select,
|
| 1831 |
inputs=[leaderboard_table], # Pass dataframe to handler (like MockTraceMind)
|
|
@@ -311,8 +311,8 @@ def download_card_as_png_js(element_id: str = "summary-card-html") -> str:
|
|
| 311 |
let card = document.getElementById('{element_id}');
|
| 312 |
|
| 313 |
if (!card) {{
|
| 314 |
-
console.log('ID not found, trying class
|
| 315 |
-
card = document.querySelector('.tracemind-run-card');
|
| 316 |
}}
|
| 317 |
|
| 318 |
if (!card) {{
|
|
@@ -599,3 +599,162 @@ def _get_card_css() -> str:
|
|
| 599 |
}
|
| 600 |
</style>
|
| 601 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
let card = document.getElementById('{element_id}');
|
| 312 |
|
| 313 |
if (!card) {{
|
| 314 |
+
console.log('ID not found, trying class selectors...');
|
| 315 |
+
card = document.querySelector('.tracemind-run-card, .tracemind-comparison-card, .tracemind-summary-card');
|
| 316 |
}}
|
| 317 |
|
| 318 |
if (!card) {{
|
|
|
|
| 599 |
}
|
| 600 |
</style>
|
| 601 |
"""
|
| 602 |
+
|
| 603 |
+
|
| 604 |
+
def generate_comparison_report_card(run_a_data: dict, run_b_data: dict) -> str:
|
| 605 |
+
"""
|
| 606 |
+
Generate HTML for comparison report card showing two runs side by side
|
| 607 |
+
|
| 608 |
+
Args:
|
| 609 |
+
run_a_data: Dictionary with Run A information
|
| 610 |
+
run_b_data: Dictionary with Run B information
|
| 611 |
+
|
| 612 |
+
Returns:
|
| 613 |
+
HTML string for comparison report card
|
| 614 |
+
"""
|
| 615 |
+
|
| 616 |
+
if not run_a_data or not run_b_data:
|
| 617 |
+
return _create_empty_card_html("Missing run data for comparison")
|
| 618 |
+
|
| 619 |
+
model_a = run_a_data.get('model', 'Unknown').split('/')[-1]
|
| 620 |
+
model_b = run_b_data.get('model', 'Unknown').split('/')[-1]
|
| 621 |
+
|
| 622 |
+
# Get logo
|
| 623 |
+
logo_base64 = _get_logo_base64()
|
| 624 |
+
|
| 625 |
+
# Determine winners for each metric
|
| 626 |
+
success_winner = "A" if run_a_data.get('success_rate', 0) > run_b_data.get('success_rate', 0) else "B"
|
| 627 |
+
cost_winner = "A" if run_a_data.get('total_cost_usd', 999) < run_b_data.get('total_cost_usd', 999) else "B"
|
| 628 |
+
speed_winner = "A" if run_a_data.get('avg_duration_ms', 999999) < run_b_data.get('avg_duration_ms', 999999) else "B"
|
| 629 |
+
eco_winner = "A" if run_a_data.get('co2_emissions_g', 999) < run_b_data.get('co2_emissions_g', 999) else "B"
|
| 630 |
+
|
| 631 |
+
# Count overall wins
|
| 632 |
+
a_wins = sum(1 for w in [success_winner, cost_winner, speed_winner, eco_winner] if w == "A")
|
| 633 |
+
b_wins = 4 - a_wins
|
| 634 |
+
overall_winner = "A" if a_wins > b_wins else ("B" if b_wins > a_wins else "Tie")
|
| 635 |
+
|
| 636 |
+
html = f"""
|
| 637 |
+
<div class="tracemind-comparison-card" id="comparison-card-content">
|
| 638 |
+
<div class="card-header">
|
| 639 |
+
{f'<img src="data:image/png;base64,{logo_base64}" alt="TraceMind Logo" class="card-logo" style="display: block !important; margin: 0 auto 15px auto !important; width: 120px !important; height: auto !important;" />' if logo_base64 else ''}
|
| 640 |
+
<h1>⚖️ Model Comparison Report</h1>
|
| 641 |
+
<p class="card-meta" style="color: rgba(255, 255, 255, 0.7) !important;">{model_a} vs {model_b}</p>
|
| 642 |
+
<p class="card-date" style="color: rgba(255, 255, 255, 0.7) !important;">{datetime.now().strftime('%Y-%m-%d %H:%M')}</p>
|
| 643 |
+
</div>
|
| 644 |
+
|
| 645 |
+
<div class="card-body">
|
| 646 |
+
<!-- Overall Winner -->
|
| 647 |
+
<div class="success-section">
|
| 648 |
+
<div class="stars">{'🏆' * 5}</div>
|
| 649 |
+
<div class="success-rate" style="color: #ffffff !important;">
|
| 650 |
+
Overall Winner: Run {overall_winner} ({a_wins if overall_winner == "A" else b_wins}/4 categories)
|
| 651 |
+
</div>
|
| 652 |
+
</div>
|
| 653 |
+
|
| 654 |
+
<!-- Side by Side Comparison -->
|
| 655 |
+
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin: 20px 0;">
|
| 656 |
+
<!-- Run A -->
|
| 657 |
+
<div style="padding: 15px; background: rgba(102, 126, 234, 0.1); border-radius: 8px; border: 2px solid {'#00ff00' if overall_winner == "A" else '#667eea'};">
|
| 658 |
+
<h3 style="color: #667eea !important; margin-top: 0;">Run A: {model_a}</h3>
|
| 659 |
+
<div class="metrics-list">
|
| 660 |
+
<div style="color: {'#00ff00' if success_winner == "A" else '#ffffff'} !important; font-weight: {'bold' if success_winner == "A" else 'normal'};">
|
| 661 |
+
{'✅' if success_winner == "A" else '📊'} Success: {run_a_data.get('success_rate', 0):.1f}%
|
| 662 |
+
</div>
|
| 663 |
+
<div style="color: {'#00ff00' if cost_winner == "A" else '#ffffff'} !important; font-weight: {'bold' if cost_winner == "A" else 'normal'};">
|
| 664 |
+
{'✅' if cost_winner == "A" else '💰'} Cost: ${run_a_data.get('total_cost_usd', 0):.4f}
|
| 665 |
+
</div>
|
| 666 |
+
<div style="color: {'#00ff00' if speed_winner == "A" else '#ffffff'} !important; font-weight: {'bold' if speed_winner == "A" else 'normal'};">
|
| 667 |
+
{'✅' if speed_winner == "A" else '⚡'} Speed: {run_a_data.get('avg_duration_ms', 0)/1000:.2f}s
|
| 668 |
+
</div>
|
| 669 |
+
<div style="color: {'#00ff00' if eco_winner == "A" else '#ffffff'} !important; font-weight: {'bold' if eco_winner == "A" else 'normal'};">
|
| 670 |
+
{'✅' if eco_winner == "A" else '🌱'} CO2: {run_a_data.get('co2_emissions_g', 0):.2f}g
|
| 671 |
+
</div>
|
| 672 |
+
</div>
|
| 673 |
+
</div>
|
| 674 |
+
|
| 675 |
+
<!-- Run B -->
|
| 676 |
+
<div style="padding: 15px; background: rgba(118, 75, 162, 0.1); border-radius: 8px; border: 2px solid {'#00ff00' if overall_winner == "B" else '#764ba2'};">
|
| 677 |
+
<h3 style="color: #764ba2 !important; margin-top: 0;">Run B: {model_b}</h3>
|
| 678 |
+
<div class="metrics-list">
|
| 679 |
+
<div style="color: {'#00ff00' if success_winner == "B" else '#ffffff'} !important; font-weight: {'bold' if success_winner == "B" else 'normal'};">
|
| 680 |
+
{'✅' if success_winner == "B" else '📊'} Success: {run_b_data.get('success_rate', 0):.1f}%
|
| 681 |
+
</div>
|
| 682 |
+
<div style="color: {'#00ff00' if cost_winner == "B" else '#ffffff'} !important; font-weight: {'bold' if cost_winner == "B" else 'normal'};">
|
| 683 |
+
{'✅' if cost_winner == "B" else '💰'} Cost: ${run_b_data.get('total_cost_usd', 0):.4f}
|
| 684 |
+
</div>
|
| 685 |
+
<div style="color: {'#00ff00' if speed_winner == "B" else '#ffffff'} !important; font-weight: {'bold' if speed_winner == "B" else 'normal'};">
|
| 686 |
+
{'✅' if speed_winner == "B" else '⚡'} Speed: {run_b_data.get('avg_duration_ms', 0)/1000:.2f}s
|
| 687 |
+
</div>
|
| 688 |
+
<div style="color: {'#00ff00' if eco_winner == "B" else '#ffffff'} !important; font-weight: {'bold' if eco_winner == "B" else 'normal'};">
|
| 689 |
+
{'✅' if eco_winner == "B" else '🌱'} CO2: {run_b_data.get('co2_emissions_g', 0):.2f}g
|
| 690 |
+
</div>
|
| 691 |
+
</div>
|
| 692 |
+
</div>
|
| 693 |
+
</div>
|
| 694 |
+
|
| 695 |
+
<!-- Recommendation -->
|
| 696 |
+
<div class="metrics-section">
|
| 697 |
+
<h2 style="color: #ffffff !important;">💡 Recommendation</h2>
|
| 698 |
+
<p style="color: #ffffff !important; font-size: 1.1em;">
|
| 699 |
+
{f"<strong style='color: #ffffff !important;'>Run {overall_winner}</strong> ({model_a if overall_winner == 'A' else model_b}) is recommended for most use cases" if overall_winner != "Tie" else "Both runs are evenly matched - choose based on your specific priorities"}
|
| 700 |
+
</p>
|
| 701 |
+
</div>
|
| 702 |
+
</div>
|
| 703 |
+
|
| 704 |
+
<div class="card-footer">
|
| 705 |
+
<p style="margin: 0; color: #ffffff !important;">🔗 <span style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-weight: 600;">View detailed comparison at tracemind.huggingface.co</span></p>
|
| 706 |
+
</div>
|
| 707 |
+
</div>
|
| 708 |
+
|
| 709 |
+
<style>
|
| 710 |
+
.tracemind-comparison-card {{
|
| 711 |
+
background: #000000 !important;
|
| 712 |
+
border: 3px solid #667eea;
|
| 713 |
+
border-radius: 24px;
|
| 714 |
+
padding: 40px;
|
| 715 |
+
max-width: 900px;
|
| 716 |
+
margin: 20px auto;
|
| 717 |
+
color: #ffffff !important;
|
| 718 |
+
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
| 719 |
+
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.5);
|
| 720 |
+
}}
|
| 721 |
+
|
| 722 |
+
.tracemind-comparison-card .card-header {{
|
| 723 |
+
text-align: center;
|
| 724 |
+
margin-bottom: 25px;
|
| 725 |
+
}}
|
| 726 |
+
|
| 727 |
+
.tracemind-comparison-card h1 {{
|
| 728 |
+
color: white !important;
|
| 729 |
+
font-size: 2em !important;
|
| 730 |
+
margin: 10px 0 !important;
|
| 731 |
+
font-weight: 700 !important;
|
| 732 |
+
}}
|
| 733 |
+
|
| 734 |
+
.tracemind-comparison-card .metrics-section h2 {{
|
| 735 |
+
font-size: 1.3em !important;
|
| 736 |
+
margin: 15px 0 10px 0 !important;
|
| 737 |
+
font-weight: 600 !important;
|
| 738 |
+
}}
|
| 739 |
+
|
| 740 |
+
.tracemind-comparison-card .metrics-list {{
|
| 741 |
+
margin: 10px 0;
|
| 742 |
+
padding: 0;
|
| 743 |
+
list-style: none;
|
| 744 |
+
}}
|
| 745 |
+
|
| 746 |
+
.tracemind-comparison-card .metrics-list div {{
|
| 747 |
+
padding: 8px 0;
|
| 748 |
+
font-size: 1em;
|
| 749 |
+
}}
|
| 750 |
+
|
| 751 |
+
.tracemind-comparison-card .card-footer {{
|
| 752 |
+
margin-top: 25px;
|
| 753 |
+
padding-top: 20px;
|
| 754 |
+
border-top: 2px solid rgba(255, 255, 255, 0.2);
|
| 755 |
+
text-align: center;
|
| 756 |
+
}}
|
| 757 |
+
</style>
|
| 758 |
+
"""
|
| 759 |
+
|
| 760 |
+
return html
|
|
@@ -7,6 +7,7 @@ import gradio as gr
|
|
| 7 |
import plotly.graph_objects as go
|
| 8 |
from plotly.subplots import make_subplots
|
| 9 |
from typing import Dict, Any
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def create_run_comparison_card(run_data: Dict[str, Any], label: str) -> str:
|
|
@@ -217,12 +218,11 @@ def create_compare_ui():
|
|
| 217 |
gr.Markdown("# Compare Runs")
|
| 218 |
gr.Markdown("*Side-by-side comparison of two evaluation runs*")
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
)
|
| 226 |
|
| 227 |
gr.Markdown("## Select Runs to Compare")
|
| 228 |
with gr.Row():
|
|
@@ -288,6 +288,23 @@ def create_compare_ui():
|
|
| 288 |
show_label=False
|
| 289 |
)
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
components['comparison_output'] = comparison_output
|
| 292 |
|
| 293 |
return compare_screen, components
|
|
@@ -367,13 +384,17 @@ def on_compare_runs(run_a_id: str, run_b_id: str, leaderboard_df, components: Di
|
|
| 367 |
from components.analytics_charts import create_comparison_radar
|
| 368 |
radar_chart = create_comparison_radar([run_a, run_b])
|
| 369 |
|
|
|
|
|
|
|
|
|
|
| 370 |
return {
|
| 371 |
components['comparison_output']: gr.update(visible=True),
|
| 372 |
components['run_a_card']: gr.update(value=card_a),
|
| 373 |
components['run_b_card']: gr.update(value=card_b),
|
| 374 |
components['comparison_charts']: gr.update(value=charts),
|
| 375 |
components['winner_summary']: gr.update(value=summary),
|
| 376 |
-
components['radar_comparison_chart']: gr.update(value=radar_chart)
|
|
|
|
| 377 |
}
|
| 378 |
|
| 379 |
except Exception as e:
|
|
|
|
| 7 |
import plotly.graph_objects as go
|
| 8 |
from plotly.subplots import make_subplots
|
| 9 |
from typing import Dict, Any
|
| 10 |
+
from components.report_cards import generate_comparison_report_card
|
| 11 |
|
| 12 |
|
| 13 |
def create_run_comparison_card(run_data: Dict[str, Any], label: str) -> str:
|
|
|
|
| 218 |
gr.Markdown("# Compare Runs")
|
| 219 |
gr.Markdown("*Side-by-side comparison of two evaluation runs*")
|
| 220 |
|
| 221 |
+
components['back_to_leaderboard_btn'] = gr.Button(
|
| 222 |
+
"⬅️ Back to Leaderboard",
|
| 223 |
+
variant="secondary",
|
| 224 |
+
size="sm"
|
| 225 |
+
)
|
|
|
|
| 226 |
|
| 227 |
gr.Markdown("## Select Runs to Compare")
|
| 228 |
with gr.Row():
|
|
|
|
| 288 |
show_label=False
|
| 289 |
)
|
| 290 |
|
| 291 |
+
with gr.TabItem("📄 Report Card"):
|
| 292 |
+
gr.Markdown("### 📥 Downloadable Comparison Report Card")
|
| 293 |
+
gr.Markdown("*Side-by-side comparison card with winner analysis*")
|
| 294 |
+
|
| 295 |
+
with gr.Row():
|
| 296 |
+
with gr.Column(scale=1):
|
| 297 |
+
components['download_comparison_card_btn'] = gr.Button(
|
| 298 |
+
"📥 Download as PNG",
|
| 299 |
+
variant="primary",
|
| 300 |
+
size="lg"
|
| 301 |
+
)
|
| 302 |
+
with gr.Column(scale=2):
|
| 303 |
+
components['comparison_card_html'] = gr.HTML(
|
| 304 |
+
label="Comparison Report Card",
|
| 305 |
+
elem_id="comparison-card-html"
|
| 306 |
+
)
|
| 307 |
+
|
| 308 |
components['comparison_output'] = comparison_output
|
| 309 |
|
| 310 |
return compare_screen, components
|
|
|
|
| 384 |
from components.analytics_charts import create_comparison_radar
|
| 385 |
radar_chart = create_comparison_radar([run_a, run_b])
|
| 386 |
|
| 387 |
+
# Generate comparison report card
|
| 388 |
+
comparison_card = generate_comparison_report_card(run_a, run_b)
|
| 389 |
+
|
| 390 |
return {
|
| 391 |
components['comparison_output']: gr.update(visible=True),
|
| 392 |
components['run_a_card']: gr.update(value=card_a),
|
| 393 |
components['run_b_card']: gr.update(value=card_b),
|
| 394 |
components['comparison_charts']: gr.update(value=charts),
|
| 395 |
components['winner_summary']: gr.update(value=summary),
|
| 396 |
+
components['radar_comparison_chart']: gr.update(value=radar_chart),
|
| 397 |
+
components['comparison_card_html']: gr.update(value=comparison_card)
|
| 398 |
}
|
| 399 |
|
| 400 |
except Exception as e:
|