Mandark-droid committed on
Commit
a50320a
·
1 Parent(s): a3b9254

Add comparison report card feature to compare screen

Browse files

- Created generate_comparison_report_card() function in components/report_cards.py
- Side-by-side comparison with winner highlighting
- Green checkmarks for winning metrics
- Overall winner recommendation
- Black background with blue border matching other report cards

- Updated screens/compare.py
- Added Report Card tab with download button
- Moved download button inside tab (matching leaderboard pattern)
- Added comparison_card_html component with proper elem_id

- Updated app.py
- Added comparison_card_html to compare button outputs
- Wired up download_comparison_card_btn click handler

- Fixed CSS styling issues
- Escaped curly braces in f-string CSS
- Renamed inner div ID to avoid conflicts
- Added .tracemind-comparison-card to download fallback
- Fixed strong tag text color to white
- Added 3px solid #667eea border
- Matched padding, border-radius, and font to other cards

Files changed (3) hide show
  1. app.py +8 -1
  2. components/report_cards.py +161 -2
  3. screens/compare.py +28 -7
app.py CHANGED
@@ -1806,7 +1806,8 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
1806
  compare_components['run_b_card'],
1807
  compare_components['comparison_charts'],
1808
  compare_components['winner_summary'],
1809
- compare_components['radar_comparison_chart']
 
1810
  ]
1811
  )
1812
 
@@ -1819,6 +1820,12 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
1819
  ]
1820
  )
1821
 
 
 
 
 
 
 
1822
  leaderboard_table.select(
1823
  fn=on_drilldown_select,
1824
  inputs=[leaderboard_table], # Pass dataframe to handler (like MockTraceMind)
 
1806
  compare_components['run_b_card'],
1807
  compare_components['comparison_charts'],
1808
  compare_components['winner_summary'],
1809
+ compare_components['radar_comparison_chart'],
1810
+ compare_components['comparison_card_html']
1811
  ]
1812
  )
1813
 
 
1820
  ]
1821
  )
1822
 
1823
+ # Download comparison report card as PNG
1824
+ compare_components['download_comparison_card_btn'].click(
1825
+ fn=None,
1826
+ js=download_card_as_png_js(element_id="comparison-card-html")
1827
+ )
1828
+
1829
  leaderboard_table.select(
1830
  fn=on_drilldown_select,
1831
  inputs=[leaderboard_table], # Pass dataframe to handler (like MockTraceMind)
components/report_cards.py CHANGED
@@ -311,8 +311,8 @@ def download_card_as_png_js(element_id: str = "summary-card-html") -> str:
311
  let card = document.getElementById('{element_id}');
312
 
313
  if (!card) {{
314
- console.log('ID not found, trying class selector...');
315
- card = document.querySelector('.tracemind-run-card');
316
  }}
317
 
318
  if (!card) {{
@@ -599,3 +599,162 @@ def _get_card_css() -> str:
599
  }
600
  </style>
601
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  let card = document.getElementById('{element_id}');
312
 
313
  if (!card) {{
314
+ console.log('ID not found, trying class selectors...');
315
+ card = document.querySelector('.tracemind-run-card, .tracemind-comparison-card, .tracemind-summary-card');
316
  }}
317
 
318
  if (!card) {{
 
599
  }
600
  </style>
601
  """
602
+
603
+
604
def generate_comparison_report_card(run_a_data: dict, run_b_data: dict) -> str:
    """
    Generate HTML for a comparison report card showing two runs side by side.

    Four metrics are compared: success rate (higher wins) and total cost,
    average duration and CO2 emissions (lower wins). A metric on which both
    runs are equal, or which both runs lack, is a tie and is credited to
    neither run. A run that lacks a metric the other run has cannot win it.

    Args:
        run_a_data: Dictionary with Run A information
        run_b_data: Dictionary with Run B information

    Returns:
        HTML string for comparison report card (an "empty card" when either
        run dictionary is missing or empty)
    """
    from html import escape

    if not run_a_data or not run_b_data:
        return _create_empty_card_html("Missing run data for comparison")

    # `or 'Unknown'` also covers an explicit None; escaping keeps a model
    # name from injecting markup into the rendered card.
    model_a = escape(str(run_a_data.get('model') or 'Unknown').split('/')[-1])
    model_b = escape(str(run_b_data.get('model') or 'Unknown').split('/')[-1])

    logo_base64 = _get_logo_base64()

    # (key, default icon, label, higher_is_better, value formatter)
    metric_specs = (
        ('success_rate', '📊', 'Success', True, lambda v: f"{v:.1f}%"),
        ('total_cost_usd', '💰', 'Cost', False, lambda v: f"${v:.4f}"),
        ('avg_duration_ms', '⚡', 'Speed', False, lambda v: f"{v / 1000:.2f}s"),
        ('co2_emissions_g', '🌱', 'CO2', False, lambda v: f"{v:.2f}g"),
    )
    total_categories = len(metric_specs)

    a_wins = 0
    b_wins = 0
    rows_a = []
    rows_b = []
    for key, icon, label, higher_is_better, fmt in metric_specs:
        value_a = _comparison_metric_value(run_a_data, key)
        value_b = _comparison_metric_value(run_b_data, key)
        winner = _comparison_metric_winner(value_a, value_b, higher_is_better)
        if winner == "A":
            a_wins += 1
        elif winner == "B":
            b_wins += 1
        rows_a.append(_comparison_metric_row_html(icon, label, value_a, fmt, winner == "A"))
        rows_b.append(_comparison_metric_row_html(icon, label, value_b, fmt, winner == "B"))

    if a_wins > b_wins:
        overall_winner = "A"
    elif b_wins > a_wins:
        overall_winner = "B"
    else:
        overall_winner = "Tie"

    if overall_winner == "Tie":
        # Avoid the nonsensical "Run Tie" headline; show the split instead.
        headline = f"Overall Result: Tie ({a_wins}-{b_wins} across {total_categories} categories)"
        recommendation = "Both runs are evenly matched - choose based on your specific priorities"
    else:
        winner_wins = a_wins if overall_winner == "A" else b_wins
        winner_model = model_a if overall_winner == "A" else model_b
        headline = f"Overall Winner: Run {overall_winner} ({winner_wins}/{total_categories} categories)"
        recommendation = (
            f"<strong style='color: #ffffff !important;'>Run {overall_winner}</strong> "
            f"({winner_model}) is recommended for most use cases"
        )

    logo_html = ''
    if logo_base64:
        logo_html = (
            f'<img src="data:image/png;base64,{logo_base64}" alt="TraceMind Logo" class="card-logo" '
            'style="display: block !important; margin: 0 auto 15px auto !important; '
            'width: 120px !important; height: auto !important;" />'
        )

    # The overall winner's panel gets a green border; otherwise its accent colour.
    border_a = '#00ff00' if overall_winner == "A" else '#667eea'
    border_b = '#00ff00' if overall_winner == "B" else '#764ba2'
    metrics_a_html = "\n".join(rows_a)
    metrics_b_html = "\n".join(rows_b)
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M')

    card_html = f"""
    <div class="tracemind-comparison-card" id="comparison-card-content">
        <div class="card-header">
            {logo_html}
            <h1>⚖️ Model Comparison Report</h1>
            <p class="card-meta" style="color: rgba(255, 255, 255, 0.7) !important;">{model_a} vs {model_b}</p>
            <p class="card-date" style="color: rgba(255, 255, 255, 0.7) !important;">{generated_at}</p>
        </div>

        <div class="card-body">
            <!-- Overall Winner -->
            <div class="success-section">
                <div class="stars">{'🏆' * 5}</div>
                <div class="success-rate" style="color: #ffffff !important;">
                    {headline}
                </div>
            </div>

            <!-- Side by Side Comparison -->
            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin: 20px 0;">
                <!-- Run A -->
                <div style="padding: 15px; background: rgba(102, 126, 234, 0.1); border-radius: 8px; border: 2px solid {border_a};">
                    <h3 style="color: #667eea !important; margin-top: 0;">Run A: {model_a}</h3>
                    <div class="metrics-list">
                        {metrics_a_html}
                    </div>
                </div>

                <!-- Run B -->
                <div style="padding: 15px; background: rgba(118, 75, 162, 0.1); border-radius: 8px; border: 2px solid {border_b};">
                    <h3 style="color: #764ba2 !important; margin-top: 0;">Run B: {model_b}</h3>
                    <div class="metrics-list">
                        {metrics_b_html}
                    </div>
                </div>
            </div>

            <!-- Recommendation -->
            <div class="metrics-section">
                <h2 style="color: #ffffff !important;">💡 Recommendation</h2>
                <p style="color: #ffffff !important; font-size: 1.1em;">
                    {recommendation}
                </p>
            </div>
        </div>

        <div class="card-footer">
            <p style="margin: 0; color: #ffffff !important;">🔗 <span style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-weight: 600;">View detailed comparison at tracemind.huggingface.co</span></p>
        </div>
    </div>
    """

    # Plain (non-f) string: CSS braces need no escaping here.
    card_css = """
    <style>
        .tracemind-comparison-card {
            background: #000000 !important;
            border: 3px solid #667eea;
            border-radius: 24px;
            padding: 40px;
            max-width: 900px;
            margin: 20px auto;
            color: #ffffff !important;
            font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
            box-shadow: 0 10px 30px rgba(0, 0, 0, 0.5);
        }

        .tracemind-comparison-card .card-header {
            text-align: center;
            margin-bottom: 25px;
        }

        .tracemind-comparison-card h1 {
            color: white !important;
            font-size: 2em !important;
            margin: 10px 0 !important;
            font-weight: 700 !important;
        }

        .tracemind-comparison-card .metrics-section h2 {
            font-size: 1.3em !important;
            margin: 15px 0 10px 0 !important;
            font-weight: 600 !important;
        }

        .tracemind-comparison-card .metrics-list {
            margin: 10px 0;
            padding: 0;
            list-style: none;
        }

        .tracemind-comparison-card .metrics-list div {
            padding: 8px 0;
            font-size: 1em;
        }

        .tracemind-comparison-card .card-footer {
            margin-top: 25px;
            padding-top: 20px;
            border-top: 2px solid rgba(255, 255, 255, 0.2);
            text-align: center;
        }
    </style>
    """

    return card_html + card_css


def _comparison_metric_value(run_data: dict, key: str):
    """Return run_data[key] as a finite float, or None if absent, non-numeric, NaN or infinite."""
    import math

    try:
        value = float(run_data.get(key))
    except (TypeError, ValueError):
        return None
    return value if math.isfinite(value) else None


def _comparison_metric_winner(value_a, value_b, higher_is_better: bool) -> str:
    """Return "A", "B" or "Tie" for one metric; a missing (None) value never wins."""
    if value_a is None and value_b is None:
        return "Tie"
    if value_a is None:
        return "B"
    if value_b is None:
        return "A"
    if value_a == value_b:
        return "Tie"
    a_is_better = value_a > value_b if higher_is_better else value_a < value_b
    return "A" if a_is_better else "B"


def _comparison_metric_row_html(icon: str, label: str, value, fmt, is_winner: bool) -> str:
    """Render one metric row; the winning row is green, bold and marked with a check."""
    text = "N/A" if value is None else fmt(value)
    color = '#00ff00' if is_winner else '#ffffff'
    weight = 'bold' if is_winner else 'normal'
    marker = '✅' if is_winner else icon
    return (
        f'<div style="color: {color} !important; font-weight: {weight};">'
        f'{marker} {label}: {text}</div>'
    )
screens/compare.py CHANGED
@@ -7,6 +7,7 @@ import gradio as gr
7
  import plotly.graph_objects as go
8
  from plotly.subplots import make_subplots
9
  from typing import Dict, Any
 
10
 
11
 
12
  def create_run_comparison_card(run_data: Dict[str, Any], label: str) -> str:
@@ -217,12 +218,11 @@ def create_compare_ui():
217
  gr.Markdown("# Compare Runs")
218
  gr.Markdown("*Side-by-side comparison of two evaluation runs*")
219
 
220
- with gr.Row():
221
- components['back_to_leaderboard_btn'] = gr.Button(
222
- "Back to Leaderboard",
223
- variant="secondary",
224
- size="sm"
225
- )
226
 
227
  gr.Markdown("## Select Runs to Compare")
228
  with gr.Row():
@@ -288,6 +288,23 @@ def create_compare_ui():
288
  show_label=False
289
  )
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  components['comparison_output'] = comparison_output
292
 
293
  return compare_screen, components
@@ -367,13 +384,17 @@ def on_compare_runs(run_a_id: str, run_b_id: str, leaderboard_df, components: Di
367
  from components.analytics_charts import create_comparison_radar
368
  radar_chart = create_comparison_radar([run_a, run_b])
369
 
 
 
 
370
  return {
371
  components['comparison_output']: gr.update(visible=True),
372
  components['run_a_card']: gr.update(value=card_a),
373
  components['run_b_card']: gr.update(value=card_b),
374
  components['comparison_charts']: gr.update(value=charts),
375
  components['winner_summary']: gr.update(value=summary),
376
- components['radar_comparison_chart']: gr.update(value=radar_chart)
 
377
  }
378
 
379
  except Exception as e:
 
7
  import plotly.graph_objects as go
8
  from plotly.subplots import make_subplots
9
  from typing import Dict, Any
10
+ from components.report_cards import generate_comparison_report_card
11
 
12
 
13
  def create_run_comparison_card(run_data: Dict[str, Any], label: str) -> str:
 
218
  gr.Markdown("# Compare Runs")
219
  gr.Markdown("*Side-by-side comparison of two evaluation runs*")
220
 
221
+ components['back_to_leaderboard_btn'] = gr.Button(
222
+ "⬅️ Back to Leaderboard",
223
+ variant="secondary",
224
+ size="sm"
225
+ )
 
226
 
227
  gr.Markdown("## Select Runs to Compare")
228
  with gr.Row():
 
288
  show_label=False
289
  )
290
 
291
+ with gr.TabItem("📄 Report Card"):
292
+ gr.Markdown("### 📥 Downloadable Comparison Report Card")
293
+ gr.Markdown("*Side-by-side comparison card with winner analysis*")
294
+
295
+ with gr.Row():
296
+ with gr.Column(scale=1):
297
+ components['download_comparison_card_btn'] = gr.Button(
298
+ "📥 Download as PNG",
299
+ variant="primary",
300
+ size="lg"
301
+ )
302
+ with gr.Column(scale=2):
303
+ components['comparison_card_html'] = gr.HTML(
304
+ label="Comparison Report Card",
305
+ elem_id="comparison-card-html"
306
+ )
307
+
308
  components['comparison_output'] = comparison_output
309
 
310
  return compare_screen, components
 
384
  from components.analytics_charts import create_comparison_radar
385
  radar_chart = create_comparison_radar([run_a, run_b])
386
 
387
+ # Generate comparison report card
388
+ comparison_card = generate_comparison_report_card(run_a, run_b)
389
+
390
  return {
391
  components['comparison_output']: gr.update(visible=True),
392
  components['run_a_card']: gr.update(value=card_a),
393
  components['run_b_card']: gr.update(value=card_b),
394
  components['comparison_charts']: gr.update(value=charts),
395
  components['winner_summary']: gr.update(value=summary),
396
+ components['radar_comparison_chart']: gr.update(value=radar_chart),
397
+ components['comparison_card_html']: gr.update(value=comparison_card)
398
  }
399
 
400
  except Exception as e: