""" # Generate table rows for idx, row in df_sorted.iterrows(): rank = idx + 1 # Convert row to dictionary for data attributes (like reference implementation) row_dict = row.to_dict() # Generate data attributes dynamically from all row data data_attrs_dict = {} for key, value in row_dict.items(): # Convert underscores to hyphens for HTML data attributes attr_name = f"data-{key.replace('_', '-')}" # Handle None/NaN values if pd.isna(value): data_attrs_dict[attr_name] = "None" else: data_attrs_dict[attr_name] = str(value) # Create the data attributes string data_attrs = " ".join([f'{key}="{value}"' for key, value in data_attrs_dict.items()]) # Get values with safe defaults for display model = row.get('model', 'Unknown') agent_type = row.get('agent_type', 'unknown') provider = row.get('provider', 'unknown') success_rate = row.get('success_rate', 0.0) total_tests = row.get('total_tests', 0) successful_tests = row.get('successful_tests', 0) failed_tests = row.get('failed_tests', 0) avg_steps = row.get('avg_steps', 0.0) avg_duration_ms = row.get('avg_duration_ms', 0.0) total_tokens = row.get('total_tokens', 0) total_cost_usd = row.get('total_cost_usd', 0.0) co2_emissions_g = row.get('co2_emissions_g', 0.0) gpu_utilization_avg = row.get('gpu_utilization_avg', None) gpu_memory_avg_mib = row.get('gpu_memory_avg_mib', None) gpu_memory_max_mib = row.get('gpu_memory_max_mib', None) gpu_temperature_avg = row.get('gpu_temperature_avg', None) gpu_temperature_max = row.get('gpu_temperature_max', None) gpu_power_avg_w = row.get('gpu_power_avg_w', None) timestamp = row.get('timestamp', '') submitted_by = row.get('submitted_by', 'Unknown') # Check if GPU job has_gpu = pd.notna(gpu_utilization_avg) and gpu_utilization_avg > 0 # Format GPU utilization if has_gpu: gpu_display = get_gpu_utilization_bar(gpu_utilization_avg) else: gpu_display = 'N/A' # Format CO2 if pd.notna(co2_emissions_g) and co2_emissions_g > 0: co2_display = f'{co2_emissions_g:.2f}g' else: co2_display = 'N/A' # Format GPU Memory if pd.notna(gpu_memory_avg_mib) and pd.notna(gpu_memory_max_mib): gpu_mem_display = f'{gpu_memory_avg_mib:.0f}/{gpu_memory_max_mib:.0f}' else: gpu_mem_display = 'N/A' # Format GPU Temperature if pd.notna(gpu_temperature_avg) and pd.notna(gpu_temperature_max): gpu_temp_display = f'{gpu_temperature_avg:.0f}/{gpu_temperature_max:.0f}°C' else: gpu_temp_display = 'N/A' # Format GPU Power if pd.notna(gpu_power_avg_w): gpu_power_display = f'{gpu_power_avg_w:.1f}W' else: gpu_power_display = 'N/A' # Format timestamp from datetime import datetime if pd.notna(timestamp): try: # Handle both string and Timestamp objects if isinstance(timestamp, pd.Timestamp): timestamp_display = timestamp.strftime('%Y-%m-%d %H:%M') else: dt = datetime.fromisoformat(str(timestamp).replace('Z', '+00:00')) timestamp_display = dt.strftime('%Y-%m-%d %H:%M') except Exception as e: timestamp_display = str(timestamp)[:16] if timestamp else 'N/A' else: timestamp_display = 'N/A' # Format Run ID (show first 8 characters) run_id = row.get('run_id', 'N/A') run_id_short = run_id[:8] + '...' if len(run_id) > 8 else run_id html += f""" """ html += """

Rank	Run ID	Model	Type	Provider	Hardware	Success Rate	Tests (P/F)	Steps	Duration	Tokens	Cost	CO2	GPU Util	GPU Mem	GPU Temp	GPU Power	Timestamp	Submitted By
{get_rank_badge(rank)}	{run_id_short}	{model}	{get_agent_type_badge(agent_type)}	{get_provider_badge(provider)}	{get_hardware_badge(has_gpu)}	{get_success_rate_bar(success_rate)}	{total_tests} / {successful_tests} / {failed_tests}	{avg_steps:.1f}	{format_duration(avg_duration_ms)}	{total_tokens:,}	{format_cost(total_cost_usd)}	{co2_display}	{gpu_display}	{gpu_mem_display}	{gpu_temp_display}	{gpu_power_display}	{timestamp_display}	{submitted_by}

No Evaluation Results Yet