Spaces:

galileo-ai
/

agent-leaderboard

Running on CPU Upgrade

App Files Files Community

Pratik Bhavsar committed 24 days ago

Commit

69c6c68

1 Parent(s): 41c1420

added future perf prediction

Browse files

Files changed (4) hide show

components/prediction_components.py +592 -0
requirements.txt +2 -1
results_v2.csv +23 -23
tabs/leaderboard_v2.py +139 -4

components/prediction_components.py ADDED Viewed

	@@ -0,0 +1,592 @@

+"""Components for AC prediction and visualization"""
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+import plotly.graph_objects as go
+try:
+    from scipy.optimize import curve_fit
+    from scipy import stats
+    HAS_SCIPY = True
+except ImportError:
+    HAS_SCIPY = False
+    # Fallback to numpy polynomial fitting
+    def curve_fit(func, xdata, ydata, p0=None, maxfev=5000, bounds=None):
+        # Simple fallback - just use polynomial fitting
+        if func.__name__ == 'exponential_growth':
+            # Linearize exponential: log(y) = log(a) + b*x
+            log_y = np.log(ydata + 0.01)  # Add small constant to avoid log(0)
+            coeffs = np.polyfit(xdata, log_y, 1)
+            a = np.exp(coeffs[1])
+            b = coeffs[0]
+            c = 0.01
+            return [a, b, c], None
+        elif func.__name__ == 'logistic_growth':
+            # Better fallback for logistic using data characteristics
+            # Estimate L (max value) as slightly above current max
+            L = min(1.0, max(ydata) * 1.2)  # Cap at 1.0
+            # Estimate x0 (midpoint) - when growth would be fastest
+            # For now, project forward from current trend
+            if len(xdata) > 1:
+                # Simple linear projection to estimate when we'd hit midpoint
+                slope = (ydata[-1] - ydata[0]) / (xdata[-1] - xdata[0])
+                if slope > 0:
+                    # Estimate days to reach L/2
+                    midpoint_value = L / 2
+                    if ydata[-1] < midpoint_value:
+                        days_to_midpoint = (midpoint_value - ydata[-1]) / slope
+                        x0 = xdata[-1] + days_to_midpoint
+                    else:
+                        x0 = np.median(xdata)
+                else:
+                    x0 = np.median(xdata)
+            else:
+                x0 = np.median(xdata)
+            # Estimate k (growth rate) based on current growth
+            k = 0.003  # Conservative default
+            return [L, k, x0], None
+        elif func.__name__ == 'power_law':
+            # Linearize power law: log(y) = log(a) + b*log(x)
+            log_x = np.log(xdata + 1)
+            log_y = np.log(ydata + 0.01)
+            coeffs = np.polyfit(log_x, log_y, 1)
+            return [np.exp(coeffs[1]), coeffs[0]], None
+        return p0, None
+def exponential_growth(x, a, b, c):
+    """Exponential growth function: y = a * exp(b * x) + c"""
+    return a * np.exp(b * x) + c
+def logistic_growth(x, L, k, x0):
+    """Logistic growth function: y = L / (1 + exp(-k*(x-x0)))"""
+    return L / (1 + np.exp(-k * (x - x0)))
+def power_law(x, a, b):
+    """Power law function: y = a * x^b"""
+    return a * np.power(x, b)
+def create_ac_prediction_chart(df, domain_filter="All", model_type_filter="All"):
+    """Create a prediction chart showing when AC will reach 99%
+    Args:
+        df: DataFrame with model data
+        domain_filter: Domain to filter by (All, Banking, Healthcare, etc.)
+        model_type_filter: Model type to filter by (All, Open Source, Proprietary)
+    """
+    # Clean up domain filter (remove emoji prefix if present)
+    if domain_filter.startswith('🌐'):
+        domain_filter = "All"
+    elif domain_filter.startswith('🏦'):
+        domain_filter = "Banking"
+    elif domain_filter.startswith('🏥'):
+        domain_filter = "Healthcare"
+    elif domain_filter.startswith('🛡️'):
+        domain_filter = "Insurance"
+    elif domain_filter.startswith('💰'):
+        domain_filter = "Investment"
+    elif domain_filter.startswith('📱'):
+        domain_filter = "Telecom"
+    # Determine which AC column to use based on domain filter
+    if domain_filter != "All":
+        ac_column = f'{domain_filter} AC'
+        # Check if domain-specific column exists
+        if ac_column not in df.columns:
+            ac_column = 'Avg AC'
+    else:
+        ac_column = 'Avg AC'
+    # Filter data to only include models with valid release dates and AC scores
+    df_clean = df.dropna(subset=['Release Date', ac_column])
+    df_clean = df_clean[df_clean[ac_column] > 0]
+    # Apply model type filter
+    if model_type_filter == "Open Source":
+        df_clean = df_clean[df_clean['Model Type'] == 'Open source']
+    elif model_type_filter == "Proprietary":
+        df_clean = df_clean[df_clean['Model Type'] == 'Proprietary']
+    # Rename the AC column to 'Avg AC' for consistent processing (only if different)
+    if ac_column != 'Avg AC':
+        # Drop the original 'Avg AC' column if it exists to avoid duplicates
+        if 'Avg AC' in df_clean.columns:
+            df_clean = df_clean.drop(columns=['Avg AC'])
+        df_clean = df_clean.rename(columns={ac_column: 'Avg AC'})
+    # Make a copy to avoid any issues with the original data
+    df_clean = df_clean.copy()
+    # Handle both YYYY-MM and YYYY-MM-DD formats
+    if df_clean['Release Date'].str.contains('-').all():
+        # Check if it's YYYY-MM format (no day component)
+        if df_clean['Release Date'].str.count('-').iloc[0] == 1:
+            # Add '-01' to make it a valid date
+            df_clean['Release Date'] = pd.to_datetime(df_clean['Release Date'] + '-01')
+        else:
+            df_clean['Release Date'] = pd.to_datetime(df_clean['Release Date'])
+    else:
+        df_clean['Release Date'] = pd.to_datetime(df_clean['Release Date'])
+    # Sort by release date
+    df_clean = df_clean.sort_values('Release Date')
+    # Create a running maximum (best performance achieved up to each date)
+    df_clean['Cumulative_Max_AC'] = df_clean['Avg AC'].expanding().max()
+    # Group by date and take the cumulative maximum for each date
+    df_best = df_clean.groupby('Release Date')['Cumulative_Max_AC'].max().reset_index()
+    df_best.columns = ['Release Date', 'Avg AC']
+    # Apply cumulative maximum again to ensure monotonic increase
+    df_best['Avg AC'] = df_best['Avg AC'].cummax()
+    # Convert dates to days since first release for curve fitting
+    first_date = df_best['Release Date'].min()
+    df_best['Days'] = (df_best['Release Date'] - first_date).dt.days
+    # Prepare data for fitting
+    x_data = df_best['Days'].values
+    y_data = df_best['Avg AC'].values
+    # With limited data (only 2 performance levels), use simple conservative linear projection
+    # Don't try to fit complex curves that will overfit
+    best_model = 'linear'
+    # Calculate simple linear trend
+    if len(x_data) > 1:
+        # Basic linear regression
+        z = np.polyfit(x_data, y_data, 1)
+        slope = z[0]
+        # Apply conservative adjustment (assume diminishing returns)
+        conservative_slope = slope * 0.5  # Assume 50% slower future improvements
+        # Create conservative linear projection
+        best_fit = [conservative_slope, y_data[-1] - conservative_slope * x_data[-1]]
+        # Calculate R² for the linear fit
+        p = np.poly1d(z)
+        y_pred = p(x_data)
+        best_r2 = 1 - (np.sum((y_data - y_pred)**2) / np.sum((y_data - y_data.mean())**2))
+    else:
+        # Single data point - use minimal growth
+        best_fit = [0.0001, y_data[0]]
+        best_r2 = 0.0
+    # Generate prediction timeline
+    future_days = np.arange(0, 5475, 30)  # 15 years in 30-day intervals
+    # Simple conservative linear projection
+    p = np.poly1d(best_fit)
+    future_ac = p(future_days)
+    # Cap predictions at 1.0
+    future_ac = np.minimum(future_ac, 1.0)
+    # Find when we reach 99%
+    target_ac = 0.99
+    crossing_idx = np.where(future_ac >= target_ac)[0]
+    if len(crossing_idx) > 0:
+        days_to_99 = future_days[crossing_idx[0]]
+        date_99 = first_date + timedelta(days=int(days_to_99))
+        months_from_now = (date_99 - datetime.now()).days / 30.4
+    else:
+        date_99 = None
+        months_from_now = None
+    # Convert future days to dates
+    future_dates = [first_date + timedelta(days=int(d)) for d in future_days]
+    # Create the plot
+    fig = go.Figure()
+    # Add confidence bands FIRST (so they appear behind other traces)
+    if best_model and best_fit is not None:
+        # Generate smooth confidence bands
+        future_std = 0.05  # Base uncertainty
+        confidence_multiplier = np.linspace(1.0, 2.0, len(future_dates))
+        upper_bound = np.minimum(future_ac + future_std * confidence_multiplier, 1.0)
+        lower_bound = np.maximum(future_ac - future_std * confidence_multiplier, 0)
+        # Add confidence band as filled area
+        fig.add_trace(go.Scatter(
+            x=future_dates + future_dates[::-1],
+            y=list(upper_bound) + list(lower_bound[::-1]),
+            fill='toself',
+            fillcolor='rgba(16, 152, 247, 0.05)',
+            line=dict(width=0),
+            showlegend=False,
+            hoverinfo='skip',
+            name='Uncertainty'
+        ))
+    # Add vendor info and additional metrics
+    df_with_vendor = df_clean.copy()
+    if 'Vendor' in df.columns:
+        vendor_map = df.set_index('Model')['Vendor'].to_dict()
+        df_with_vendor['Vendor'] = df_with_vendor['Model'].map(vendor_map).fillna('Unknown')
+    else:
+        df_with_vendor['Vendor'] = 'Unknown'
+    if 'Model Type' in df.columns:
+        type_map = df.set_index('Model')['Model Type'].to_dict()
+        df_with_vendor['Model Type'] = df_with_vendor['Model'].map(type_map).fillna('Unknown')
+    else:
+        df_with_vendor['Model Type'] = 'Unknown'
+    # Calculate additional metrics for each model
+    df_with_vendor['Gap_to_99'] = 0.99 - df_with_vendor['Avg AC']
+    df_with_vendor['Gap_to_Best'] = df_with_vendor['Cumulative_Max_AC'] - df_with_vendor['Avg AC']
+    # Get cost info if available
+    if 'Avg Total Cost' in df.columns:
+        cost_map = df.set_index('Model')['Avg Total Cost'].to_dict()
+        df_with_vendor['Cost'] = df_with_vendor['Model'].map(cost_map).fillna(0)
+    else:
+        df_with_vendor['Cost'] = 0
+    # Check if each model improved the frontier
+    df_with_vendor['Is_Frontier'] = df_with_vendor['Avg AC'] >= df_with_vendor['Cumulative_Max_AC'] - 0.001  # Small tolerance for float comparison
+    # Create frontier status text
+    frontier_status = []
+    for idx, row in df_with_vendor.iterrows():
+        if row['Is_Frontier']:
+            frontier_status.append('✅ Advanced SOTA')
+        else:
+            frontier_status.append('❌ Below existing best')
+    vendor_info = df_with_vendor['Vendor'].values
+    model_type = df_with_vendor['Model Type'].values
+    gap_to_99 = df_with_vendor['Gap_to_99'].values
+    gap_to_best = df_with_vendor['Gap_to_Best'].values
+    cost_info = df_with_vendor['Cost'].values
+    # Add historical data points with comprehensive hover
+    fig.add_trace(go.Scatter(
+        x=df_clean['Release Date'],
+        y=df_clean['Avg AC'],
+        mode='markers',
+        name='Individual Models',
+        marker=dict(
+            size=14,
+            color='rgba(227, 84, 84, 0.8)',
+            line=dict(width=2, color='rgba(255, 255, 255, 0.6)'),
+            symbol='circle'
+        ),
+        customdata=list(zip(vendor_info, model_type, gap_to_99, gap_to_best, cost_info, frontier_status)),
+        hovertemplate=(
+            '<b style="font-size: 18px; color: #E35454;">%{text}</b><br>'
+            '<br>'
+            '<b style="color: #1098F7;">Model Information:</b><br>'
+            '• <b>Vendor:</b> %{customdata[0]}<br>'
+            '• <b>Type:</b> %{customdata[1]}<br>'
+            '• <b>Released:</b> %{x|%B %Y}<br>'
+            '• <b>Frontier Status:</b> %{customdata[5]}<br>'
+            '<br>'
+            '<b style="color: #FFD700;">Performance Metrics:</b><br>'
+            '• <b>Action Completion:</b> <span style="font-size: 20px; color: #FFD700;">%{y:.1%}</span><br>'
+            '• <b>Gap to 99%:</b> <span style="color: #FF6B6B;">-%{customdata[2]:.1%}</span><br>'
+            '• <b>Behind Best:</b> <span style="color: #FFA500;">-%{customdata[3]:.1%}</span><br>'
+            '<br>'
+            '<b style="color: #28a745;">Cost Efficiency:</b><br>'
+            '• <b>Avg Session Cost:</b> $%{customdata[4]:.4f}<br>'
+            '<br>'
+            '<i style="color: #B1B5B9; font-size: 11px;">Performance at release time</i>'
+            '<extra></extra>'
+        ),
+        text=df_clean['Model'].values,
+        hoverlabel=dict(
+            bgcolor='rgba(26, 26, 46, 0.95)',
+            bordercolor='rgba(227, 84, 84, 0.5)',
+            font=dict(size=12, color='#F5F6F7', family='Geist, sans-serif'),
+            align='left',
+            namelength=-1
+        )
+    ))
+    # Calculate improvement metrics for hover
+    df_best['Improvement'] = df_best['Avg AC'].diff().fillna(0)
+    df_best['Improvement_Pct'] = (df_best['Avg AC'].pct_change() * 100).fillna(0)
+    df_best['Gap_to_99'] = 0.99 - df_best['Avg AC']
+    # Find which model is responsible for the best performance at each date
+    best_model_at_date = []
+    for date_val in df_best['Release Date']:
+        # Find all models up to and including this date
+        models_up_to_date = df_clean[df_clean['Release Date'] <= date_val]
+        if not models_up_to_date.empty:
+            # Find the model with the highest AC score up to this date
+            best_idx = models_up_to_date['Avg AC'].idxmax()
+            best_model_at_date.append(models_up_to_date.loc[best_idx, 'Model'])
+        else:
+            best_model_at_date.append('Unknown')
+    # Add best performance line with enhanced metrics
+    fig.add_trace(go.Scatter(
+        x=df_best['Release Date'],
+        y=df_best['Avg AC'],
+        mode='lines+markers',
+        name='Best Performance Trend',
+        line=dict(color='#E35454', width=4, shape='linear'),
+        marker=dict(
+            size=16,
+            color='#E35454',
+            symbol='diamond',
+            line=dict(width=2, color='white')
+        ),
+        customdata=list(zip(
+            df_best['Improvement'].values,
+            df_best['Improvement_Pct'].values,
+            df_best['Gap_to_99'].values,
+            best_model_at_date
+        )),
+        hovertemplate=(
+            '<b style="font-size: 16px; color: #E35454;">📈 Best Performance Frontier</b><br>'
+            '<br>'
+            '<b>Date:</b> %{x|%B %Y}<br>'
+            '<b>Leading Model:</b> %{customdata[3]}<br>'
+            '<b>Cumulative Best AC:</b> <span style="font-size: 20px; color: #FFD700;">%{y:.1%}</span><br>'
+            '<br>'
+            '<b>Progress Metrics:</b><br>'
+            '• Improvement: <span style="color: #28a745;">+%{customdata[0]:.1%}</span><br>'
+            '• Growth Rate: <span style="color: #28a745;">+%{customdata[1]:.1f}%</span><br>'
+            '• Gap to 99%: <span style="color: #1098F7;">%{customdata[2]:.1%}</span><br>'
+            '<br>'
+            '<i style="color: #B1B5B9;">This represents the best performance achieved by any model up to this date</i>'
+            '<extra></extra>'
+        ),
+        hoverlabel=dict(
+            bgcolor='rgba(26, 26, 46, 0.95)',
+            bordercolor='rgba(227, 84, 84, 0.5)',
+            font=dict(size=12, color='#F5F6F7', family='Geist, sans-serif'),
+            align='left',
+            namelength=-1
+        )
+    ))
+    # Calculate months from now for each prediction point
+    months_from_now_list = [(date - datetime.now()).days / 30.4 for date in future_dates]
+    years_from_now_list = [m / 12 for m in months_from_now_list]
+    # Add prediction line with comprehensive metrics
+    fig.add_trace(go.Scatter(
+        x=future_dates,
+        y=future_ac,
+        mode='lines',
+        name=f'Prediction ({best_model.capitalize()})',
+        line=dict(color='#1098F7', width=4, dash='dash'),
+        opacity=0.8,
+        customdata=list(zip(
+            [max(0, 0.99 - y) for y in future_ac],
+            months_from_now_list,
+            years_from_now_list,
+            [best_r2] * len(future_ac)
+        )),
+        hovertemplate=(
+            '<b style="font-size: 18px; color: #1098F7;">🔮 AI Performance Prediction</b><br>'
+            '<br>'
+            '<b style="color: #FFD700;">Forecast Details:</b><br>'
+            '• <b>Date:</b> %{x|%B %Y}<br>'
+            '• <b>Predicted AC:</b> <span style="font-size: 20px; color: #FFD700;">%{y:.1%}</span><br>'
+            '• <b>Gap to 99%:</b> <span style="color: #FF6B6B;">-%{customdata[0]:.1%}</span><br>'
+            '<br>'
+            '<b style="color: #28a745;">Timeline:</b><br>'
+            '• <b>Months from now:</b> %{customdata[1]:.0f} months<br>'
+            '• <b>Years from now:</b> %{customdata[2]:.1f} years<br>'
+            '<br>'
+            '<b style="color: #1098F7;">Model Confidence:</b><br>'
+            f'• <b>Algorithm:</b> {best_model.capitalize()}<br>'
+            '• <b>R² Score:</b> %{customdata[3]:.3f}<br>'
+            '<br>'
+            '<i style="color: #B1B5B9; font-size: 11px;">Based on historical performance trends</i>'
+            '<extra></extra>'
+        ),
+        hoverlabel=dict(
+            bgcolor='rgba(26, 26, 46, 0.95)',
+            bordercolor='rgba(16, 152, 247, 0.5)',
+            font=dict(size=12, color='#F5F6F7', family='Geist, sans-serif'),
+            align='left',
+            namelength=-1
+        )
+    ))
+    # Add 99% threshold line with enhanced styling
+    fig.add_hline(
+        y=0.99,
+        line_dash="dash",
+        line_color="rgba(40, 167, 69, 0.4)",
+        line_width=2,
+        annotation=dict(
+            text="<b>Enterprise-Grade Threshold (99%)</b>",
+            font=dict(size=13, color='#28a745', family='Geist, sans-serif'),
+            bgcolor='rgba(40, 167, 69, 0.15)',
+            bordercolor='#28a745',
+            borderwidth=1,
+            borderpad=4
+        ),
+        annotation_position="right"
+    )
+    # Add marker for 99% crossing point with enhanced visibility
+    if date_99:
+        # Calculate days until achievement
+        days_until = (date_99 - datetime.now()).days
+        fig.add_trace(go.Scatter(
+            x=[date_99],
+            y=[0.99],
+            mode='markers+text',
+            name='🎯 99% Achievement',
+            marker=dict(
+                size=28,
+                color='#28a745',
+                symbol='star',
+                line=dict(width=3, color='white')
+            ),
+            text=[f'<b>{date_99.strftime("%b %Y")}</b>'],
+            textposition='top center',
+            textfont=dict(size=16, color='#28a745', family='Geist, sans-serif'),
+            hovertemplate=(
+                '<b style="font-size: 18px; color: #28a745;">🎯 ENTERPRISE-READY MILESTONE</b><br>'
+                '<br>'
+                f'<b>Achievement Date:</b> <span style="font-size: 16px;">{date_99.strftime("%B %Y")}</span><br>'
+                f'<b>Time from today:</b> <span style="font-size: 16px; color: #FFD700;">{months_from_now:.0f} months</span><br>'
+                f'<b>Days remaining:</b> {days_until} days<br>'
+                f'<b>Years:</b> {months_from_now/12:.1f} years<br>'
+                '<br>'
+                '<b style="color: #1098F7;">Strategic Implications:</b><br>'
+                f'• Early adopters gain {months_from_now:.0f}-month advantage<br>'
+                '• Infrastructure investment critical now<br>'
+                '• 99% reliability enables production deployment<br>'
+                '<extra></extra>'
+            ),
+            hoverlabel=dict(
+                bgcolor='rgba(26, 26, 46, 0.95)',
+                bordercolor='rgba(40, 167, 69, 0.5)',
+                font=dict(size=12, color='#F5F6F7', family='Geist, sans-serif'),
+                align='left',
+                namelength=-1
+            )
+        ))
+    # Update layout with improved title showing active filters
+    filter_text = ""
+    if domain_filter != "All":
+        filter_text += f" - {domain_filter} Domain"
+    if model_type_filter != "All":
+        if filter_text:
+            filter_text += f", {model_type_filter} Models"
+        else:
+            filter_text += f" - {model_type_filter} Models"
+    title_text = f"<span style='font-size: 24px;'>🚀 When Will AI Agents Reach Enterprise-Grade Reliability?</span>"
+    if filter_text:
+        title_text += f"<br><span style='font-size: 14px; color: #1098F7;'>{filter_text}</span>"
+    if date_99 and months_from_now:
+        if months_from_now > 0:
+            title_text += f"<br><span style='font-size: 16px; color: #B1B5B9;'>Prediction: <b style='color: #FFD700;'>{date_99.strftime('%B %Y')}</b> (~{months_from_now:.0f} months)</span>"
+        else:
+            title_text += f"<br><span style='font-size: 16px; color: #28a745;'>Already achieved!</span>"
+    else:
+        title_text += f"<br><span style='font-size: 16px; color: #B1B5B9;'>Tracking performance improvements...</span>"
+    fig.update_layout(
+        title=dict(
+            text=title_text,
+            font=dict(size=20, family="Geist, sans-serif", color="#F5F6F7"),
+            x=0.5,
+            xanchor='center'
+        ),
+        xaxis=dict(
+            title=dict(
+                text="<b>Release Date</b>",
+                font=dict(size=16, family="Geist, sans-serif", color="#F5F6F7"),
+                standoff=20
+            ),
+            tickfont=dict(size=12, family="Geist Mono, monospace", color="#B1B5B9"),
+            gridcolor="rgba(245, 246, 247, 0.08)",
+            zerolinecolor="rgba(245, 246, 247, 0.15)",
+            showgrid=True,
+            gridwidth=1,
+            tickangle=0,
+            tickformat='%b %Y',
+            showspikes=True,
+            spikecolor="rgba(245, 246, 247, 0.3)",
+            spikethickness=1,
+            spikemode='across',
+            spikedash='dot',
+            range=[df_clean['Release Date'].min() - timedelta(days=60),
+                   min(datetime.now() + timedelta(days=800), future_dates[-1] if future_dates else datetime.now())]
+        ),
+        yaxis=dict(
+            title=dict(
+                text="<b>Action Completion (AC)</b>",
+                font=dict(size=16, family="Geist, sans-serif", color="#F5F6F7"),
+                standoff=20
+            ),
+            tickfont=dict(size=12, family="Geist Mono, monospace", color="#B1B5B9"),
+            gridcolor="rgba(245, 246, 247, 0.08)",
+            zerolinecolor="rgba(245, 246, 247, 0.15)",
+            showgrid=True,
+            gridwidth=1,
+            tickformat='.0%',
+            dtick=0.1,
+            showspikes=True,
+            spikecolor="rgba(245, 246, 247, 0.3)",
+            spikethickness=1,
+            spikemode='across',
+            spikedash='dot',
+            range=[-0.05, 1.08]
+        ),
+        plot_bgcolor="rgba(1, 9, 26, 0.98)",
+        paper_bgcolor="rgba(1, 9, 26, 0.98)",
+        height=650,
+        margin=dict(l=90, r=100, t=120, b=90),
+        hovermode='closest',
+        hoverdistance=30,
+        spikedistance=50,
+        legend=dict(
+            bgcolor="rgba(1, 9, 26, 0.9)",
+            bordercolor="rgba(245, 246, 247, 0.3)",
+            borderwidth=2,
+            font=dict(size=12, family="Geist, sans-serif", color="#F5F6F7"),
+            x=0.02,
+            y=0.98,
+            xanchor='left',
+            yanchor='top',
+            orientation='v',
+            itemsizing='constant',
+            itemwidth=40,
+            tracegroupgap=5,
+            title=dict(
+                text='<b>Legend</b>',
+                font=dict(size=13, color='#F5F6F7')
+            )
+        ),
+        showlegend=True,
+        annotations=[
+            dict(
+                text=f"<b>Model:</b> Conservative Linear | <b>Note:</b> Limited data - projection assumes diminishing returns",
+                xref="paper", yref="paper",
+                x=0.01, y=-0.12,
+                showarrow=False,
+                font=dict(size=11, color="#B1B5B9", family="Geist, sans-serif"),
+                bgcolor="rgba(1, 9, 26, 0.9)",
+                bordercolor="rgba(245, 246, 247, 0.3)",
+                borderwidth=1,
+                borderpad=4
+            )
+        ]
+    )
+    return fig, date_99, months_from_now

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ gradio==5.35.0
 pandas
 matplotlib
 plotly==5.24.1
-pydantic==2.10.6

 pandas
 matplotlib
 plotly==5.24.1
+pydantic==2.10.6
+scipy

results_v2.csv CHANGED Viewed

@@ -1,23 +1,23 @@
-Model,Vendor,Avg AC,Avg TSQ,Avg Total Cost,Avg Session Duration,Avg Turns,Banking AC,Healthcare AC,Insurance AC,Investment AC,Telecom AC,Banking TSQ,Healthcare TSQ,Insurance TSQ,Investment TSQ,Telecom TSQ,Avg Input Cost ($),Avg Output Cost ($),Banking Cost,Healthcare Cost,Insurance Cost,Investment Cost,Telecom Cost,Banking Duration,Healthcare Duration,Insurance Duration,Investment Duration,Telecom Duration,Banking Turns,Healthcare Turns,Insurance Turns,Investment Turns,Telecom Turns,Model Type,$/M input token,$/M output token,Output Type
-gpt-4.1-2025-04-14,OpenAI,0.62,0.8,0.0684,24.32,3.1,0.6,0.62,0.66,0.64,0.58,0.81,0.83,0.68,0.88,0.82,0.0577,0.0107,0.052,0.0711,0.0629,0.0777,0.0783,18.52,24.4,25.24,27.88,25.58,2.61,3.15,2.92,3.3,3.48,Proprietary,2.0,8.0,Normal
-mistral-medium-2508,Mistral,0.61,0.77,0.0199,37.45,2.98,0.57,0.6,0.7,0.57,0.59,0.74,0.75,0.73,0.87,0.76,0.0164,0.0035,0.0185,0.0196,0.0195,0.0223,0.0195,31.91,34.94,36.96,43.91,39.53,3.06,2.85,2.92,3.12,2.97,Proprietary,0.4,2.0,Normal
-gpt-4.1-mini-2025-04-14,OpenAI,0.56,0.79,0.0141,26.0,3.43,0.56,0.6,0.46,0.5,0.64,0.8,0.85,0.63,0.84,0.83,0.0123,0.0018,0.0115,0.0143,0.0131,0.0164,0.0156,21.28,26.82,23.32,30.5,28.07,2.99,3.32,3.28,3.76,3.79,Proprietary,0.4,1.6,Normal
-claude-sonnet-4-20250514,Anthropic,0.55,0.92,0.1537,66.6,2.89,0.58,0.62,0.53,0.49,0.53,0.9,0.95,0.93,0.92,0.9,0.1212,0.0325,0.1359,0.1542,0.1442,0.1669,0.1675,55.36,57.93,56.87,86.44,76.38,2.54,2.84,2.79,3.06,3.22,Proprietary,3.0,15.0,Normal
-kimi-k2-instruct,Moonshot AI,0.53,0.9,0.0386,163.62,2.84,0.58,0.49,0.58,0.47,0.53,0.89,0.91,0.88,0.93,0.91,0.0346,0.004,0.0344,0.0401,0.0367,0.0419,0.0397,165.45,155.42,164.9,161.14,171.17,2.58,2.81,2.79,3.1,2.93,Open source,1.0,3.0,Normal
-qwen3-235b-a22b-instruct-2507,Alibaba,0.53,0.85,0.0074,238.02,2.4,0.44,0.49,0.74,0.41,0.58,0.88,0.84,0.85,0.91,0.79,0.0067,0.0007,0.0059,0.0079,0.0077,0.0079,0.008,206.53,267.85,233.49,252.94,229.29,1.99,2.41,2.53,2.47,2.62,Open source,0.2,0.6,Reasoning
-qwen2.5-72b-instruct,Alibaba,0.51,0.8,0.0361,34.68,2.65,0.48,0.61,0.52,0.42,0.52,0.78,0.84,0.77,0.82,0.79,0.0338,0.0023,0.0292,0.0348,0.0338,0.0417,0.0415,27.34,29.09,30.46,41.32,45.2,2.3,2.46,2.47,3.0,3.0,Open source,0.9,0.9,Normal
-gemini-2.5-flash-lite,Google,0.47,0.84,0.0039,9.8,3.11,0.45,0.6,0.54,0.35,0.41,0.82,0.9,0.78,0.86,0.84,0.0034,0.0005,0.003,0.0036,0.0039,0.0049,0.0043,7.96,8.82,9.52,11.31,11.39,2.61,3.0,2.87,3.69,3.36,Proprietary,0.1,0.4,Reasoning
-glm-4.5-air,Zai,0.44,0.94,0.0194,69.17,4.96,0.49,0.4,0.53,0.46,0.33,0.94,0.91,0.94,0.96,0.94,0.014,0.0054,0.0152,0.021,0.0191,0.0216,0.0199,57.56,69.95,86.47,76.51,55.35,3.99,5.2,5.02,5.41,5.2,Open source,0.2,1.1,Reasoning
-gemini-2.5-pro,Google,0.43,0.86,0.1447,125.85,3.57,0.45,0.4,0.54,0.31,0.44,0.88,0.87,0.87,0.85,0.83,0.0442,0.1005,0.1253,0.1475,0.1386,0.1464,0.1656,108.83,126.91,121.99,129.62,141.92,3.1,3.57,3.49,3.7,3.97,Proprietary,1.25,10.0,Reasoning
-grok-4-0709,xAI,0.42,0.88,0.2387,225.94,3.19,0.29,0.4,0.48,0.5,0.42,0.92,0.84,0.91,0.9,0.82,0.1008,0.1379,0.2295,0.2679,0.2073,0.2257,0.2632,226.62,326.28,157.1,189.18,230.5,2.98,3.42,3.07,3.01,3.46,Proprietary,3.0,15.0,Reasoning
-deepseek-v3,Deepseek,0.4,0.8,0.0141,59.97,3.71,0.38,0.32,0.48,0.36,0.47,0.8,0.74,0.76,0.87,0.81,0.0119,0.0022,0.0123,0.0158,0.0139,0.0151,0.0138,44.46,68.38,48.54,70.21,68.27,3.27,4.2,3.48,3.79,3.83,Open source,0.27,1.1,Normal
-gemini-2.5-flash,Google,0.38,0.94,0.0271,39.84,3.9,0.48,0.38,0.44,0.22,0.36,0.94,0.94,0.94,0.94,0.95,0.0123,0.0148,0.0248,0.0283,0.0273,0.0308,0.0241,33.03,36.81,38.24,42.78,48.34,3.53,3.96,3.78,4.28,3.98,Proprietary,0.3,2.5,Reasoning
-gpt-4.1-nano-2025-04-14,OpenAI,0.38,0.63,0.0038,12.36,3.56,0.4,0.4,0.41,0.29,0.38,0.64,0.54,0.54,0.77,0.65,0.0034,0.0004,0.0029,0.0038,0.004,0.0042,0.0041,14.16,10.9,12.23,12.68,11.83,2.88,3.24,3.78,4.01,3.91,Proprietary,0.1,0.4,Normal
-qwen3-235b-a22b-thinking-2507,Alibaba,0.34,0.85,0.0584,302.24,3.12,0.42,0.3,0.42,0.23,0.34,0.84,0.82,0.86,0.91,0.84,0.0275,0.0309,0.0535,0.0679,0.0573,0.0562,0.0575,309.41,310.33,316.64,266.96,307.84,2.86,3.43,3.2,3.03,3.1,Open source,0.65,3.0,Reasoning
-magistral-medium-2506,Mistral,0.32,0.59,0.1182,32.96,4.4,0.3,0.35,0.38,0.26,0.3,0.59,0.67,0.56,0.63,0.51,0.108,0.0102,0.1067,0.0994,0.1077,0.1476,0.1294,24.98,35.81,33.33,39.18,31.49,4.21,3.46,3.92,5.36,5.07,Proprietary,2.0,5.0,Reasoning
-nova-pro-v1,Amazon,0.29,0.65,0.0359,27.96,3.04,0.33,0.29,0.39,0.17,0.29,0.6,0.57,0.64,0.83,0.6,0.0316,0.0043,0.0304,0.0353,0.0359,0.04,0.038,23.45,27.94,27.9,32.09,28.43,2.72,2.88,2.99,3.36,3.26,Proprietary,0.8,3.2,Normal
-mistral-small-2506,Mistral,0.26,0.71,0.0053,35.69,4.37,0.37,0.28,0.22,0.2,0.21,0.73,0.71,0.65,0.76,0.69,0.0049,0.0004,0.0041,0.0057,0.0054,0.0058,0.0056,30.64,36.02,30.83,41.96,39.02,3.3,4.47,4.52,4.87,4.67,Open source,0.1,0.3,Normal
-llama-3.3-70b-instruct,Meta,0.2,0.62,0.0599,19.92,3.83,0.11,0.29,0.29,0.14,0.16,0.62,0.64,0.62,0.64,0.57,0.0588,0.0011,0.055,0.0544,0.0545,0.0664,0.069,17.62,19.4,18.55,23.91,20.14,3.61,3.34,3.42,4.29,4.5,Open source,0.88,0.88,Normal
-caller,Arcee,0.16,0.65,0.0297,25.66,4.2,0.23,0.14,0.22,0.09,0.12,0.69,0.6,0.68,0.61,0.67,0.0282,0.0015,0.0262,0.0303,0.0305,0.0331,0.0286,22.83,25.54,26.42,29.66,23.85,3.76,4.19,4.18,4.75,4.14,Open source,0.55,0.85,Normal
-nova-lite-v1,Amazon,0.16,0.55,0.0031,20.26,3.73,0.12,0.18,0.19,0.15,0.18,0.48,0.49,0.58,0.72,0.49,0.0027,0.0004,0.0026,0.0033,0.0031,0.0034,0.0029,17.53,20.61,19.67,24.28,19.2,3.31,4.13,3.57,4.04,3.62,Proprietary,0.06,0.24,Normal
-magistral-small-2506,Mistral,0.16,0.53,0.0301,17.42,5.68,0.23,0.18,0.13,0.16,0.12,0.57,0.46,0.42,0.62,0.6,0.0275,0.0026,0.0245,0.0335,0.0302,0.034,0.0281,14.53,21.36,14.65,19.67,16.87,4.74,6.28,6.14,6.06,5.19,Open source,0.5,1.5,Reasoning

+Model,Vendor,Avg AC,Avg TSQ,Avg Total Cost,Avg Session Duration,Avg Turns,Banking AC,Healthcare AC,Insurance AC,Investment AC,Telecom AC,Banking TSQ,Healthcare TSQ,Insurance TSQ,Investment TSQ,Telecom TSQ,Avg Input Cost ($),Avg Output Cost ($),Banking Cost,Healthcare Cost,Insurance Cost,Investment Cost,Telecom Cost,Banking Duration,Healthcare Duration,Insurance Duration,Investment Duration,Telecom Duration,Banking Turns,Healthcare Turns,Insurance Turns,Investment Turns,Telecom Turns,Model Type,$/M input token,$/M output token,Output Type,Release Date
+gpt-4.1-2025-04-14,OpenAI,0.62,0.8,0.0684,24.32,3.1,0.6,0.62,0.66,0.64,0.58,0.81,0.83,0.68,0.88,0.82,0.0577,0.0107,0.052,0.0711,0.0629,0.0777,0.0783,18.52,24.4,25.24,27.88,25.58,2.61,3.15,2.92,3.3,3.48,Proprietary,2.0,8.0,Normal,2025-04
+mistral-medium-2508,Mistral,0.61,0.77,0.0199,37.45,2.98,0.57,0.6,0.7,0.57,0.59,0.74,0.75,0.73,0.87,0.76,0.0164,0.0035,0.0185,0.0196,0.0195,0.0223,0.0195,31.91,34.94,36.96,43.91,39.53,3.06,2.85,2.92,3.12,2.97,Proprietary,0.4,2.0,Normal,2025-08
+gpt-4.1-mini-2025-04-14,OpenAI,0.56,0.79,0.0141,26.0,3.43,0.56,0.6,0.46,0.5,0.64,0.8,0.85,0.63,0.84,0.83,0.0123,0.0018,0.0115,0.0143,0.0131,0.0164,0.0156,21.28,26.82,23.32,30.5,28.07,2.99,3.32,3.28,3.76,3.79,Proprietary,0.4,1.6,Normal,2025-04
+claude-sonnet-4-20250514,Anthropic,0.55,0.92,0.1537,66.6,2.89,0.58,0.62,0.53,0.49,0.53,0.9,0.95,0.93,0.92,0.9,0.1212,0.0325,0.1359,0.1542,0.1442,0.1669,0.1675,55.36,57.93,56.87,86.44,76.38,2.54,2.84,2.79,3.06,3.22,Proprietary,3.0,15.0,Normal,2025-05
+kimi-k2-instruct,Moonshot AI,0.53,0.9,0.0386,163.62,2.84,0.58,0.49,0.58,0.47,0.53,0.89,0.91,0.88,0.93,0.91,0.0346,0.004,0.0344,0.0401,0.0367,0.0419,0.0397,165.45,155.42,164.9,161.14,171.17,2.58,2.81,2.79,3.1,2.93,Open source,1.0,3.0,Normal,2025-07
+qwen3-235b-a22b-instruct-2507,Alibaba,0.53,0.85,0.0074,238.02,2.4,0.44,0.49,0.74,0.41,0.58,0.88,0.84,0.85,0.91,0.79,0.0067,0.0007,0.0059,0.0079,0.0077,0.0079,0.008,206.53,267.85,233.49,252.94,229.29,1.99,2.41,2.53,2.47,2.62,Open source,0.2,0.6,Reasoning,2025-07
+qwen2.5-72b-instruct,Alibaba,0.51,0.8,0.0361,34.68,2.65,0.48,0.61,0.52,0.42,0.52,0.78,0.84,0.77,0.82,0.79,0.0338,0.0023,0.0292,0.0348,0.0338,0.0417,0.0415,27.34,29.09,30.46,41.32,45.2,2.3,2.46,2.47,3.0,3.0,Open source,0.9,0.9,Normal,2024-09
+gemini-2.5-flash-lite,Google,0.47,0.84,0.0039,9.8,3.11,0.45,0.6,0.54,0.35,0.41,0.82,0.9,0.78,0.86,0.84,0.0034,0.0005,0.003,0.0036,0.0039,0.0049,0.0043,7.96,8.82,9.52,11.31,11.39,2.61,3.0,2.87,3.69,3.36,Proprietary,0.1,0.4,Reasoning,2025-07
+glm-4.5-air,Zai,0.44,0.94,0.0194,69.17,4.96,0.49,0.4,0.53,0.46,0.33,0.94,0.91,0.94,0.96,0.94,0.014,0.0054,0.0152,0.021,0.0191,0.0216,0.0199,57.56,69.95,86.47,76.51,55.35,3.99,5.2,5.02,5.41,5.2,Open source,0.2,1.1,Reasoning,2025-07
+gemini-2.5-pro,Google,0.43,0.86,0.1447,125.85,3.57,0.45,0.4,0.54,0.31,0.44,0.88,0.87,0.87,0.85,0.83,0.0442,0.1005,0.1253,0.1475,0.1386,0.1464,0.1656,108.83,126.91,121.99,129.62,141.92,3.1,3.57,3.49,3.7,3.97,Proprietary,1.25,10.0,Reasoning,2025-03
+grok-4-0709,xAI,0.42,0.88,0.2387,225.94,3.19,0.29,0.4,0.48,0.5,0.42,0.92,0.84,0.91,0.9,0.82,0.1008,0.1379,0.2295,0.2679,0.2073,0.2257,0.2632,226.62,326.28,157.1,189.18,230.5,2.98,3.42,3.07,3.01,3.46,Proprietary,3.0,15.0,Reasoning,2025-07
+deepseek-v3,Deepseek,0.4,0.8,0.0141,59.97,3.71,0.38,0.32,0.48,0.36,0.47,0.8,0.74,0.76,0.87,0.81,0.0119,0.0022,0.0123,0.0158,0.0139,0.0151,0.0138,44.46,68.38,48.54,70.21,68.27,3.27,4.2,3.48,3.79,3.83,Open source,0.27,1.1,Normal,2024-12
+gemini-2.5-flash,Google,0.38,0.94,0.0271,39.84,3.9,0.48,0.38,0.44,0.22,0.36,0.94,0.94,0.94,0.94,0.95,0.0123,0.0148,0.0248,0.0283,0.0273,0.0308,0.0241,33.03,36.81,38.24,42.78,48.34,3.53,3.96,3.78,4.28,3.98,Proprietary,0.3,2.5,Reasoning,2025-06
+gpt-4.1-nano-2025-04-14,OpenAI,0.38,0.63,0.0038,12.36,3.56,0.4,0.4,0.41,0.29,0.38,0.64,0.54,0.54,0.77,0.65,0.0034,0.0004,0.0029,0.0038,0.004,0.0042,0.0041,14.16,10.9,12.23,12.68,11.83,2.88,3.24,3.78,4.01,3.91,Proprietary,0.1,0.4,Normal,2025-04
+qwen3-235b-a22b-thinking-2507,Alibaba,0.34,0.85,0.0584,302.24,3.12,0.42,0.3,0.42,0.23,0.34,0.84,0.82,0.86,0.91,0.84,0.0275,0.0309,0.0535,0.0679,0.0573,0.0562,0.0575,309.41,310.33,316.64,266.96,307.84,2.86,3.43,3.2,3.03,3.1,Open source,0.65,3.0,Reasoning,2025-07
+magistral-medium-2506,Mistral,0.32,0.59,0.1182,32.96,4.4,0.3,0.35,0.38,0.26,0.3,0.59,0.67,0.56,0.63,0.51,0.108,0.0102,0.1067,0.0994,0.1077,0.1476,0.1294,24.98,35.81,33.33,39.18,31.49,4.21,3.46,3.92,5.36,5.07,Proprietary,2.0,5.0,Reasoning,2025-06
+nova-pro-v1,Amazon,0.29,0.65,0.0359,27.96,3.04,0.33,0.29,0.39,0.17,0.29,0.6,0.57,0.64,0.83,0.6,0.0316,0.0043,0.0304,0.0353,0.0359,0.04,0.038,23.45,27.94,27.9,32.09,28.43,2.72,2.88,2.99,3.36,3.26,Proprietary,0.8,3.2,Normal,2024-12
+mistral-small-2506,Mistral,0.26,0.71,0.0053,35.69,4.37,0.37,0.28,0.22,0.2,0.21,0.73,0.71,0.65,0.76,0.69,0.0049,0.0004,0.0041,0.0057,0.0054,0.0058,0.0056,30.64,36.02,30.83,41.96,39.02,3.3,4.47,4.52,4.87,4.67,Open source,0.1,0.3,Normal,2025-06
+llama-3.3-70b-instruct,Meta,0.2,0.62,0.0599,19.92,3.83,0.11,0.29,0.29,0.14,0.16,0.62,0.64,0.62,0.64,0.57,0.0588,0.0011,0.055,0.0544,0.0545,0.0664,0.069,17.62,19.4,18.55,23.91,20.14,3.61,3.34,3.42,4.29,4.5,Open source,0.88,0.88,Normal,2024-12
+caller,Arcee,0.16,0.65,0.0297,25.66,4.2,0.23,0.14,0.22,0.09,0.12,0.69,0.6,0.68,0.61,0.67,0.0282,0.0015,0.0262,0.0303,0.0305,0.0331,0.0286,22.83,25.54,26.42,29.66,23.85,3.76,4.19,4.18,4.75,4.14,Open source,0.55,0.85,Normal,2025-01
+nova-lite-v1,Amazon,0.16,0.55,0.0031,20.26,3.73,0.12,0.18,0.19,0.15,0.18,0.48,0.49,0.58,0.72,0.49,0.0027,0.0004,0.0026,0.0033,0.0031,0.0034,0.0029,17.53,20.61,19.67,24.28,19.2,3.31,4.13,3.57,4.04,3.62,Proprietary,0.06,0.24,Normal,2024-12
+magistral-small-2506,Mistral,0.16,0.53,0.0301,17.42,5.68,0.23,0.18,0.13,0.16,0.12,0.57,0.46,0.42,0.62,0.6,0.0275,0.0026,0.0245,0.0335,0.0302,0.034,0.0281,14.53,21.36,14.65,19.67,16.87,4.74,6.28,6.14,6.06,5.19,Open source,0.5,1.5,Reasoning,2025-06

tabs/leaderboard_v2.py CHANGED Viewed

@@ -4,10 +4,11 @@ import plotly.graph_objects as go
 # Import components and styles from modular files
 from components.leaderboard_components import (
-    get_chart_colors, get_rank_badge, get_type_badge,
     get_output_type_badge, get_score_bar, get_metric_tooltip,
     get_responsive_styles, get_faq_section, SORT_COLUMN_MAP
 )
 from styles.leaderboard_styles import get_leaderboard_css
@@ -1639,7 +1640,112 @@ def create_leaderboard_v2_tab():
         </div>
     </div>
     """)
     # Radar Chart Section
     gr.HTML("""
     <div class="dark-container" style="margin-bottom: 24px;">
@@ -1847,22 +1953,51 @@ def create_leaderboard_v2_tab():
         return create_domain_radar_chart(load_leaderboard_data(), sort_by, valid_selected)
     # Update table when filters change
     filter_inputs = [domain_filter, model_type_filter, reasoning_filter, sort_by, sort_order]
     for input_component in filter_inputs:
         input_component.change(
             fn=update_table,
             inputs=filter_inputs,
             outputs=[leaderboard_title, leaderboard_table]
         )
         # Also update radar chart when filters change
         input_component.change(
             fn=update_radar_chart,
             inputs=filter_inputs + [model_selector],
             outputs=[model_selector, radar_chart]
         )
     # Update radar chart when model selection changes
     model_selector.change(

 # Import components and styles from modular files
 from components.leaderboard_components import (
+    get_chart_colors, get_rank_badge, get_type_badge,
     get_output_type_badge, get_score_bar, get_metric_tooltip,
     get_responsive_styles, get_faq_section, SORT_COLUMN_MAP
 )
+from components.prediction_components import create_ac_prediction_chart
 from styles.leaderboard_styles import get_leaderboard_css
         </div>
     </div>
     """)
+    # AI Agent Reliability Prediction Section
+    gr.HTML("""
+    <div class="dark-container" style="margin-bottom: 24px;">
+        <div class="section-header">
+            <span class="section-icon" style="color: var(--accent-secondary);">📈</span>
+            <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Geist', sans-serif; font-weight: 700;">
+                Enterprise Readiness Prediction
+            </h3>
+        </div>
+        <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Geist', sans-serif;">
+            When will AI agents reach 99% reliability for enterprise deployment?
+        </p>
+    """)
+    # Add prediction chart - make it reactive
+    gr.HTML('<div class="chart-container">')
+    # Create initial prediction with default filters
+    initial_prediction_chart, initial_date_99, initial_months_to_99 = create_ac_prediction_chart(
+        load_leaderboard_data(), domain_filter="All", model_type_filter="All"
+    )
+    prediction_plot = gr.Plot(
+        label="",
+        value=initial_prediction_chart,
+        elem_classes=["prediction-chart", "plot-container"]
+    )
+    gr.HTML('</div>')
+    # Add dynamic insights section
+    def generate_insight_html(date_99, months_to_99, domain_filter="All", model_type_filter="All"):
+        """Generate insight HTML based on prediction results and filters"""
+        # Clean up filter names
+        if domain_filter.startswith('🌐'):
+            domain_clean = "All Domains"
+        elif domain_filter.startswith('🏦'):
+            domain_clean = "Banking"
+        elif domain_filter.startswith('🏥'):
+            domain_clean = "Healthcare"
+        elif domain_filter.startswith('🛡️'):
+            domain_clean = "Insurance"
+        elif domain_filter.startswith('💰'):
+            domain_clean = "Investment"
+        elif domain_filter.startswith('📱'):
+            domain_clean = "Telecom"
+        else:
+            domain_clean = domain_filter
+        filter_context = ""
+        if domain_clean != "All Domains" or model_type_filter != "All":
+            filter_context = " for "
+            if domain_clean != "All Domains":
+                filter_context += f"<strong>{domain_clean}</strong>"
+            if model_type_filter != "All":
+                if domain_clean != "All Domains":
+                    filter_context += f" ({model_type_filter} models)"
+                else:
+                    filter_context += f"<strong>{model_type_filter} models</strong>"
+        if date_99 and months_to_99:
+            if months_to_99 > 0:
+                return f"""
+                <div style="margin-top: 20px; padding: 16px; background: linear-gradient(145deg, rgba(16, 152, 247, 0.1) 0%, rgba(227, 84, 84, 0.05) 100%); border: 1px solid var(--border-subtle); border-radius: 12px;">
+                    <div style="display: flex; align-items: center; gap: 12px; margin-bottom: 12px;">
+                        <span style="font-size: 1.5rem;">🎯</span>
+                        <span style="font-weight: 700; color: var(--text-primary); font-size: 1.1rem;">Key Prediction Insights{filter_context}</span>
+                    </div>
+                    <ul style="color: var(--text-secondary); margin: 0; padding-left: 20px; line-height: 1.8;">
+                        <li><strong style="color: var(--accent-primary);">Enterprise threshold (99% AC)</strong> predicted by <strong style="color: var(--accent-secondary);">{date_99.strftime('%B %Y')}</strong></li>
+                        <li>Approximately <strong style="color: var(--accent-primary);">{months_to_99:.0f} months</strong> from current date</li>
+                        <li>Performance trends based on <strong>historical data{filter_context}</strong></li>
+                        <li>Early adopters who invest now gain <strong>{months_to_99:.0f} months</strong> of competitive advantage</li>
+                    </ul>
+                </div>
+                """
+            else:
+                return f"""
+                <div style="margin-top: 20px; padding: 16px; background: linear-gradient(145deg, rgba(40, 167, 69, 0.1) 0%, rgba(227, 84, 84, 0.05) 100%); border: 1px solid var(--border-subtle); border-radius: 12px;">
+                    <div style="display: flex; align-items: center; gap: 12px; margin-bottom: 12px;">
+                        <span style="font-size: 1.5rem;">✅</span>
+                        <span style="font-weight: 700; color: var(--text-primary); font-size: 1.1rem;">Enterprise Ready{filter_context}!</span>
+                    </div>
+                    <ul style="color: var(--text-secondary); margin: 0; padding-left: 20px; line-height: 1.8;">
+                        <li>Models{filter_context} have <strong style="color: #28a745;">already achieved</strong> near-enterprise reliability</li>
+                        <li>Focus should shift to <strong>implementation and scaling</strong></li>
+                        <li>Investment in guardrails and observability is <strong>critical now</strong></li>
+                    </ul>
+                </div>
+                """
+        else:
+            return f"""
+            <div style="margin-top: 20px; padding: 16px; background: rgba(245, 246, 247, 0.03); border: 1px solid var(--border-subtle); border-radius: 12px;">
+                <p style="color: var(--text-secondary); margin: 0;">
+                    {'Insufficient data' + filter_context + ' to make reliable predictions. More models need to be evaluated in this category.' if (domain_clean != "All Domains" or model_type_filter != "All") else 'Based on current data trends, we are tracking the exponential improvement in AI agent capabilities. As more models are released, our predictions will become more accurate.'}
+                </p>
+            </div>
+            """
+    # Create the insights HTML component
+    prediction_insights = gr.HTML(
+        generate_insight_html(initial_date_99, initial_months_to_99, "All", "All")
+    )
+    gr.HTML("</div>")
     # Radar Chart Section
     gr.HTML("""
     <div class="dark-container" style="margin-bottom: 24px;">
         return create_domain_radar_chart(load_leaderboard_data(), sort_by, valid_selected)
+    # Function to update prediction chart and insights
+    def update_prediction_chart_and_insights(domain_filter, model_type_filter):
+        """Update prediction chart and insights based on filters"""
+        df = load_leaderboard_data()
+        # Create new prediction chart with filters
+        chart, date_99, months_to_99 = create_ac_prediction_chart(
+            df, domain_filter=domain_filter, model_type_filter=model_type_filter
+        )
+        # Generate new insights HTML
+        insights_html = generate_insight_html(date_99, months_to_99, domain_filter, model_type_filter)
+        return chart, insights_html
     # Update table when filters change
     filter_inputs = [domain_filter, model_type_filter, reasoning_filter, sort_by, sort_order]
     for input_component in filter_inputs:
         input_component.change(
             fn=update_table,
             inputs=filter_inputs,
             outputs=[leaderboard_title, leaderboard_table]
         )
         # Also update radar chart when filters change
         input_component.change(
             fn=update_radar_chart,
             inputs=filter_inputs + [model_selector],
             outputs=[model_selector, radar_chart]
         )
+    # Update prediction chart when domain or model type filters change
+    # Only react to domain_filter and model_type_filter, not other filters
+    domain_filter.change(
+        fn=update_prediction_chart_and_insights,
+        inputs=[domain_filter, model_type_filter],
+        outputs=[prediction_plot, prediction_insights]
+    )
+    model_type_filter.change(
+        fn=update_prediction_chart_and_insights,
+        inputs=[domain_filter, model_type_filter],
+        outputs=[prediction_plot, prediction_insights]
+    )
     # Update radar chart when model selection changes
     model_selector.change(