Spaces:
Sleeping
Sleeping
Yacine Jernite
committed on
Commit
·
9e65b63
1
Parent(s):
bc0c2e4
simplified
Browse files
- app.py +56 -116
- ui/tab_dataset.py +6 -21
- utils/constants.py +2 -35
- utils/dataset.py +2 -6
- utils/helpers.py +24 -22
app.py
CHANGED
|
@@ -17,6 +17,7 @@ from utils.dataset import (
|
|
| 17 |
)
|
| 18 |
from utils.helpers import (
|
| 19 |
check_token_availability,
|
|
|
|
| 20 |
format_token_status,
|
| 21 |
get_inference_token,
|
| 22 |
get_org_token,
|
|
@@ -39,6 +40,31 @@ from ui.tab_testing import (
|
|
| 39 |
# Handlers
|
| 40 |
# ============================================================================
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, save_mode, oauth_token: gr.OAuthToken | None = None):
|
| 43 |
"""Handle test execution."""
|
| 44 |
|
|
@@ -81,57 +107,25 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
|
|
| 81 |
org_token = get_org_token()
|
| 82 |
if org_token:
|
| 83 |
try:
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
"categories_and_reasoning": categories_and_reasoning_text,
|
| 91 |
-
"policy": current_policy,
|
| 92 |
-
"model_selection": model_choice,
|
| 93 |
-
"raw_response": raw_response,
|
| 94 |
-
"reasoning_trace": reasoning or "",
|
| 95 |
-
"reasoning_effort": reasoning_effort or "",
|
| 96 |
-
"max_tokens": int(max_tokens),
|
| 97 |
-
"temperature": float(temperature),
|
| 98 |
-
"top_p": float(top_p),
|
| 99 |
-
"system_prompt": system_prompt_val or "",
|
| 100 |
-
"response_format": response_format_val or "",
|
| 101 |
-
"timestamp": datetime.now().isoformat(),
|
| 102 |
-
}
|
| 103 |
-
repo_id = get_roost_dataset_repo_id()
|
| 104 |
-
save_to_dataset(repo_id, org_token, data)
|
| 105 |
except Exception as e:
|
| 106 |
-
# Log error but don't break test execution
|
| 107 |
print(f"Failed to save to ROOST dataset: {e}")
|
| 108 |
elif save_mode == "Save to Private Dataset":
|
| 109 |
personal_token, _ = get_personal_token(oauth_token)
|
| 110 |
if personal_token:
|
| 111 |
try:
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
"categories_and_reasoning": categories_and_reasoning_text,
|
| 119 |
-
"policy": current_policy,
|
| 120 |
-
"model_selection": model_choice,
|
| 121 |
-
"raw_response": raw_response,
|
| 122 |
-
"reasoning_trace": reasoning or "",
|
| 123 |
-
"reasoning_effort": reasoning_effort or "",
|
| 124 |
-
"max_tokens": int(max_tokens),
|
| 125 |
-
"temperature": float(temperature),
|
| 126 |
-
"top_p": float(top_p),
|
| 127 |
-
"system_prompt": system_prompt_val or "",
|
| 128 |
-
"response_format": response_format_val or "",
|
| 129 |
-
"timestamp": datetime.now().isoformat(),
|
| 130 |
-
}
|
| 131 |
-
repo_id = get_dataset_repo_id(personal_token)
|
| 132 |
-
save_to_dataset(repo_id, personal_token, data)
|
| 133 |
except Exception as e:
|
| 134 |
-
# Log error but don't break test execution
|
| 135 |
print(f"Failed to save to private dataset: {e}")
|
| 136 |
|
| 137 |
return (
|
|
@@ -237,55 +231,19 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
|
|
| 237 |
outputs=model_info_display,
|
| 238 |
)
|
| 239 |
|
| 240 |
-
# Token status update handler
|
| 241 |
-
def update_token_status(oauth_token: gr.OAuthToken | None = None):
|
| 242 |
-
"""Update token status markdown when OAuth changes."""
|
| 243 |
-
return format_token_status(oauth_token)
|
| 244 |
-
|
| 245 |
-
# Save mode help text update handler
|
| 246 |
-
def update_save_mode_help(oauth_token: gr.OAuthToken | None = None):
|
| 247 |
-
"""Update save mode help text based on token availability."""
|
| 248 |
-
from ui.tab_testing import format_save_mode_help
|
| 249 |
-
has_personal, has_org = check_token_availability(oauth_token)
|
| 250 |
-
return format_save_mode_help(has_personal, has_org)
|
| 251 |
-
|
| 252 |
-
# Dataset button state update handler
|
| 253 |
-
def update_dataset_button_states(oauth_token: gr.OAuthToken | None = None):
|
| 254 |
-
"""Update dataset button states based on token availability."""
|
| 255 |
-
has_personal, has_org = check_token_availability(oauth_token)
|
| 256 |
-
|
| 257 |
-
# Update help text
|
| 258 |
-
help_text = (
|
| 259 |
-
f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
|
| 260 |
-
f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
|
| 261 |
-
)
|
| 262 |
-
|
| 263 |
-
return (
|
| 264 |
-
gr.update(interactive=has_personal), # refresh_private_btn
|
| 265 |
-
gr.update(interactive=True), # refresh_roost_btn (can load if public)
|
| 266 |
-
help_text, # dataset_help_text
|
| 267 |
-
)
|
| 268 |
-
|
| 269 |
# Combined handler for login button click - updates all token-dependent UI
|
| 270 |
def handle_login_click(oauth_token: gr.OAuthToken | None = None):
|
| 271 |
"""Handle login button click and update all token-dependent UI."""
|
| 272 |
-
token_status = format_token_status(oauth_token)
|
| 273 |
-
|
| 274 |
from ui.tab_testing import format_save_mode_help
|
| 275 |
-
has_personal, has_org = check_token_availability(oauth_token)
|
| 276 |
-
save_help = format_save_mode_help(has_personal, has_org)
|
| 277 |
|
| 278 |
-
|
| 279 |
-
f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
|
| 280 |
-
f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
|
| 281 |
-
)
|
| 282 |
|
| 283 |
return (
|
| 284 |
-
|
| 285 |
-
|
| 286 |
gr.update(interactive=has_personal), # refresh_private_btn
|
| 287 |
gr.update(interactive=True), # refresh_roost_btn
|
| 288 |
-
|
| 289 |
)
|
| 290 |
|
| 291 |
login_button.click(
|
|
@@ -303,66 +261,48 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
|
|
| 303 |
# Dataset load handler
|
| 304 |
def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
|
| 305 |
"""Load example from dataset and populate all fields."""
|
| 306 |
-
if
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
|
| 310 |
|
| 311 |
try:
|
| 312 |
-
# Find index by matching label
|
| 313 |
idx = dropdown_choices_list.index(selected_label)
|
| 314 |
-
if
|
| 315 |
-
return None
|
| 316 |
|
| 317 |
example = cached_examples_list[idx]
|
| 318 |
-
|
| 319 |
-
# Get policy - ensure it's a string (not None)
|
| 320 |
policy = example.get("policy", "") or ""
|
| 321 |
-
|
| 322 |
-
# Extract saved results
|
| 323 |
policy_violation = example.get("policy_violation", -1)
|
| 324 |
-
categories_and_reasoning = example.get("categories_and_reasoning", "")
|
| 325 |
-
raw_response = example.get("raw_response", "")
|
| 326 |
-
reasoning_trace = example.get("reasoning_trace", "")
|
| 327 |
model_selection = example.get("model_selection", "")
|
| 328 |
reasoning_effort_val = example.get("reasoning_effort", "")
|
|
|
|
| 329 |
|
| 330 |
# Format label text
|
| 331 |
-
if policy_violation == 1
|
| 332 |
-
|
| 333 |
-
elif policy_violation == 0:
|
| 334 |
-
label_text = "## ✅ No Policy Violation"
|
| 335 |
-
else:
|
| 336 |
-
label_text = "## ⚠️ Unable to determine label"
|
| 337 |
-
|
| 338 |
-
# Format model info
|
| 339 |
-
model_info = format_model_info(model_selection, reasoning_effort_val)
|
| 340 |
|
| 341 |
-
# Format reasoning info
|
| 342 |
reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)
|
| 343 |
-
|
| 344 |
reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())
|
| 345 |
|
| 346 |
return (
|
| 347 |
example.get("input", ""),
|
| 348 |
-
policy,
|
| 349 |
example.get("model_selection", ""),
|
| 350 |
-
|
| 351 |
example.get("max_tokens", 0),
|
| 352 |
example.get("temperature", 0.0),
|
| 353 |
example.get("top_p", 0.0),
|
| 354 |
example.get("system_prompt", ""),
|
| 355 |
example.get("response_format", ""),
|
| 356 |
-
|
| 357 |
-
model_info,
|
| 358 |
label_text,
|
| 359 |
-
categories_and_reasoning,
|
| 360 |
-
raw_response,
|
| 361 |
gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
|
| 362 |
gr.update(value=reasoning_trace or "", visible=reasoning_visible),
|
| 363 |
)
|
| 364 |
except (ValueError, IndexError):
|
| 365 |
-
return None
|
| 366 |
|
| 367 |
example_dropdown.change(
|
| 368 |
load_example_from_dataset,
|
|
|
|
| 17 |
)
|
| 18 |
from utils.helpers import (
|
| 19 |
check_token_availability,
|
| 20 |
+
format_dataset_help_text,
|
| 21 |
format_token_status,
|
| 22 |
get_inference_token,
|
| 23 |
get_org_token,
|
|
|
|
| 40 |
# Handlers
|
| 41 |
# ============================================================================
|
| 42 |
|
| 43 |
+
def prepare_save_data(test_input, current_policy, parsed, model_choice, raw_response,
                      reasoning, reasoning_effort, max_tokens, temperature, top_p,
                      system_prompt_val, response_format_val):
    """Assemble the record that is written to a dataset for one test run.

    ``parsed`` is handed to ``format_categories_and_reasoning`` and is also
    read for its ``"label"`` entry (``-1`` when absent). Optional text fields
    fall back to an empty string, ``max_tokens`` is coerced to ``int`` and the
    sampling parameters to ``float``; the record is stamped with the current
    local time in ISO format.
    """
    # Rendered first, exactly as before, so any failure in the formatter
    # surfaces before the numeric coercions below.
    rendered_categories = format_categories_and_reasoning(parsed)
    violation_label = parsed.get("label", -1)

    record = {
        "input": test_input,
        "policy_violation": violation_label,
        "categories_and_reasoning": rendered_categories,
        "policy": current_policy,
        "model_selection": model_choice,
        "raw_response": raw_response,
        "reasoning_trace": reasoning or "",
        "reasoning_effort": reasoning_effort or "",
        "max_tokens": int(max_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
        "system_prompt": system_prompt_val or "",
        "response_format": response_format_val or "",
        "timestamp": datetime.now().isoformat(),
    }
    return record
|
| 66 |
+
|
| 67 |
+
|
| 68 |
def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, save_mode, oauth_token: gr.OAuthToken | None = None):
|
| 69 |
"""Handle test execution."""
|
| 70 |
|
|
|
|
| 107 |
org_token = get_org_token()
|
| 108 |
if org_token:
|
| 109 |
try:
|
| 110 |
+
data = prepare_save_data(
|
| 111 |
+
test_input, current_policy, parsed, model_choice, raw_response,
|
| 112 |
+
reasoning, reasoning_effort, max_tokens, temperature, top_p,
|
| 113 |
+
system_prompt_val, response_format_val
|
| 114 |
+
)
|
| 115 |
+
save_to_dataset(get_roost_dataset_repo_id(), org_token, data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
except Exception as e:
|
|
|
|
| 117 |
print(f"Failed to save to ROOST dataset: {e}")
|
| 118 |
elif save_mode == "Save to Private Dataset":
|
| 119 |
personal_token, _ = get_personal_token(oauth_token)
|
| 120 |
if personal_token:
|
| 121 |
try:
|
| 122 |
+
data = prepare_save_data(
|
| 123 |
+
test_input, current_policy, parsed, model_choice, raw_response,
|
| 124 |
+
reasoning, reasoning_effort, max_tokens, temperature, top_p,
|
| 125 |
+
system_prompt_val, response_format_val
|
| 126 |
+
)
|
| 127 |
+
save_to_dataset(get_dataset_repo_id(personal_token), personal_token, data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
except Exception as e:
|
|
|
|
| 129 |
print(f"Failed to save to private dataset: {e}")
|
| 130 |
|
| 131 |
return (
|
|
|
|
| 231 |
outputs=model_info_display,
|
| 232 |
)
|
| 233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
# Combined handler for login button click - updates all token-dependent UI
|
| 235 |
def handle_login_click(oauth_token: gr.OAuthToken | None = None):
    """Recompute every token-dependent UI value after the login button is clicked.

    Returns a 5-tuple, in order: the token status markdown, the save-mode help
    text, an update toggling ``refresh_private_btn`` on personal-token
    availability, an update keeping ``refresh_roost_btn`` interactive, and the
    dataset help text.
    """
    from ui.tab_testing import format_save_mode_help

    has_personal, has_org = check_token_availability(oauth_token)

    token_status_md = format_token_status(oauth_token)
    save_mode_help = format_save_mode_help(has_personal, has_org)
    private_btn_update = gr.update(interactive=has_personal)
    roost_btn_update = gr.update(interactive=True)
    dataset_help = format_dataset_help_text(has_personal, has_org)

    return (
        token_status_md,
        save_mode_help,
        private_btn_update,
        roost_btn_update,
        dataset_help,
    )
|
| 248 |
|
| 249 |
login_button.click(
|
|
|
|
| 261 |
# Dataset load handler
|
| 262 |
def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
    """Resolve the dropdown selection to a cached example and return its field values.

    The result has 15 entries: input, policy, model selection, reasoning
    effort, max tokens, temperature, top-p, system prompt, response format,
    formatted model info, label heading, categories/reasoning text, raw
    response, and two ``gr.update`` objects for the reasoning info and the
    reasoning trace. When the selection cannot be resolved, a list of 15
    ``None`` values is returned instead.
    """
    if (
        not cached_examples_list
        or not selected_label
        or not dropdown_choices_list
        or selected_label not in dropdown_choices_list
    ):
        return [None] * 15

    try:
        position = dropdown_choices_list.index(selected_label)
        # The choices list and the cached examples can differ in length.
        if position < 0 or position >= len(cached_examples_list):
            return [None] * 15

        example = cached_examples_list[position]
        policy = example.get("policy", "") or ""
        policy_violation = example.get("policy_violation", -1)
        model_selection = example.get("model_selection", "")
        reasoning_effort_val = example.get("reasoning_effort", "")
        reasoning_trace = example.get("reasoning_trace", "")

        # Format label text
        if policy_violation == 1:
            emoji = "❌"
            verdict = "Policy Violation Detected"
        elif policy_violation == 0:
            emoji = "✅"
            verdict = "No Policy Violation"
        else:
            emoji = "⚠️"
            verdict = "Unable to determine label"
        label_text = f"## {emoji} {verdict}"

        reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)
        reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())
        model_info = format_model_info(model_selection, reasoning_effort_val)

        return (
            example.get("input", ""),
            policy,
            example.get("model_selection", ""),
            reasoning_effort_val,
            example.get("max_tokens", 0),
            example.get("temperature", 0.0),
            example.get("top_p", 0.0),
            example.get("system_prompt", ""),
            example.get("response_format", ""),
            model_info,
            label_text,
            example.get("categories_and_reasoning", ""),
            example.get("raw_response", ""),
            gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
            gr.update(value=reasoning_trace or "", visible=reasoning_visible),
        )
    except (ValueError, IndexError):
        return [None] * 15
|
| 306 |
|
| 307 |
example_dropdown.change(
|
| 308 |
load_example_from_dataset,
|
ui/tab_dataset.py
CHANGED
|
@@ -8,7 +8,7 @@ import gradio as gr
|
|
| 8 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 9 |
|
| 10 |
from utils.dataset import get_dataset_repo_id, get_roost_dataset_repo_id, load_dataset_examples
|
| 11 |
-
from utils.helpers import check_token_availability, get_org_token, get_personal_token
|
| 12 |
from utils.model_interface import extract_model_id, get_model_info
|
| 13 |
|
| 14 |
|
|
@@ -26,12 +26,8 @@ def format_preview_markdown(example: dict) -> str:
|
|
| 26 |
model_name = model_info.get("name", model_id) if model_info else model_id or "Unknown"
|
| 27 |
|
| 28 |
# Format label with emoji
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
elif policy_violation == 0:
|
| 32 |
-
label_text = "✅ No Policy Violation"
|
| 33 |
-
else:
|
| 34 |
-
label_text = "⚠️ Unable to determine label"
|
| 35 |
|
| 36 |
# Truncate policy preview
|
| 37 |
policy_preview = policy # [:512] + "..." if len(policy) > 512 else policy
|
|
@@ -84,10 +80,7 @@ def build_dataset_tab() -> dict:
|
|
| 84 |
|
| 85 |
# Help text explaining token requirements
|
| 86 |
dataset_help_text = gr.Markdown(
|
| 87 |
-
value=(
|
| 88 |
-
f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n"
|
| 89 |
-
f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*"
|
| 90 |
-
),
|
| 91 |
visible=True
|
| 92 |
)
|
| 93 |
|
|
@@ -145,22 +138,14 @@ def build_dataset_tab() -> dict:
|
|
| 145 |
|
| 146 |
return "*Select an example to preview*"
|
| 147 |
|
| 148 |
-
def refresh_private(oauth_token: gr.OAuthToken | None = None):
|
| 149 |
-
"""Refresh private dataset."""
|
| 150 |
-
return refresh_dataset("private", oauth_token)
|
| 151 |
-
|
| 152 |
-
def refresh_roost(oauth_token: gr.OAuthToken | None = None):
|
| 153 |
-
"""Refresh ROOST dataset."""
|
| 154 |
-
return refresh_dataset("roost", oauth_token)
|
| 155 |
-
|
| 156 |
refresh_private_btn.click(
|
| 157 |
-
|
| 158 |
inputs=None, # OAuth token auto-injected
|
| 159 |
outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
|
| 160 |
)
|
| 161 |
|
| 162 |
refresh_roost_btn.click(
|
| 163 |
-
|
| 164 |
inputs=None, # OAuth token auto-injected
|
| 165 |
outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
|
| 166 |
)
|
|
|
|
| 8 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 9 |
|
| 10 |
from utils.dataset import get_dataset_repo_id, get_roost_dataset_repo_id, load_dataset_examples
|
| 11 |
+
from utils.helpers import check_token_availability, format_dataset_help_text, get_label_emoji, get_org_token, get_personal_token
|
| 12 |
from utils.model_interface import extract_model_id, get_model_info
|
| 13 |
|
| 14 |
|
|
|
|
| 26 |
model_name = model_info.get("name", model_id) if model_info else model_id or "Unknown"
|
| 27 |
|
| 28 |
# Format label with emoji
|
| 29 |
+
emoji = get_label_emoji(policy_violation)
|
| 30 |
+
label_text = f"{emoji} Policy Violation Detected" if policy_violation == 1 else f"{emoji} No Policy Violation" if policy_violation == 0 else f"{emoji} Unable to determine label"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
# Truncate policy preview
|
| 33 |
policy_preview = policy # [:512] + "..." if len(policy) > 512 else policy
|
|
|
|
| 80 |
|
| 81 |
# Help text explaining token requirements
|
| 82 |
dataset_help_text = gr.Markdown(
|
| 83 |
+
value=format_dataset_help_text(has_personal, has_org),
|
|
|
|
|
|
|
|
|
|
| 84 |
visible=True
|
| 85 |
)
|
| 86 |
|
|
|
|
| 138 |
|
| 139 |
return "*Select an example to preview*"
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
refresh_private_btn.click(
|
| 142 |
+
lambda oauth_token=None: refresh_dataset("private", oauth_token),
|
| 143 |
inputs=None, # OAuth token auto-injected
|
| 144 |
outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
|
| 145 |
)
|
| 146 |
|
| 147 |
refresh_roost_btn.click(
|
| 148 |
+
lambda oauth_token=None: refresh_dataset("roost", oauth_token),
|
| 149 |
inputs=None, # OAuth token auto-injected
|
| 150 |
outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state]
|
| 151 |
)
|
utils/constants.py
CHANGED
|
@@ -46,39 +46,6 @@ MODELS = [
|
|
| 46 |
},
|
| 47 |
]
|
| 48 |
|
| 49 |
-
UNUSED_MODELS = [
|
| 50 |
-
{
|
| 51 |
-
"name": "Qwen3-4B-Thinking",
|
| 52 |
-
"id": "Qwen/Qwen3-4B-Thinking-2507",
|
| 53 |
-
"is_thinking": True,
|
| 54 |
-
"supports_reasoning_level": False,
|
| 55 |
-
},
|
| 56 |
-
{
|
| 57 |
-
"name": "Qwen3-4B-Instruct",
|
| 58 |
-
"id": "Qwen/Qwen3-4B-Instruct-2507",
|
| 59 |
-
"is_thinking": False,
|
| 60 |
-
"supports_reasoning_level": False,
|
| 61 |
-
},
|
| 62 |
-
{
|
| 63 |
-
"name": "Qwen3-30B-Instruct",
|
| 64 |
-
"id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
|
| 65 |
-
"is_thinking": False,
|
| 66 |
-
"supports_reasoning_level": False,
|
| 67 |
-
},
|
| 68 |
-
{
|
| 69 |
-
"name": "GPT-OSS-20B",
|
| 70 |
-
"id": "openai/gpt-oss-20b",
|
| 71 |
-
"is_thinking": True,
|
| 72 |
-
"supports_reasoning_level": True,
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"name": "GPT-OSS-120B",
|
| 76 |
-
"id": "openai/gpt-oss-120b",
|
| 77 |
-
"is_thinking": True,
|
| 78 |
-
"supports_reasoning_level": True,
|
| 79 |
-
},
|
| 80 |
-
]
|
| 81 |
-
|
| 82 |
# Reasoning effort levels for GPT-OSS
|
| 83 |
REASONING_EFFORTS = ["Low", "Medium", "High"]
|
| 84 |
|
|
@@ -105,10 +72,10 @@ RESPONSE_FORMAT = """## Response Format
|
|
| 105 |
|
| 106 |
Given a content item, please provide a JSON-formatted response with the following fields:
|
| 107 |
- `label`: INT - 1 for a policy violation, 0 for no violation
|
| 108 |
-
- `categories`: LIST[DCT] - a list of categories from the disallowed and allowed categories that the content item falls into along with the reasoning for each category. Each dict has the following keys:
|
| 109 |
- `category`: STR - the category from the policy
|
| 110 |
- `reasoning`: STR - the reasoning for why the content item falls into this category
|
| 111 |
-
- `policy_source`: STR - specific text from the policy that best supports the reasoning. Use [...] to concatenate multi-part citations. Make sure to quote the policy text exactly and include all relevant passages.
|
| 112 |
"""
|
| 113 |
|
| 114 |
# Test examples from notebook
|
|
|
|
| 46 |
},
|
| 47 |
]
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# Reasoning effort levels for GPT-OSS
|
| 50 |
REASONING_EFFORTS = ["Low", "Medium", "High"]
|
| 51 |
|
|
|
|
| 72 |
|
| 73 |
Given a content item, please provide a JSON-formatted response with the following fields:
|
| 74 |
- `label`: INT - 1 for a policy violation, 0 for no violation
|
| 75 |
+
- `categories`: LIST[DCT] - a list of categories (at least one) from the disallowed and allowed categories that the content item falls into along with the reasoning for each category. Each dict has the following keys:
|
| 76 |
- `category`: STR - the category from the policy
|
| 77 |
- `reasoning`: STR - the reasoning for why the content item falls into this category
|
| 78 |
+
- `policy_source`: STR - specific text from the policy that best supports the reasoning. Use [...] to concatenate multi-part citations. Prioritize policy explanatory text over repeating the category name. Make sure to quote the policy text exactly and include all relevant passages.
|
| 79 |
"""
|
| 80 |
|
| 81 |
# Test examples from notebook
|
utils/dataset.py
CHANGED
|
@@ -6,6 +6,7 @@ from datasets import Dataset, load_dataset
|
|
| 6 |
from huggingface_hub import HfApi
|
| 7 |
|
| 8 |
from utils.model_interface import extract_model_id, get_model_info
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
def get_username_from_token(token: str | None) -> str:
|
|
@@ -175,12 +176,7 @@ def load_dataset_examples(repo_id: str, token: str | None) -> tuple[list[dict],
|
|
| 175 |
policy_violation = example.get("policy_violation", -1)
|
| 176 |
|
| 177 |
# Get label emoji
|
| 178 |
-
|
| 179 |
-
label_emoji = "❌"
|
| 180 |
-
elif policy_violation == 0:
|
| 181 |
-
label_emoji = "✅"
|
| 182 |
-
else:
|
| 183 |
-
label_emoji = "⚠️"
|
| 184 |
|
| 185 |
# Extract model name
|
| 186 |
model_id = extract_model_id(model_selection)
|
|
|
|
| 6 |
from huggingface_hub import HfApi
|
| 7 |
|
| 8 |
from utils.model_interface import extract_model_id, get_model_info
|
| 9 |
+
from utils.helpers import get_label_emoji
|
| 10 |
|
| 11 |
|
| 12 |
def get_username_from_token(token: str | None) -> str:
|
|
|
|
| 176 |
policy_violation = example.get("policy_violation", -1)
|
| 177 |
|
| 178 |
# Get label emoji
|
| 179 |
+
label_emoji = get_label_emoji(policy_violation)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
# Extract model name
|
| 182 |
model_id = extract_model_id(model_selection)
|
utils/helpers.py
CHANGED
|
@@ -6,6 +6,25 @@ import gradio as gr
|
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def get_personal_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
|
| 10 |
"""
|
| 11 |
Get personal Hugging Face token from OAuth or .env fallback.
|
|
@@ -20,30 +39,18 @@ def get_personal_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, s
|
|
| 20 |
- hf_token: Token string if available, None otherwise
|
| 21 |
- status_message: Warning message if using local .env, empty string otherwise
|
| 22 |
"""
|
| 23 |
-
print(f"DEBUG: get_personal_token called with oauth_token type: {type(oauth_token)}")
|
| 24 |
-
|
| 25 |
if oauth_token is None or (isinstance(oauth_token, str) and oauth_token == "Log in to Hugging Face"):
|
| 26 |
# Try loading from .env file
|
| 27 |
-
|
| 28 |
-
load_dotenv()
|
| 29 |
-
hf_token = os.getenv("HF_TOKEN_MLSOC")
|
| 30 |
if hf_token is None:
|
| 31 |
-
print("DEBUG: HF_TOKEN_MLSOC not found in .env")
|
| 32 |
return None, ""
|
| 33 |
-
|
| 34 |
-
print(f"DEBUG: Loaded token from .env, length: {len(hf_token)}, first 4 chars: {hf_token[:4] if len(hf_token) >= 4 else hf_token}")
|
| 35 |
-
return hf_token, "\n⚠️ Using local .env file for token (not online)"
|
| 36 |
else:
|
| 37 |
# OAuthToken object
|
| 38 |
-
print(f"DEBUG: oauth_token is OAuthToken object")
|
| 39 |
token = oauth_token.token
|
| 40 |
-
print(f"DEBUG: Extracted token from OAuthToken, length: {len(token) if token else 0}, first 4 chars: {token[:4] if token and len(token) >= 4 else (token if token else 'None')}")
|
| 41 |
if not token or not token.strip():
|
| 42 |
-
|
| 43 |
-
load_dotenv()
|
| 44 |
-
hf_token = os.getenv("HF_TOKEN_MLSOC")
|
| 45 |
if hf_token:
|
| 46 |
-
print(f"DEBUG: Loaded token from .env (empty OAuth case), length: {len(hf_token)}, first 4 chars: {hf_token[:4] if len(hf_token) >= 4 else hf_token}")
|
| 47 |
return hf_token, "\n⚠️ Using local .env file for token (not online)"
|
| 48 |
return None, ""
|
| 49 |
return token, ""
|
|
@@ -64,12 +71,7 @@ def get_org_token() -> str | None:
|
|
| 64 |
return org_token
|
| 65 |
|
| 66 |
# Fall back to .env file
|
| 67 |
-
|
| 68 |
-
org_token = os.getenv("ROOST_TOKEN_FALLBACK")
|
| 69 |
-
if org_token:
|
| 70 |
-
return org_token
|
| 71 |
-
|
| 72 |
-
return None
|
| 73 |
|
| 74 |
|
| 75 |
def get_inference_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
|
|
@@ -111,7 +113,7 @@ def format_token_status(oauth_token: gr.OAuthToken | None) -> str:
|
|
| 111 |
has_personal, has_org = check_token_availability(oauth_token)
|
| 112 |
|
| 113 |
lines = [
|
| 114 |
-
"You can log in to
|
| 115 |
"### Token Status",
|
| 116 |
]
|
| 117 |
|
|
|
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
|
| 8 |
|
| 9 |
+
def _load_token_from_env(env_var: str) -> str | None:
    """Read a token from the environment after loading the local ``.env`` file.

    Args:
        env_var: Name of the environment variable to read.

    Returns:
        The variable's value unchanged, or ``None`` when it is unset, empty,
        or contains only whitespace.
    """
    load_dotenv()
    value = os.getenv(env_var)
    # A blank value is not a usable token. Report it as missing so callers
    # that test ``is None`` do not treat "" as a configured token.
    if value is None or not value.strip():
        return None
    return value
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_label_emoji(policy_violation: int) -> str:
    """Return the emoji that represents a policy-violation label.

    ``1`` maps to a cross, ``0`` to a check mark, and any other value to a
    warning sign.
    """
    if policy_violation == 1:
        return "❌"
    if policy_violation == 0:
        return "✅"
    return "⚠️"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def format_dataset_help_text(has_personal: bool, has_org: bool) -> str:
    """Build the two-line markdown note describing dataset availability.

    The first line reports on the private dataset (driven by ``has_personal``)
    and the second on the ROOST dataset (driven by ``has_org``).
    """
    if has_personal:
        private_status = "✅ Available"
    else:
        private_status = "❌ Requires personal token (OAuth login or .env)"

    if has_org:
        roost_status = "✅ Available"
    else:
        roost_status = "⚠️ Can load if public, requires org token to save"

    private_line = f"*Private Dataset: {private_status}*"
    roost_line = f"*ROOST Dataset: {roost_status}*"
    return "\n".join((private_line, roost_line))
|
| 26 |
+
|
| 27 |
+
|
| 28 |
def get_personal_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
|
| 29 |
"""
|
| 30 |
Get personal Hugging Face token from OAuth or .env fallback.
|
|
|
|
| 39 |
- hf_token: Token string if available, None otherwise
|
| 40 |
- status_message: Warning message if using local .env, empty string otherwise
|
| 41 |
"""
|
|
|
|
|
|
|
| 42 |
if oauth_token is None or (isinstance(oauth_token, str) and oauth_token == "Log in to Hugging Face"):
|
| 43 |
# Try loading from .env file
|
| 44 |
+
hf_token = _load_token_from_env("HF_TOKEN_MLSOC")
|
|
|
|
|
|
|
| 45 |
if hf_token is None:
|
|
|
|
| 46 |
return None, ""
|
| 47 |
+
return hf_token, "\n⚠️ Using local .env file for token (not online)"
|
|
|
|
|
|
|
| 48 |
else:
|
| 49 |
# OAuthToken object
|
|
|
|
| 50 |
token = oauth_token.token
|
|
|
|
| 51 |
if not token or not token.strip():
|
| 52 |
+
hf_token = _load_token_from_env("HF_TOKEN_MLSOC")
|
|
|
|
|
|
|
| 53 |
if hf_token:
|
|
|
|
| 54 |
return hf_token, "\n⚠️ Using local .env file for token (not online)"
|
| 55 |
return None, ""
|
| 56 |
return token, ""
|
|
|
|
| 71 |
return org_token
|
| 72 |
|
| 73 |
# Fall back to .env file
|
| 74 |
+
return _load_token_from_env("ROOST_TOKEN_FALLBACK")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
def get_inference_token(oauth_token: gr.OAuthToken | None) -> tuple[str | None, str]:
|
|
|
|
| 113 |
has_personal, has_org = check_token_availability(oauth_token)
|
| 114 |
|
| 115 |
lines = [
|
| 116 |
+
"You can log in to your Hugging Face account to save your work in a private dataset and use the app for inference after the end of the hackathon.",
|
| 117 |
"### Token Status",
|
| 118 |
]
|
| 119 |
|