Fix: Always use Inference API on Spaces with better error handling
app.py CHANGED
@@ -170,10 +170,17 @@ EXAMPLE_QUESTIONS = [
 class InferenceAPIBot:
     """Wrapper that uses Hugging Face Inference API instead of loading models locally"""
 
-    def __init__(self, bot: RAGBot, hf_token: str):
-        """Initialize with a RAGBot (for vector DB) and HF token for Inference API"""
+    def __init__(self, bot: RAGBot, hf_token: Optional[str] = None):
+        """Initialize with a RAGBot (for vector DB) and optional HF token for Inference API"""
         self.bot = bot # Use bot for vector DB and formatting
-        self.client = InferenceClient(api_key=hf_token)
+        # Initialize client - token is optional for public models
+        if hf_token:
+            self.client = InferenceClient(api_key=hf_token)
+            logger.info("Using Inference API with provided token")
+        else:
+            # Try without token first (works for public models)
+            self.client = InferenceClient()
+            logger.info("Using Inference API without token (public models)")
         self.current_model = bot.args.model
         # Don't set args as attribute - access via bot.args instead
         logger.info(f"InferenceAPIBot initialized with model: {self.current_model}")
@@ -304,15 +311,30 @@ class GradioRAGInterface:
     def __init__(self, initial_bot: RAGBot, use_inference_api: bool = False):
         # Check if we should use Inference API (on Spaces)
         if use_inference_api and HF_INFERENCE_AVAILABLE:
+            # Try to get token, but it's optional for public models
+            # On Spaces, HF_TOKEN is automatically available
             hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
-            …
+            try:
                 self.bot = InferenceAPIBot(initial_bot, hf_token)
                 self.use_inference_api = True
-            …
+                if hf_token:
+                    logger.info("Using Hugging Face Inference API with token")
+                else:
+                    logger.info("Using Hugging Face Inference API without token (public models)")
+            except Exception as e:
+                logger.error(f"Failed to initialize Inference API: {e}")
+                # On Spaces, we MUST use Inference API - don't fall back to local
+                if IS_SPACES:
+                    logger.error("Cannot use local models on Spaces. Please configure HF_TOKEN.")
+                    raise RuntimeError(
+                        "Inference API initialization failed on Spaces. "
+                        "Please add HF_TOKEN as a secret in Space settings: "
+                        "https://huggingface.co/spaces/alrahrooh/cgt-llm-chatbot-v2/settings"
+                    )
+                else:
+                    logger.warning("Falling back to local model")
+                    self.bot = initial_bot
+                    self.use_inference_api = False
         else:
             self.bot = initial_bot
             self.use_inference_api = False
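`IS_SPACES` and `HF_INFERENCE_AVAILABLE` are used here but defined elsewhere in app.py. A plausible sketch of definitions consistent with how this hunk uses them (the actual module may differ; `SPACE_ID` is one of the environment variables Hugging Face Spaces sets in its containers):

```python
import os

# Hugging Face Spaces sets SPACE_ID in the container environment
IS_SPACES = os.getenv("SPACE_ID") is not None

# Guard the optional dependency so the app still imports without huggingface_hub
try:
    from huggingface_hub import InferenceClient
    HF_INFERENCE_AVAILABLE = True
except ImportError:
    HF_INFERENCE_AVAILABLE = False
```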
@@ -398,6 +420,21 @@ class GradioRAGInterface:
         if not question or not question.strip():
             return "Please enter a question.", "N/A", "", "", ""
 
+        # Check if we're on Spaces but not using Inference API
+        if IS_SPACES and not self.use_inference_api:
+            error_msg = """⚠️ **Configuration Error**
+
+This Space is not configured to use the Hugging Face Inference API.
+
+**To fix this:**
+1. Go to your Space settings: https://huggingface.co/spaces/alrahrooh/cgt-llm-chatbot-v2/settings
+2. Add a secret named `HF_TOKEN` with your Hugging Face token
+3. Get your token from: https://huggingface.co/settings/tokens
+4. Restart the Space
+
+**Note:** The Inference API is required on Spaces because we cannot load models locally."""
+            return error_msg, "N/A", "", "", ""
+
         try:
             start_time = time.time()
             logger.info(f"Processing question: {question[:50]}...")
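Both the guard above and the RuntimeError in the previous hunk hinge on the same token lookup, so it is worth verifying the token before deploying. A minimal check using huggingface_hub's `whoami`, which raises for an invalid token (the print messages are illustrative):

```python
import os
from huggingface_hub import whoami

# Same lookup order the app uses
token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
if token:
    # whoami raises an HTTP error if the token is invalid
    print("Token belongs to:", whoami(token=token)["name"])
else:
    print("No token set; the Inference API is limited to public models.")
```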