Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -372,50 +372,82 @@ def build_result_row(row, score):
|
|
| 372 |
|
| 373 |
@app.post("/hadith/search")
|
| 374 |
def hadith_search(req: HadithSearchRequest, request: Request):
|
| 375 |
-
|
|
|
|
| 376 |
lang = req.lang or "ar"
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
# للأحاديث: نحتاج الترجمة للعربية للبحث في الداتا
|
| 380 |
if lang != "ar":
|
| 381 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
|
| 383 |
-
|
| 384 |
-
kind, msg = run_hadith_guard(q_norm)
|
| 385 |
if kind:
|
| 386 |
-
|
|
|
|
|
|
|
| 387 |
|
| 388 |
words = q_norm.split()
|
| 389 |
-
if len(words) <
|
| 390 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
if sims[i] < 0.1: continue
|
| 408 |
-
row = df_all_hadith.iloc[i]
|
| 409 |
-
res = build_result_row(row, sims[i]*100)
|
| 410 |
-
# ترجمة الجواب للغة المستخدم
|
| 411 |
-
if lang != "ar":
|
| 412 |
-
res["matn"] = translate_wrapper(res["matn"], "ar", lang) + DISCLAIMERS.get(lang,"")
|
| 413 |
-
results.append(res)
|
| 414 |
-
|
| 415 |
-
if not results:
|
| 416 |
-
raise HTTPException(status_code=404, detail=translate_error_detail("لم يتم العثور على حديث", lang))
|
| 417 |
-
|
| 418 |
-
return {"query": req.query, "results": results}
|
| 419 |
|
| 420 |
@app.post("/hadith/by_id")
|
| 421 |
def hadith_by_id(req: HadithByIdRequest, request: Request):
|
|
|
|
| 372 |
|
| 373 |
@app.post("/hadith/search")
def hadith_search(req: HadithSearchRequest, request: Request):
    """Search the loaded hadith data for the texts closest to the query.

    Flow, in order:
      1. Rate-limit the caller by client IP via ``check_rate_limit``.
      2. If ``req.lang`` is not ``"ar"``, translate the query to Arabic with
         ``translate_wrapper`` so it can be matched against the data.
      3. Normalize the query with ``normalize_ar`` and verify that the
         module-level ``vectorizer`` / ``tfidf_matrix`` / ``df_all`` are usable.
      4. Run ``run_guard``; reject queries with fewer than 4 words and
         truncate queries longer than 15 words.
      5. Score the rows kept by ``filter_mask_by_sources`` with
         ``linear_kernel`` on the TF-IDF vectors, keep the 20 best, then
         re-rank them with a 50/50 blend of that score and
         ``fuzz.token_set_ratio`` plus fixed source/grading bonuses.
      6. Return the best ``req.top_k`` (default 5) rows, translating the
         ``matn`` field back to the caller's language when it is not Arabic.

    Args:
        req: Request body; the fields read here are ``query``, ``lang``,
            ``sources`` and ``top_k``.
        request: Incoming request, used only to obtain the client address.

    Returns:
        A dict ``{"query": <stripped query>, "results": [<row dict>, ...]}``
        where each row dict is produced by ``build_result_row``.

    Raises:
        HTTPException: 400 when the guard rejects the query or it has fewer
            than 4 words; 404 when no row matches the source filter; 500 when
            the hadith data is not loaded or the search fails unexpectedly.
            NOTE(review): ``check_rate_limit`` is defined elsewhere and
            presumably raises on its own when the limit is hit - confirm.
    """
    # ``request.client`` can be None (no peer information available), so guard
    # the attribute access instead of letting it fail with AttributeError.
    client = request.client
    ip = (client.host if client else None) or "unknown"
    check_rate_limit(ip)

    lang = req.lang or "ar"
    q = (req.query or "").strip()
    q_for_search = q

    if lang != "ar":
        q_for_search = translate_wrapper(q, source=lang, target="ar")
        print(f"[HADITH-LANG] {q} -> {q_for_search}")

    q_norm = normalize_ar(q_for_search)

    # Data-load sanity check: report a clear error when the TF-IDF artifacts
    # were not loaded, instead of failing later with an opaque exception.
    if not hasattr(vectorizer, 'vocabulary_') or tfidf_matrix is None or df_all.empty:
        error_ar = "عفواً، بيانات الأحاديث لم يتم تحميلها بشكل صحيح على الخادم (خطأ في الإعداد)."
        error_detail = translate_error_detail(error_ar, lang)
        print("[ERROR] Hadith data (TFIDF/df_all) not loaded or globals missing.")
        raise HTTPException(status_code=500, detail=error_detail)

    kind, msg = run_guard(q_for_search, q_norm)
    if kind:
        error_detail = translate_error_detail(msg, lang)
        raise HTTPException(status_code=400, detail=error_detail)

    words = q_norm.split()
    if len(words) < 4:
        error_ar = "أدخل 4 كلمات على الأقل من نص الحديث (بالعربية)."
        error_detail = translate_error_detail(error_ar, lang)
        raise HTTPException(status_code=400, detail=error_detail)
    if len(words) > 15:
        # Only the first 15 words are used for matching.
        q_norm = " ".join(words[:15])

    try:
        mask = filter_mask_by_sources(df_all, req.sources)
        # NOTE(review): these are index *labels* but they are used below as
        # positions (tfidf_matrix[idxs], df_all.iloc[gi]); that is only
        # consistent if df_all has a default 0..n-1 index - confirm.
        idxs = df_all[mask].index.values
        if len(idxs) == 0:
            error_ar = "لا توجد مصادر مطابقة للفلتر."
            error_detail = translate_error_detail(error_ar, lang)
            raise HTTPException(status_code=404, detail=error_detail)

        q_vec = vectorizer.transform([q_norm])
        sims = linear_kernel(q_vec, tfidf_matrix[idxs]).flatten()

        # Shortlist the 20 highest TF-IDF similarities, then re-rank them.
        top_idx_local = sims.argsort()[::-1][:20]
        candidates = []
        for i in top_idx_local:
            gi = idxs[i]
            row = df_all.iloc[gi]
            base_score = sims[i] * 100
            fuzz_score = fuzz.token_set_ratio(q_norm, row["matn_clean"])
            source_bonus = 15 if row["source"] == "صحيح البخاري" else (10 if row["source"] == "صحيح مسلم" else 0)
            # A missing grading may come back as None or NaN; only test
            # membership on real strings so one bad cell cannot abort the search.
            grading = row.get("grading", "")
            grading_bonus = 5 if isinstance(grading, str) and "صحيح" in grading else 0
            final_score = base_score * 0.5 + fuzz_score * 0.5 + source_bonus + grading_bonus
            candidates.append((row, final_score))

        candidates = sorted(candidates, key=lambda x: x[1], reverse=True)[:(req.top_k or 5)]

        results = []
        for row, score in candidates:
            res = build_result_row(row, score)
            if lang != "ar":
                # Translate the text back to the caller's language and append
                # the per-language disclaimer, if one is configured.
                res["matn"] = translate_wrapper(res["matn"], "ar", lang) + DISCLAIMERS.get(lang, "")
            results.append(res)

        return {"query": q, "results": results}

    except HTTPException:
        # Deliberate HTTP errors raised above (e.g. the 404 for an empty
        # source filter) must reach the client unchanged, not become a 500.
        raise
    except Exception as e:
        # Generic 500 for any unexpected failure in the search logic.
        print(f"[HADITH-SEARCH-CRITICAL-FAIL] Error: {e}")
        error_ar = "حدث خطأ داخلي غير متوقع أثناء معالجة البحث عن الأحاديث."
        error_detail = translate_error_detail(error_ar, lang)
        raise HTTPException(status_code=500, detail=error_detail) from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 451 |
|
| 452 |
@app.post("/hadith/by_id")
|
| 453 |
def hadith_by_id(req: HadithByIdRequest, request: Request):
|