Hothaifa committed on
Commit
9fa6bd9
·
verified ·
1 Parent(s): 5ff10bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -36
app.py CHANGED
@@ -372,50 +372,82 @@ def build_result_row(row, score):
372
 
373
  @app.post("/hadith/search")
374
  def hadith_search(req: HadithSearchRequest, request: Request):
375
- check_rate_limit(request.client.host or "unknown")
 
376
  lang = req.lang or "ar"
377
- q_search = req.query
378
-
379
- # للأحاديث: نحتاج الترجمة للعربية للبحث في الداتا
380
  if lang != "ar":
381
- q_search = translate_wrapper(q_search, lang, "ar")
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
- q_norm = normalize_ar(q_search)
384
- kind, msg = run_hadith_guard(q_norm)
385
  if kind:
386
- raise HTTPException(status_code=400, detail=translate_error_detail(msg, lang))
 
 
387
 
388
  words = q_norm.split()
389
- if len(words) < 2: # خففنا الشرط قليلاً
390
- raise HTTPException(status_code=400, detail=translate_error_detail("كلمات البحث قليلة جداً", lang))
 
 
 
 
 
391
 
392
- if df_all_hadith.empty:
393
- raise HTTPException(status_code=503, detail="قاعدة بيانات الأحاديث غير جاهزة")
394
-
395
- # الفلترة بالمصدر
396
- search_df = df_all_hadith
397
- if req.sources:
398
- # منطق بسيط للفلترة
399
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
- q_vec = vectorizer.transform([q_norm])
402
- sims = linear_kernel(q_vec, tfidf_matrix).flatten()
403
- top_ind = sims.argsort()[::-1][:req.top_k]
404
-
405
- results = []
406
- for i in top_ind:
407
- if sims[i] < 0.1: continue
408
- row = df_all_hadith.iloc[i]
409
- res = build_result_row(row, sims[i]*100)
410
- # ترجمة الجواب للغة المستخدم
411
- if lang != "ar":
412
- res["matn"] = translate_wrapper(res["matn"], "ar", lang) + DISCLAIMERS.get(lang,"")
413
- results.append(res)
414
-
415
- if not results:
416
- raise HTTPException(status_code=404, detail=translate_error_detail("لم يتم العثور على حديث", lang))
417
-
418
- return {"query": req.query, "results": results}
419
 
420
  @app.post("/hadith/by_id")
421
  def hadith_by_id(req: HadithByIdRequest, request: Request):
 
372
 
373
  @app.post("/hadith/search")
374
  def hadith_search(req: HadithSearchRequest, request: Request):
375
+ ip = request.client.host or "unknown"
376
+ check_rate_limit(ip)
377
  lang = req.lang or "ar"
378
+ q = (req.query or "").strip()
379
+ q_for_search = q
 
380
  if lang != "ar":
381
+ q_for_search = translate_wrapper(q, source=lang, target="ar")
382
+ print(f"[HADITH-LANG] {q} -> {q_for_search}")
383
+
384
+ q_norm = normalize_ar(q_for_search)
385
+
386
+ # === [فحص سلامة تحميل البيانات لمنع خطأ 500 إذا فشل التحميل] ===
387
+ # التحقق من أن مصفوفة TFIDF تم تحميلها بنجاح في دالة startup
388
+ if not hasattr(vectorizer, 'vocabulary_') or tfidf_matrix is None or df_all.empty:
389
+ error_ar = "عفواً، بيانات الأحاديث لم يتم تحميلها بشكل صحيح على الخادم (خطأ في الإعداد)."
390
+ error_detail = translate_error_detail(error_ar, lang)
391
+ print(f"[ERROR] Hadith data (TFIDF/df_all) not loaded or globals missing.")
392
+ raise HTTPException(status_code=500, detail=error_detail)
393
+ # ===============================================================
394
 
395
+ kind, msg = run_guard(q_for_search, q_norm)
 
396
  if kind:
397
+ error_detail = translate_error_detail(msg, lang)
398
+
399
+ raise HTTPException(status_code=400, detail=error_detail)
400
 
401
  words = q_norm.split()
402
+ if len(words) < 4:
403
+
404
+ error_ar = "أدخل 4 كلمات على الأقل من نص الحديث (بالعربية)."
405
+ error_detail = translate_error_detail(error_ar, lang)
406
+ raise HTTPException(status_code=400, detail=error_detail)
407
+ if len(words) > 15:
408
+ q_norm = " ".join(words[:15])
409
 
410
+ try:
411
+ # ** منطق البحث الأساسي (نفس المنطق السابق) **
412
+ mask = filter_mask_by_sources(df_all, req.sources)
413
+ idxs = df_all[mask].index.values
414
+ if len(idxs) == 0:
415
+ error_ar = "لا توجد مصادر مطابقة للفلتر."
416
+ error_detail = translate_error_detail(error_ar, lang)
417
+ raise HTTPException(status_code=404, detail=error_detail)
418
+
419
+ q_vec = vectorizer.transform([q_norm])
420
+ sims = linear_kernel(q_vec, tfidf_matrix[idxs]).flatten()
421
+
422
+ top_idx_local = sims.argsort()[::-1][:20]
423
+ candidates = []
424
+ for i in top_idx_local:
425
+ gi = idxs[i]
426
+ row = df_all.iloc[gi]
427
+ base_score = sims[i] * 100
428
+ fuzz_score = fuzz.token_set_ratio(q_norm, row["matn_clean"])
429
+ source_bonus = 15 if row["source"] == "صحيح البخاري" else (10 if row["source"] == "صحيح مسلم" else 0)
430
+ grading_bonus = 5 if "صحيح" in (row.get("grading","") or "") else 0
431
+ final_score = base_score * 0.5 + fuzz_score * 0.5 + source_bonus + grading_bonus
432
+ candidates.append((row, final_score))
433
+
434
+ candidates = sorted(candidates, key=lambda x: x[1], reverse=True)[:(req.top_k or 5)]
435
+
436
+ results = []
437
+ for row, score in candidates:
438
+ res = build_result_row(row, score)
439
+ if lang != "ar":
440
+ res["matn"] = translate_wrapper(res["matn"], "ar", lang) + DISCLAIMERS.get(lang, "")
441
+ results.append(res)
442
+
443
+ return {"query": q, "results": results}
444
 
445
+ except Exception as e:
446
+ # رسالة خطأ 500 عامة في حالة فشل أي جزء من منطق البحث
447
+ print(f"[HADITH-SEARCH-CRITICAL-FAIL] Error: {e}")
448
+ error_ar = "حدث خطأ داخلي غير متوقع أثناء معالجة البحث عن الأحاديث."
449
+ error_detail = translate_error_detail(error_ar, lang)
450
+ raise HTTPException(status_code=500, detail=error_detail)
 
 
 
 
 
 
 
 
 
 
 
 
451
 
452
  @app.post("/hadith/by_id")
453
  def hadith_by_id(req: HadithByIdRequest, request: Request):