Spaces:

Hothaifa
/

Fatwa-hadith-assistant

Sleeping

App Files Files Community

Hothaifa committed on Nov 23, 2025

Commit

9fa6bd9

verified ·

1 Parent(s): 5ff10bc

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -36

app.py CHANGED Viewed

@@ -372,50 +372,82 @@ def build_result_row(row, score):
 @app.post("/hadith/search")
 def hadith_search(req: HadithSearchRequest, request: Request):
-    # Previous revision of the hadith search endpoint: rate-limits the caller,
-    # translates non-Arabic queries to Arabic, ranks rows of df_all_hadith by
-    # TF-IDF similarity and returns up to req.top_k rows scoring at least 0.1.
-    check_rate_limit(request.client.host or "unknown")
     lang = req.lang or "ar"
-    q_search = req.query
-    # For hadith: the query must be translated to Arabic to search the data
     if lang != "ar":
-        q_search = translate_wrapper(q_search, lang, "ar")
-    q_norm = normalize_ar(q_search)
-    kind, msg = run_hadith_guard(q_norm)
     if kind:
-        raise HTTPException(status_code=400, detail=translate_error_detail(msg, lang))
     words = q_norm.split()
-    if len(words) < 2: # relaxed the requirement slightly
-         raise HTTPException(status_code=400, detail=translate_error_detail("كلمات البحث قليلة جداً", lang))
-    if df_all_hadith.empty:
-        raise HTTPException(status_code=503, detail="قاعدة بيانات الأحاديث غير جاهزة")
-    # Filtering by source
-    search_df = df_all_hadith
-    if req.sources:
-        # Simple filtering logic (placeholder: no filtering is actually applied)
-        pass
-    q_vec = vectorizer.transform([q_norm])
-    sims = linear_kernel(q_vec, tfidf_matrix).flatten()
-    top_ind = sims.argsort()[::-1][:req.top_k]
-    results = []
-    for i in top_ind:
-        if sims[i] < 0.1: continue
-        row = df_all_hadith.iloc[i]
-        res = build_result_row(row, sims[i]*100)
-        # Translate the answer into the user's language
-        if lang != "ar":
-            res["matn"] = translate_wrapper(res["matn"], "ar", lang) + DISCLAIMERS.get(lang,"")
-        results.append(res)
-    if not results:
-        raise HTTPException(status_code=404, detail=translate_error_detail("لم يتم العثور على حديث", lang))
-    return {"query": req.query, "results": results}
 @app.post("/hadith/by_id")
 def hadith_by_id(req: HadithByIdRequest, request: Request):

 @app.post("/hadith/search")
 def hadith_search(req: HadithSearchRequest, request: Request):
+    ip = request.client.host or "unknown"
+    check_rate_limit(ip)
     lang = req.lang or "ar"
+    q = (req.query or "").strip()
+    q_for_search = q
     if lang != "ar":
+        q_for_search = translate_wrapper(q, source=lang, target="ar")
+        print(f"[HADITH-LANG] {q} -> {q_for_search}")
+    q_norm = normalize_ar(q_for_search)
+    # === [فحص سلامة تحميل البيانات لمنع خطأ 500 إذا فشل التحميل] ===
+    # التحقق من أن مصفوفة TFIDF تم تحميلها بنجاح في دالة startup
+    if not hasattr(vectorizer, 'vocabulary_') or tfidf_matrix is None or df_all.empty:
+        error_ar = "عفواً، بيانات الأحاديث لم يتم تحميلها بشكل صحيح على الخادم (خطأ في الإعداد)."
+        error_detail = translate_error_detail(error_ar, lang)
+        print(f"[ERROR] Hadith data (TFIDF/df_all) not loaded or globals missing.")
+        raise HTTPException(status_code=500, detail=error_detail)
+    # ===============================================================
+    kind, msg = run_guard(q_for_search, q_norm)
     if kind:
+        error_detail = translate_error_detail(msg, lang)
+        raise HTTPException(status_code=400, detail=error_detail)
     words = q_norm.split()
+    if len(words) < 4:
+        error_ar = "أدخل 4 كلمات على الأقل من نص الحديث (بالعربية)."
+        error_detail = translate_error_detail(error_ar, lang)
+        raise HTTPException(status_code=400, detail=error_detail)
+        if len(words) > 15:
+            q_norm = " ".join(words[:15])
+    try:
+        # ** منطق البحث الأساسي (نفس المنطق السابق) **
+        mask = filter_mask_by_sources(df_all, req.sources)
+        idxs = df_all[mask].index.values
+        if len(idxs) == 0:
+            error_ar = "لا توجد مصادر مطابقة للفلتر."
+            error_detail = translate_error_detail(error_ar, lang)
+            raise HTTPException(status_code=404, detail=error_detail)
+        q_vec = vectorizer.transform([q_norm])
+        sims = linear_kernel(q_vec, tfidf_matrix[idxs]).flatten()
+        top_idx_local = sims.argsort()[::-1][:20]
+        candidates = []
+        for i in top_idx_local:
+            gi = idxs[i]
+            row = df_all.iloc[gi]
+            base_score = sims[i] * 100
+            fuzz_score = fuzz.token_set_ratio(q_norm, row["matn_clean"])
+            source_bonus = 15 if row["source"] == "صحيح البخاري" else (10 if row["source"] == "صحيح مسلم" else 0)
+            grading_bonus = 5 if "صحيح" in (row.get("grading","") or "") else 0
+            final_score = base_score * 0.5 + fuzz_score * 0.5 + source_bonus + grading_bonus
+            candidates.append((row, final_score))
+        candidates = sorted(candidates, key=lambda x: x[1], reverse=True)[:(req.top_k or 5)]
+        results = []
+        for row, score in candidates:
+            res = build_result_row(row, score)
+            if lang != "ar":
+                res["matn"] = translate_wrapper(res["matn"], "ar", lang) + DISCLAIMERS.get(lang, "")
+            results.append(res)
+        return {"query": q, "results": results}
+    except Exception as e:
+        # رسالة خطأ 500 عامة في حالة فشل أي جزء من منطق البحث
+        print(f"[HADITH-SEARCH-CRITICAL-FAIL] Error: {e}")
+        error_ar = "حدث خطأ داخلي غير متوقع أثناء معالجة البحث عن الأحاديث."
+        error_detail = translate_error_detail(error_ar, lang)
+        raise HTTPException(status_code=500, detail=error_detail)
 @app.post("/hadith/by_id")
 def hadith_by_id(req: HadithByIdRequest, request: Request):