Starberry15 committed on
Commit fa9ab75 · verified · 1 Parent(s): 6f1b533

Update src/streamlit_app.py

Files changed (1):
  1. src/streamlit_app.py +138 -205
src/streamlit_app.py CHANGED
@@ -1,5 +1,5 @@
  # ======================================================
- # 📚 Handbook Assistant — Handbook-only, source-cited answers
  # ======================================================
  # Requirements:
  # pip install streamlit python-dotenv PyPDF2 numpy faiss-cpu scikit-learn huggingface-hub streamlit-chat sentence-transformers
@@ -15,305 +15,238 @@ import numpy as np
  import streamlit as st
  from dotenv import load_dotenv
  import PyPDF2
- from huggingface_hub import InferenceClient, login
  from streamlit_chat import message as st_message
- from sentence_transformers import SentenceTransformer  # ⚡ NEW: local embedder

  # ======================================================
- # ⚙️ PAGE CONFIG — must be first Streamlit call
  # ======================================================
  st.set_page_config(page_title="📚 Handbook Assistant", page_icon="📘", layout="wide")
- st.title("📚 Handbook Assistant — Handbook-only, source-cited answers")
- st.caption("Place your handbook PDF(s) in the same folder as this app (e.g., handbook.pdf).")

- # ======================================================
- # 🔐 ENVIRONMENT SETUP
- # ======================================================
  load_dotenv()
- HF_TOKEN = os.getenv("HF_TOKEN")

- if HF_TOKEN:
-     try:
-         login(HF_TOKEN)
-     except Exception:
-         pass
- else:
-     st.info("HF_TOKEN not found. Hugging Face model calls will be limited. Local embeddings are used instead.")
-
- # ======================================================
- # 📦 MODEL SETTINGS
- # ======================================================
- DEFAULT_OPEN_SOURCE_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
- AVAILABLE_MODELS = {
-     "Mistral 7B Instruct (Open Source – recommended)": DEFAULT_OPEN_SOURCE_MODEL,
- }

  # ======================================================
- # 🧭 SIDEBAR
  # ======================================================
  with st.sidebar:
      st.header("⚙️ Settings")
-     selected_model_name = st.selectbox("Model", options=list(AVAILABLE_MODELS.keys()), index=0)
-     selected_model_id = AVAILABLE_MODELS[selected_model_name]
      similarity_threshold = st.slider("Similarity threshold", 0.3, 0.95, 0.62, 0.01)
-     top_k = st.slider("Number of retrieved chunks (top k)", 1, 10, 4)
-     chunk_size_chars = st.number_input("Chunk size (characters)", 400, 2500, 1200, 100)
-     chunk_overlap = st.number_input("Chunk overlap (chars)", 20, 600, 150, 10)
-     regenerate_index = st.button("🔁 Rebuild handbook index")
-     st.markdown("**Storage:** FAISS index + metadata saved to disk for faster restarts.")

- # ======================================================
- # 🔗 HUGGING FACE CLIENT
- # ======================================================
- hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else None

  # ======================================================
- # 📘 FILE UTILITIES
  # ======================================================
- HAND_INDEX_FN = "handbook_faiss.index"
- HAND_META_FN = "handbook_metadata.json"
- HAND_EMB_DIM_FN = "handbook_emb_dim.json"
-
- try:
-     import faiss
- except Exception:
-     faiss = None

  def find_pdfs(patterns=["handbook*.pdf", "*.pdf"]) -> List[str]:
-     """Find handbook PDFs robustly."""
      base_dir = os.path.dirname(os.path.abspath(__file__))
      files = []
      for patt in patterns:
-         matched = glob.glob(os.path.join(base_dir, patt))
-         if matched:
-             files = matched
-             break
-     if not files:
-         for patt in patterns:
-             matched = glob.glob(patt)
-             if matched:
-                 files = matched
-                 break
-     if "uploaded_pdf_path" in st.session_state and os.path.exists(st.session_state.uploaded_pdf_path):
          files = [st.session_state.uploaded_pdf_path]
-     return sorted(files)

  def load_pdf_texts_with_page_info(pdf_paths: List[str]) -> List[Dict[str, Any]]:
-     pages = []
      for p in pdf_paths:
          try:
              with open(p, "rb") as f:
                  reader = PyPDF2.PdfReader(f)
                  for i, page in enumerate(reader.pages):
-                     text = page.extract_text() or ""
                      if text.strip():
-                         pages.append({"filename": os.path.basename(p), "page": i + 1, "text": text})
          except Exception as e:
-             st.warning(f"Failed reading {p}: {e}")
-     return pages

- def chunk_pages_into_segments(pages: List[Dict[str, Any]], chunk_size: int, overlap: int):
      chunks = []
      for pg in pages:
-         text, filename, page_no = pg["text"], pg["filename"], pg["page"]
          start, chunk_id = 0, 0
          while start < len(text):
              end = min(start + chunk_size, len(text))
              seg = text[start:end].strip()
-             if len(seg) >= 30:
                  chunks.append({
                      "filename": filename,
                      "page": page_no,
                      "chunk_id": f"{filename}_p{page_no}_c{chunk_id}",
                      "text": seg
                  })
-             chunk_id += 1
              start = end - overlap
              if start < 0:
                  start = 0
      return chunks

- # ======================================================
- # ⚡ LOCAL EMBEDDING SETUP
- # ======================================================
- @st.cache_resource(show_spinner=False)
- def get_local_embedder():
-     """Load and cache the fast local sentence transformer."""
-     return SentenceTransformer("all-MiniLM-L6-v2")
-
- def local_embeddings_for_texts(texts: List[str]) -> List[np.ndarray]:
-     """Generate normalized MiniLM embeddings quickly."""
      model = get_local_embedder()
-     embs = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
-     return [np.array(e, dtype=np.float32) for e in embs]
-
- def fallback_vectorize(texts: List[str]):
-     """Lightweight TF-IDF-like fallback (no external model)."""
-     token_doc_freq, token_lists = {}, []
-     for t in texts:
-         tokens = [w.lower().strip(".,:;()[]{}\"'") for w in t.split() if len(w) > 2]
-         token_lists.append(tokens)
-         for tok in set(tokens):
-             token_doc_freq[tok] = token_doc_freq.get(tok, 0) + 1
-     token_index = {tok: i for i, tok in enumerate(token_doc_freq.keys())}
-     n_docs = len(texts)
-     vecs = []
-     for tokens in token_lists:
-         vec = np.zeros(len(token_index), dtype=np.float32)
-         for tok in tokens:
-             vec[token_index[tok]] += 1.0
-         for tok, idx in token_index.items():
-             df = token_doc_freq[tok]
-             if df > 0:
-                 vec[idx] *= math.log((1 + n_docs) / (1 + df))
-         norm = np.linalg.norm(vec)
-         if norm > 0:
-             vec /= norm
-         vecs.append(vec)
-     return vecs
-
- # ======================================================
- # 🧠 FAISS HELPERS
- # ======================================================
- def build_faiss_index(embeddings: List[np.ndarray]):
      if faiss is None:
-         raise RuntimeError("faiss not installed. Run: pip install faiss-cpu")
-     arr = np.vstack(embeddings).astype("float32")
-     arr /= np.linalg.norm(arr, axis=1, keepdims=True)
-     index = faiss.IndexFlatIP(arr.shape[1])
-     index.add(arr)
-     return index, arr.shape[1]
-
- def save_index_and_metadata(index, metadata, dim):
      faiss.write_index(index, HAND_INDEX_FN)
-     json.dump(metadata, open(HAND_META_FN, "w", encoding="utf-8"), indent=2)
-     json.dump({"dim": dim}, open(HAND_EMB_DIM_FN, "w", encoding="utf-8"))

  def load_index_and_metadata():
      if not (os.path.exists(HAND_INDEX_FN) and os.path.exists(HAND_META_FN)):
          return None, None
      index = faiss.read_index(HAND_INDEX_FN)
-     metadata = json.load(open(HAND_META_FN, "r", encoding="utf-8"))
-     return index, metadata

  # ======================================================
- # 🔁 INDEX BUILDER
  # ======================================================
  def ensure_handbook_index(rebuild=False):
-     if st.session_state.get("handbook_ready") and not rebuild:
          return
      pdfs = find_pdfs()
      if not pdfs:
-         st.error("No handbook PDF found.")
          return
-     if not rebuild and os.path.exists(HAND_INDEX_FN):
-         try:
-             index, meta = load_index_and_metadata()
              st.session_state.faiss_index = index
-             st.session_state.metadata = meta
              st.session_state.handbook_ready = True
-             st.success(f"Loaded existing FAISS index ({len(meta)} chunks).")
              return
-         except Exception as e:
-             st.warning(f"Reload failed: {e}. Rebuilding…")

-     with st.spinner("⚙️ Building FAISS index locally with MiniLM…"):
-         pages = load_pdf_texts_with_page_info(pdfs)
-         chunks = chunk_pages_into_segments(pages, chunk_size_chars, chunk_overlap)
-         texts = [c["text"] for c in chunks]
-         try:
-             embs = local_embeddings_for_texts(texts)
-         except Exception as e:
-             st.warning(f"Local MiniLM failed ({e}); using fallback.")
-             embs = fallback_vectorize(texts)
-         index, dim = build_faiss_index(embs)
-         save_index_and_metadata(index, chunks, dim)
-         st.session_state.faiss_index = index
-         st.session_state.metadata = chunks
-         st.session_state.handbook_ready = True
-         st.success(f"✅ Indexed {len(chunks)} chunks.")
-
- if regenerate_index:
-     ensure_handbook_index(True)
- elif "handbook_ready" not in st.session_state:
-     ensure_handbook_index(False)

  # ======================================================
- # 🔎 RETRIEVAL + CHAT
  # ======================================================
- def embed_query(query: str):
-     try:
-         emb = local_embeddings_for_texts([query])[0]
-     except Exception:
-         emb = fallback_vectorize([query])[0]
-     return emb / (np.linalg.norm(emb) or 1)

  def retrieve_top_chunks(query: str, k: int):
      index = st.session_state.get("faiss_index")
      metadata = st.session_state.get("metadata", [])
-     if index is None or not metadata:
          return [], []
      q_emb = embed_query(query).reshape(1, -1)
      D, I = index.search(q_emb, k)
      results = [metadata[i] for i in I[0] if i < len(metadata)]
      return results, D[0].tolist()

- PROMPT_TEMPLATE = """You are HandbookAssistant. Answer **only** from the handbook excerpts below.
- If the answer isn't found, say exactly:
- "Sorry, I can only answer questions based on the school's handbook."
- Always cite sources like (Source: <filename>, page <page>, chunk <chunk_id>)."""
-
- def build_prompt(chunks, question):
-     excerpts = "\n\n".join([f"--- {c['chunk_id']} ({c['filename']}, page {c['page']}) ---\n{c['text']}" for c in chunks])
-     return f"{PROMPT_TEMPLATE}\n\n{excerpts}\n\nUser: {question}\nAnswer:"
-
- def call_hf_model(prompt, model_id):
-     resp = hf_client.text_generation(model=model_id, inputs=prompt, max_new_tokens=512, temperature=0.2)
-     if isinstance(resp, dict) and "generated_text" in resp:
-         return resp["generated_text"]
-     if isinstance(resp, list) and resp and "generated_text" in resp[0]:
-         return resp[0]["generated_text"]
-     return str(resp)
-
  # ======================================================
- # 💬 CHAT INTERFACE
  # ======================================================
- if "chat_history" not in st.session_state:
-     st.session_state.chat_history = []

  st.divider()
  st.subheader("💬 Ask the handbook")

- user_input = st.chat_input("Ask a question about the handbook…")
-
  if user_input:
      st_message(user_input, is_user=True)
-     retrieved, scores = retrieve_top_chunks(user_input, int(top_k))
-     if not retrieved or (scores and max(scores) < similarity_threshold):
-         reply = "Sorry, I can only answer questions based on the school's handbook."
      else:
-         prompt = build_prompt(retrieved, user_input)
-         try:
-             reply = call_hf_model(prompt, selected_model_id)
-         except Exception as e:
-             reply = f"⚠️ Model error: {e}"
-     st_message(reply, is_user=False)
-     st.session_state.chat_history.append({"role": "user", "content": user_input})
-     st.session_state.chat_history.append({"role": "assistant", "content": reply})

  # ======================================================
- # 🗂️ CHAT HISTORY + TOOLS
  # ======================================================
  st.divider()
  st.subheader("Conversation History")
- for i, msg in enumerate(st.session_state.chat_history):
-     st_message(msg["content"], is_user=(msg["role"] == "user"), key=f"hist_{i}")
-
- col1, col2 = st.columns([1, 1])
- with col1:
-     if st.button("🔄 Reset chat"):
-         st.session_state.chat_history = []
-         st.success("Chat reset.")
- with col2:
-     transcript = "\n\n".join([f"{m['role'].upper()}: {m['content']}" for m in st.session_state.chat_history])
-     st.download_button("📥 Download transcript", data=transcript, file_name="handbook_transcript.txt")

  # ======================================================
+ # 📘 Handbook Assistant (FAST OPTIMIZED VERSION)
  # ======================================================
  # Requirements:
  # pip install streamlit python-dotenv PyPDF2 numpy faiss-cpu scikit-learn huggingface-hub streamlit-chat sentence-transformers

  import streamlit as st
  from dotenv import load_dotenv
  import PyPDF2
  from streamlit_chat import message as st_message
+
+ # Optional fast embedding model
+ from sentence_transformers import SentenceTransformer
+
+ # Try FAISS
+ try:
+     import faiss
+ except Exception:
+     faiss = None

  # ======================================================
+ # ⚙️ CONFIGURATION
  # ======================================================
  st.set_page_config(page_title="📚 Handbook Assistant", page_icon="📘", layout="wide")
+ st.title("📚 Handbook Assistant — Fast Local Version")
+ st.caption("Place your handbook PDF (e.g., handbook.pdf) beside this script or upload below.")

  load_dotenv()

+ # File names for saving
+ HAND_INDEX_FN = "handbook_faiss.index"
+ HAND_META_FN = "handbook_metadata.json"
+ HAND_EMB_DIM_FN = "handbook_emb_dim.json"

  # ======================================================
+ # ⚙️ SIDEBAR SETTINGS
  # ======================================================
  with st.sidebar:
      st.header("⚙️ Settings")
+
      similarity_threshold = st.slider("Similarity threshold", 0.3, 0.95, 0.62, 0.01)
+     top_k = st.slider("Top chunks retrieved", 1, 10, 4)
+     chunk_size_chars = st.number_input("Chunk size (chars)", min_value=400, max_value=3000, value=2000, step=100)
+     chunk_overlap = st.number_input("Chunk overlap (chars)", min_value=20, max_value=600, value=100, step=10)
+     regenerate_index = st.button("🔁 Rebuild handbook index (force re-embed)")
+     st.markdown("**Storage:** Cached FAISS index + metadata for fast restarts.")

+     uploaded_pdf = st.file_uploader("📄 Upload handbook PDF", type=["pdf"])
+     if uploaded_pdf:
+         temp_path = os.path.join(os.path.dirname(__file__), uploaded_pdf.name)
+         with open(temp_path, "wb") as f:
+             f.write(uploaded_pdf.getbuffer())
+         st.session_state.uploaded_pdf_path = temp_path
+         st.success(f"✅ Uploaded and saved: {uploaded_pdf.name}")

  # ======================================================
+ # 🧩 UTILITIES
  # ======================================================
+ @st.cache_resource(show_spinner=False)
+ def get_local_embedder():
+     """Load MiniLM model (only once)."""
+     return SentenceTransformer("all-MiniLM-L6-v2")

  def find_pdfs(patterns=["handbook*.pdf", "*.pdf"]) -> List[str]:
+     """Find handbook PDFs in script folder or uploaded ones."""
      base_dir = os.path.dirname(os.path.abspath(__file__))
      files = []
      for patt in patterns:
+         files += glob.glob(os.path.join(base_dir, patt))
+     if not files and "uploaded_pdf_path" in st.session_state:
          files = [st.session_state.uploaded_pdf_path]
+     return sorted(list(set(files)))

  def load_pdf_texts_with_page_info(pdf_paths: List[str]) -> List[Dict[str, Any]]:
+     """Extract text from each page with filename and page number."""
+     all_pages = []
      for p in pdf_paths:
          try:
              with open(p, "rb") as f:
                  reader = PyPDF2.PdfReader(f)
                  for i, page in enumerate(reader.pages):
+                     try:
+                         text = page.extract_text() or ""
+                     except Exception:
+                         text = ""
                      if text.strip():
+                         all_pages.append({"filename": os.path.basename(p), "page": i + 1, "text": text})
          except Exception as e:
+             st.warning(f"⚠️ Failed to read {p}: {e}")
+     return all_pages

+ def chunk_pages_into_segments(pages: List[Dict[str, Any]], chunk_size: int, overlap: int) -> List[Dict[str, Any]]:
+     """Split long page text into overlapping chunks."""
      chunks = []
      for pg in pages:
+         text = pg["text"]
+         filename, page_no = pg["filename"], pg["page"]
          start, chunk_id = 0, 0
          while start < len(text):
              end = min(start + chunk_size, len(text))
              seg = text[start:end].strip()
+             if len(seg) > 50:
                  chunks.append({
                      "filename": filename,
                      "page": page_no,
                      "chunk_id": f"{filename}_p{page_no}_c{chunk_id}",
                      "text": seg
                  })
+             chunk_id += 1
              start = end - overlap
              if start < 0:
                  start = 0
      return chunks

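One caveat on the sliding window above: once end reaches len(text), start = end - overlap stays below len(text), so the loop can keep re-emitting the final tail of a page; in practice a terminal guard is needed. A minimal sketch of the windowing arithmetic with that guard (a hypothetical helper, not part of this commit):

def window_spans(n_chars: int, chunk_size: int = 2000, overlap: int = 100):
    """Illustrative only: the (start, end) spans the chunker walks over."""
    spans, start = [], 0
    while start < n_chars:
        end = min(start + chunk_size, n_chars)
        spans.append((start, end))
        if end >= n_chars:
            break  # guard: without this, start = end - overlap repeats the tail
        start = end - overlap  # consecutive chunks share `overlap` characters
    return spans

print(window_spans(5000))  # [(0, 2000), (1900, 3900), (3800, 5000)]
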
+ def embed_texts(texts: List[str], batch_size: int = 16) -> np.ndarray:
+     """Fast local embedding using MiniLM in batches."""
      model = get_local_embedder()
+     all_embeddings = []
+     for i in range(0, len(texts), batch_size):
+         batch = texts[i:i + batch_size]
+         emb = model.encode(batch, convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=False)
+         all_embeddings.append(emb)
+     return np.vstack(all_embeddings)
+
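Because normalize_embeddings=True returns unit-length vectors, the inner product FAISS computes equals cosine similarity, which is what lets the IndexFlatIP index built below act as a cosine index. A small self-contained check (a sketch; the two sentences are arbitrary):

import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
vecs = model.encode(["school uniform policy", "dress code rules"],
                    convert_to_numpy=True, normalize_embeddings=True)
a, b = vecs[0], vecs[1]
print(np.linalg.norm(a))    # ~1.0: unit length
print(float(np.dot(a, b)))  # inner product == cosine similarity for unit vectors
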
+ def build_faiss_index(embeddings: np.ndarray):
+     """Build FAISS cosine index."""
      if faiss is None:
+         raise RuntimeError("❌ FAISS not installed (pip install faiss-cpu)")
+     d = embeddings.shape[1]
+     index = faiss.IndexFlatIP(d)
+     index.add(embeddings)
+     return index, d
+
+ def save_index_and_metadata(index, metadata, emb_dim: int):
      faiss.write_index(index, HAND_INDEX_FN)
+     with open(HAND_META_FN, "w", encoding="utf-8") as f:
+         json.dump(metadata, f, indent=2)
+     with open(HAND_EMB_DIM_FN, "w") as f:
+         json.dump({"dim": emb_dim}, f)

  def load_index_and_metadata():
      if not (os.path.exists(HAND_INDEX_FN) and os.path.exists(HAND_META_FN)):
          return None, None
      index = faiss.read_index(HAND_INDEX_FN)
+     with open(HAND_META_FN, "r", encoding="utf-8") as f:
+         meta = json.load(f)
+     with open(HAND_EMB_DIM_FN, "r") as f:
+         emb_dim = json.load(f)["dim"]
+     return index, meta

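The three helpers above round-trip cleanly. A quick sanity check, assuming faiss-cpu is installed and the sketch runs in the same module as the helpers (dummy metadata; note it writes the three cache files to the working directory):

import numpy as np

# 8 random unit vectors at MiniLM's 384-dim output size.
vecs = np.random.rand(8, 384).astype("float32")
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)
meta = [{"chunk_id": f"dummy_c{i}", "text": "..."} for i in range(8)]

idx, dim = build_faiss_index(vecs)
save_index_and_metadata(idx, meta, dim)
idx2, meta2 = load_index_and_metadata()

D, I = idx2.search(vecs[:1], 3)
print(I[0], D[0])  # vecs[0]'s nearest neighbour is itself, score ~1.0
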
  # ======================================================
+ # 🧠 INDEX BUILDER
  # ======================================================
  def ensure_handbook_index(rebuild=False):
+     """Build or load handbook FAISS index efficiently."""
+     if "handbook_ready" in st.session_state and st.session_state.handbook_ready and not rebuild:
          return
+
      pdfs = find_pdfs()
      if not pdfs:
+         st.error("❌ No handbook PDF found.")
+         st.session_state.handbook_ready = False
          return
+
+     # Try loading cached index
+     if os.path.exists(HAND_INDEX_FN) and not rebuild:
+         index, metadata = load_index_and_metadata()
+         if index is not None:
              st.session_state.faiss_index = index
+             st.session_state.metadata = metadata
              st.session_state.handbook_ready = True
+             st.success(f"✅ Loaded FAISS index with {len(metadata)} chunks.")
              return

+     st.info("⚙️ Building FAISS index locally with MiniLM… this may take 30–60 seconds.")
+     start_time = time.time()
+
+     pages = load_pdf_texts_with_page_info(pdfs)
+     chunks = chunk_pages_into_segments(pages, int(chunk_size_chars), int(chunk_overlap))
+     if not chunks:
+         st.error("❌ No readable text found in the handbook.")
+         return
+
+     texts = [c["text"] for c in chunks]
+     embeddings = embed_texts(texts, batch_size=16)
+
+     index, emb_dim = build_faiss_index(embeddings)
+     save_index_and_metadata(index, chunks, emb_dim)
+
+     st.session_state.faiss_index = index
+     st.session_state.metadata = chunks
+     st.session_state.handbook_ready = True
+
+     elapsed = time.time() - start_time
+     st.success(f"✅ Handbook indexed in {elapsed:.1f} seconds ({len(chunks)} chunks).")

  # ======================================================
+ # 🔍 RETRIEVAL
  # ======================================================
+ def embed_query(query: str) -> np.ndarray:
+     model = get_local_embedder()
+     emb = model.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
+     return emb.astype("float32")

  def retrieve_top_chunks(query: str, k: int):
      index = st.session_state.get("faiss_index")
      metadata = st.session_state.get("metadata", [])
+     if not index or not metadata:
          return [], []
      q_emb = embed_query(query).reshape(1, -1)
      D, I = index.search(q_emb, k)
      results = [metadata[i] for i in I[0] if i < len(metadata)]
      return results, D[0].tolist()

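End to end, a retrieval call then looks like this once the index is in session state (a sketch; the question is hypothetical):

results, scores = retrieve_top_chunks("What is the attendance policy?", k=4)
for r, s in zip(results, scores):
    # Scores are cosine similarities; the app only answers when
    # max(scores) clears the sidebar's similarity_threshold.
    print(f"{r['filename']} p.{r['page']} ({r['chunk_id']}): {s:.3f}")
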
  # ======================================================
+ # 🗣️ CHAT INTERFACE
  # ======================================================
+ ensure_handbook_index(rebuild=regenerate_index)

  st.divider()
  st.subheader("💬 Ask the handbook")

+ user_input = st.chat_input("Ask a question about the handbook...")
  if user_input:
      st_message(user_input, is_user=True)
+
+     retrieved, scores = retrieve_top_chunks(user_input, top_k)
+     if not retrieved or max(scores) < similarity_threshold:
+         reply = "Sorry, I can only answer based on the handbook, and I couldn’t find relevant information."
+         st_message(reply, is_user=False)
      else:
+         answer = "Based on the handbook:\n\n"
+         for r, s in zip(retrieved, scores):
+             short = (r["text"][:300] + "…") if len(r["text"]) > 300 else r["text"]
+             answer += f"📄 **{r['filename']}**, page {r['page']} — (score {s:.3f})\n> {short}\n\n"
+         st_message(answer.strip(), is_user=False)

  # ======================================================
+ # 🧾 HISTORY & EXPORT
  # ======================================================
  st.divider()
  st.subheader("Conversation History")
+ if "chat_history" not in st.session_state:
+     st.session_state.chat_history = []