"""
test_multilingual_anomaly.py

Test the multilingual anomaly detection fix.

Runs a small mixed-language batch through the vectorization agent graph
and prints the anomaly and language-detection results it returns.
"""

import sys
from datetime import datetime
from pathlib import Path

# On Windows, switch stdout to UTF-8 so the non-ASCII text handled by this
# script can be printed without an encoding error.
if sys.platform == 'win32':
    sys.stdout.reconfigure(encoding='utf-8')

# Put the current working directory first on sys.path so the ``src`` package
# resolves; this assumes the script is launched from the project root.
sys.path.insert(0, str(Path('.').resolve()))

# Must come after the sys.path tweak above, hence not grouped with the
# imports at the top of the file.
from src.graphs.vectorizationAgentGraph import graph  # noqa: E402

# Sample batch fed to the graph: three English posts, one Sinhala (SI_*)
# and one Tamil (TA_*). Each entry carries the raw text and a post id.
test_texts = [
    {"text": "URGENT: Massive landslide in Ratnapura!", "post_id": "EN_001"},
    {"text": "Normal stock market day", "post_id": "EN_002"},
    {"text": "ආර්ථික අර්බුදය නිසා ජනතාව දුෂ්කරතාවන්ට මුහුණ දෙයි", "post_id": "SI_001"},
    {"text": "கொழும்பில் பெரும் மழை பெய்தது", "post_id": "TA_001"},
    {"text": "Breaking news about corruption scandal", "post_id": "EN_003"},
]

# Run the whole batch through the graph in one call. The batch id is a
# local-time timestamp (YYYYMMDD_HHMMSS) taken at invocation time.
result = graph.invoke({
    "input_texts": test_texts,
    "batch_id": datetime.now().strftime("%Y%m%d_%H%M%S"),
})

# Banner line reused for the report header and footer.
banner = "=" * 60

print(banner)
print("MULTILINGUAL ANOMALY DETECTION TEST")
print(banner)

# Summary of the anomaly stage. ``.get`` is used throughout so a missing key
# prints as ``None`` instead of aborting the report.
anomaly_results = result.get("anomaly_results", {})
print(f"\nStatus: {anomaly_results.get('status')}")
print(f"Model: {anomaly_results.get('model_used')}")
print(f"Total analyzed: {anomaly_results.get('total_analyzed')}")

# One line per flagged post, including which detection method flagged it.
anomalies = anomaly_results.get("anomalies", [])
print(f"\nAnomalies found: {len(anomalies)}")
for a in anomalies:
    method = a.get("detection_method", "unknown")
    print(f" - {a.get('post_id')}: {a.get('language')} | method: {method} | score: {a.get('anomaly_score', 0):.2f}")

# Per-post language detection output.
lang_results = result.get("language_detection_results", [])
print("\nLanguage Detection:")
for lr in lang_results:
    print(f" - {lr.get('post_id')}: {lr.get('language')} (conf: {lr.get('confidence', 0):.2f})")

# Static reminder of what the fix under test is expected to do.
print("\n" + banner)
print("The fix ensures:")
print(" - English texts: Isolation Forest ML model")
print(" - Sinhala/Tamil: Magnitude-based heuristic (avoids false positives)")
print(banner)
|