Spaces:
Sleeping
Sleeping
| from src.semantic.relevance import SemanticRelevance | |
| from src.semantic.nli_distortion import DistortionDetectorNLI | |
| from src.sesgos.sesgos import BiasDetector | |
| from src.mineria.mining import MiningFeatures | |
| from datetime import datetime | |
class AnalysisEngine:
    """Run the analysis pipeline for a user comment against a document.

    The engine chains four collaborators created in ``__init__``: semantic
    relevance, NLI-based distortion detection, bias detection and text-mining
    feature extraction. ``analyze`` is the single entry point.
    """

    # Sentinel title marking the paragraphs as context retrieved by the RAG.
    RAG_TITLE = "RAG_CONTEXT"
    # Best-paragraph score below which RAG context is reported as irrelevant.
    RAG_MIN_SCORE = 0.25
    # Document-level relevance decisions that stop the analysis early.
    LOW_RELEVANCE_DECISIONS = ("no relacionado", "tangencial")

    def __init__(self):
        """Instantiate the four analysis components used by ``analyze``."""
        self.relevance = SemanticRelevance()
        self.distortion = DistortionDetectorNLI()
        self.bias = BiasDetector()
        self.mining = MiningFeatures()

    def map_veredicto(self, decision: str) -> str:
        """Map the distortion detector's decision text to a verdict key.

        Matching is case-insensitive and by substring, checked in this order:
        "gravemente" -> "distorsion", "parcial" -> "parcial",
        "neutral" -> "neutral". An empty or missing decision yields
        "indefinido"; any other text yields "correcto".
        """
        if not decision:
            return "indefinido"
        decision = decision.lower()
        # Order matters: the first marker found wins.
        for marker, verdict in (
            ("gravemente", "distorsion"),
            ("parcial", "parcial"),
            ("neutral", "neutral"),
        ):
            if marker in decision:
                return verdict
        return "correcto"

    @staticmethod
    def _rounded(value, digits: int = 3):
        """Round a score, treating a missing (``None``) value as 0.0."""
        return round(0.0 if value is None else value, digits)

    @staticmethod
    def _format_biases(bias_result) -> list:
        """Convert a bias-detector result into ``{"label", "score"}`` dicts.

        Each entry may use either the ``sesgo``/``confianza`` or the
        ``label``/``score`` key pair; the former takes precedence.
        """
        return [
            {
                "label": s.get("sesgo", s.get("label", "desconocido")),
                "score": s.get("confianza", s.get("score", 0.0)),
            }
            for s in (bias_result.get("sesgos_detectados") or [])
        ]

    def _format_contradictions(self, distortion) -> list:
        """Build the per-sentence contradiction entries of the final report.

        Every field is read defensively so that a detail with missing keys or
        ``None`` scores still produces a complete entry.
        """
        formatted = []
        for d in distortion.get("detalles") or []:
            scores = d.get("scores_detail") or {}
            formatted.append(
                {
                    "parrafo": d.get("paragraph", ""),
                    "oracion_usuario": d.get("sentence", ""),
                    "claim_extraido": d.get("cleaned_claim"),
                    # Fall back to the cleaned claim when no transformed
                    # version was produced.
                    "claim_transformado": d.get(
                        "transformed_claim", d.get("cleaned_claim")
                    ),
                    "negacion_detectada": d.get("is_negation", False),
                    "tipo_distorsion": d.get("best_label", "neutral"),
                    "puntaje_principal": self._rounded(d.get("best_score")),
                    "puntajes_detallados": {
                        "contradiccion": self._rounded(
                            scores.get("contradiction")
                        ),
                        "neutral": self._rounded(scores.get("neutral")),
                        "coincidencia": self._rounded(scores.get("entailment")),
                    },
                }
            )
        return formatted

    def analyze(self, user_text: str, document_paragraphs: list, title: str):
        """Analyze ``user_text`` against a document and build the report.

        Args:
            user_text: The user's comment.
            document_paragraphs: List of dicts; the ``text`` and ``type`` keys
                are read (defaulting to ``""`` and ``"p"``).
            title: Document title, or ``"RAG_CONTEXT"`` when the paragraphs
                are context retrieved by the RAG.

        Returns:
            One of three dicts:
            * ``{"status": "rag_irrelevante", "relevance", "message"}`` when
              the title is the RAG sentinel and the best paragraph score is
              missing or below ``RAG_MIN_SCORE``;
            * ``{"status": "poco_relevante", "relevance"}`` when a non-RAG
              document is judged "no relacionado" or "tangencial";
            * otherwise the full report with ``scraped_content`` and
              ``analisis`` sections.
        """
        from datetime import timezone

        # 1) Overall relevance.
        rel = self.relevance.relate(user_text, title, document_paragraphs)

        if title == self.RAG_TITLE:
            # Special filter for RAG context. A missing best paragraph or
            # score is treated as 0.0 (irrelevant) instead of raising.
            best_paragraph = rel.get("best_paragraph") or {}
            best_score = best_paragraph.get("score")
            if best_score is None:
                best_score = 0.0
            if best_score < self.RAG_MIN_SCORE:
                return {
                    "status": "rag_irrelevante",
                    "relevance": rel,
                    "message": "El contexto recuperado por el RAG no es relevante al comentario del usuario.",
                }
        elif rel["decision_document"] in self.LOW_RELEVANCE_DECISIONS:
            # Regular filter for scraped documents.
            return {
                "status": "poco_relevante",
                "relevance": rel,
            }

        # 2) Distortion (NLI + heuristics).
        distortion = self.distortion.analyze_user_comment(
            user_text, document_paragraphs
        )

        # 3) Biases in the document.
        # NOTE(review): for RAG input the sentinel title is prepended to the
        # text handed to the bias detector as well - confirm this is intended.
        full_document_text = (
            title + ". " + " ".join(p.get("text", "") for p in document_paragraphs)
        )
        biases_document = self.bias.detect(full_document_text)

        # 4) Biases in the user's comment.
        biases_user = self.bias.detect(user_text)

        # 5) Text mining.
        mining = self.mining.extract(user_text)

        # Naive UTC timestamp in the same ISO format that the deprecated
        # datetime.utcnow() produced (no UTC offset suffix).
        collected_at = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        )

        return {
            "scraped_content": {
                "title": title,
                "url": "",
                "fecha_recoleccion": collected_at,
                "segmentos_contenido": [
                    {
                        "type": p.get("type", "p"),
                        "text": p.get("text", ""),
                    }
                    for p in document_paragraphs
                ],
            },
            "analisis": {
                "document_sesgo": {
                    "sesgos_encontrados": self._format_biases(biases_document),
                    "explicacion": "Sesgos detectados en el documento mediante análisis heurístico.",
                },
                "user_sesgo": {
                    "sesgos_encontrados": self._format_biases(biases_user),
                    "explicacion": "Sesgos detectados en el comentario del usuario.",
                },
                "document_distorsion": {
                    "veredicto": self.map_veredicto(distortion.get("decision")),
                    "contradicciones": self._format_contradictions(distortion),
                },
                "mineria": mining,
            },
        }
| # return { | |
| # "scraped_content": { | |
| # "title": title, | |
| # "url": "", | |
| # "fecha_recoleccion": datetime.utcnow().isoformat(), | |
| # "segmentos_contenido": [ | |
| # {"type": p["type"], "text": p["text"]} for p in document_paragraphs | |
| # ], | |
| # }, | |
| # "analisis": { | |
| # "document_sesgo": { | |
| # "sesgos_encontrados": [ | |
| # { | |
| # "label": s.get("sesgo", s.get("label", "desconocido")), | |
| # "score": s.get("confianza", s.get("score", 0.0)), | |
| # } | |
| # for s in biases["sesgos_detectados"] | |
| # ], | |
| # "explicacion": "Sesgos detectados mediante análisis heurístico y de objetividad.", | |
| # }, | |
| # "document_distorsion": { | |
| # "veredicto": self.map_veredicto(distortion["decision"]), | |
| # "contradicciones": [ | |
| # { | |
| # "parrafo": d["paragraph"], | |
| # "oracion_usuario": d["sentence"], | |
| # "claim_extraido": d["cleaned_claim"], | |
| # "claim_transformado": d.get("transformed_claim", d["cleaned_claim"]), | |
| # "negacion_detectada": d["is_negation"], | |
| # "tipo_distorsion": d["best_label"], | |
| # "puntaje_principal": d["best_score"], | |
| # "puntajes_detallados": { | |
| # "contradiccion": round(d["scores_detail"]["contradiction"], 3), | |
| # "neutral": round(d["scores_detail"]["neutral"], 3), | |
| # "coincidencia": round(d["scores_detail"]["entailment"], 3), | |
| # }, | |
| # } | |
| # for d in distortion["detalles"] | |
| # ], | |
| # }, | |
| # "mineria": mining, | |
| # }, | |
| # } | |