from datetime import datetime, timezone

from src.semantic.relevance import SemanticRelevance
from src.semantic.nli_distortion import DistortionDetectorNLI
from src.sesgos.sesgos import BiasDetector
from src.mineria.mining import MiningFeatures


class AnalysisEngine:
    """Run the full analysis pipeline for a user comment against a document.

    The pipeline chains four collaborators, in this order: semantic
    relevance, NLI-based distortion detection, bias detection (document and
    user comment) and text-mining feature extraction. The results are
    assembled into a single response dictionary by :meth:`analyze`.
    """

    # Title used by callers to flag that the paragraphs come from RAG
    # retrieval rather than from a scraped document.
    RAG_TITLE = "RAG_CONTEXT"
    # Minimum best-paragraph score for a RAG context to be analysed further.
    RAG_MIN_SCORE = 0.25
    # Document-level relevance decisions that stop the analysis early.
    LOW_RELEVANCE_DECISIONS = ("no relacionado", "tangencial")

    # Ordered (keyword, verdict) pairs; the first keyword found in the
    # lower-cased decision wins, so the order is significant.
    _VERDICT_KEYWORDS = (
        ("gravemente", "distorsion"),
        ("parcial", "parcial"),
        ("neutral", "neutral"),
    )

    def __init__(self):
        self.relevance = SemanticRelevance()
        self.distortion = DistortionDetectorNLI()
        self.bias = BiasDetector()
        self.mining = MiningFeatures()

    def map_veredicto(self, decision: str) -> str:
        """Map a free-text distortion decision to a short verdict label.

        Args:
            decision: Decision text produced by the distortion detector.
                May be empty or ``None``.

        Returns:
            ``"indefinido"`` when ``decision`` is falsy; otherwise
            ``"distorsion"``, ``"parcial"`` or ``"neutral"`` according to the
            first matching keyword (case-insensitive), and ``"correcto"``
            when no keyword matches.
        """
        if not decision:
            return "indefinido"

        lowered = decision.lower()
        for keyword, verdict in self._VERDICT_KEYWORDS:
            if keyword in lowered:
                return verdict
        return "correcto"

    @staticmethod
    def _format_biases(bias_result: dict) -> list:
        """Normalise detected biases to a list of ``{"label", "score"}``."""
        return [
            {
                "label": s.get("sesgo", s.get("label", "desconocido")),
                "score": s.get("confianza", s.get("score", 0.0)),
            }
            # ``or []`` also covers the key being present with a None value.
            for s in bias_result.get("sesgos_detectados") or []
        ]

    @staticmethod
    def _format_contradiction(detail: dict) -> dict:
        """Convert one distortion detail entry into the response format."""
        scores = detail.get("scores_detail") or {}
        cleaned_claim = detail.get("cleaned_claim")
        return {
            "parrafo": detail.get("paragraph", ""),
            "oracion_usuario": detail.get("sentence", ""),
            "claim_extraido": cleaned_claim,
            "claim_transformado": detail.get("transformed_claim", cleaned_claim),
            "negacion_detectada": detail.get("is_negation", False),
            "tipo_distorsion": detail.get("best_label", "neutral"),
            # ``or 0.0`` keeps round() from failing on an explicit None.
            "puntaje_principal": round(detail.get("best_score") or 0.0, 3),
            "puntajes_detallados": {
                "contradiccion": round(scores.get("contradiction") or 0.0, 3),
                "neutral": round(scores.get("neutral") or 0.0, 3),
                "coincidencia": round(scores.get("entailment") or 0.0, 3),
            },
        }

    def analyze(self, user_text: str, document_paragraphs: list, title: str) -> dict:
        """Analyse ``user_text`` against the given document paragraphs.

        Args:
            user_text: The user's comment.
            document_paragraphs: Paragraph dicts; this method reads their
                ``"text"`` and ``"type"`` keys (both optional).
            title: Document title, or ``"RAG_CONTEXT"`` when the paragraphs
                were retrieved by the RAG component.

        Returns:
            One of three dictionaries:

            * ``{"status": "rag_irrelevante", "relevance", "message"}`` when
              the title is ``"RAG_CONTEXT"`` and the best paragraph score is
              below :attr:`RAG_MIN_SCORE`;
            * ``{"status": "poco_relevante", "relevance"}`` when a non-RAG
              document is judged ``"no relacionado"`` or ``"tangencial"``;
            * otherwise the full result with ``"scraped_content"`` and
              ``"analisis"`` sections.
        """
        # 1) Overall relevance.
        rel = self.relevance.relate(user_text, title, document_paragraphs)

        if title == self.RAG_TITLE:
            # RAG-specific filter. NOTE(review): ``best_paragraph`` is
            # presumably absent/None when nothing could be scored; that case
            # is treated as score 0.0 (irrelevant) instead of raising.
            best_paragraph = rel.get("best_paragraph") or {}
            best_score = best_paragraph.get("score") or 0.0
            if best_score < self.RAG_MIN_SCORE:
                return {
                    "status": "rag_irrelevante",
                    "relevance": rel,
                    "message": "El contexto recuperado por el RAG no es relevante al comentario del usuario.",
                }
        elif rel["decision_document"] in self.LOW_RELEVANCE_DECISIONS:
            # Regular filter for scraped documents.
            return {
                "status": "poco_relevante",
                "relevance": rel,
            }

        # 2) Distortion (NLI + heuristics).
        distortion = self.distortion.analyze_user_comment(
            user_text, document_paragraphs
        )

        # 3) Biases in the document (title plus all paragraph texts).
        paragraphs_text = " ".join(p.get("text", "") for p in document_paragraphs)
        full_document_text = title + ". " + paragraphs_text
        biases_document = self.bias.detect(full_document_text)

        # 4) Biases in the user comment.
        biases_user = self.bias.detect(user_text)

        # 5) Text mining.
        mining = self.mining.extract(user_text)

        # Final assembly, tolerant of missing or None keys.
        contradictions = [
            self._format_contradiction(d)
            for d in distortion.get("detalles") or []
        ]

        # datetime.utcnow() is deprecated; stripping tzinfo keeps the emitted
        # string in the same naive-UTC ISO format as before.
        collected_at = datetime.now(timezone.utc).replace(tzinfo=None)

        return {
            "scraped_content": {
                "title": title,
                "url": "",
                "fecha_recoleccion": collected_at.isoformat(),
                "segmentos_contenido": [
                    {
                        "type": p.get("type", "p"),
                        "text": p.get("text", ""),
                    }
                    for p in document_paragraphs
                ],
            },
            "analisis": {
                "document_sesgo": {
                    "sesgos_encontrados": self._format_biases(biases_document),
                    "explicacion": "Sesgos detectados en el documento mediante análisis heurístico.",
                },
                "user_sesgo": {
                    "sesgos_encontrados": self._format_biases(biases_user),
                    "explicacion": "Sesgos detectados en el comentario del usuario.",
                },
                "document_distorsion": {
                    "veredicto": self.map_veredicto(distortion.get("decision")),
                    "contradicciones": contradictions,
                },
                "mineria": mining,
            },
        }