api-sesgos / src /engine /analysis_engine.py
RodrickMJ
Add application file
f1db1e3
from datetime import datetime, timezone

from src.semantic.relevance import SemanticRelevance
from src.semantic.nli_distortion import DistortionDetectorNLI
from src.sesgos.sesgos import BiasDetector
from src.mineria.mining import MiningFeatures
class AnalysisEngine:
    """Run relevance, distortion, bias and text-mining analysis on a comment.

    The engine owns one instance of each analyser and combines their raw
    outputs into a single response dictionary (see :meth:`analyze`).
    """

    # Title value that selects the RAG-specific relevance filter.
    RAG_TITLE = "RAG_CONTEXT"
    # Best-paragraph score below which RAG input is reported as irrelevant.
    RAG_MIN_SCORE = 0.25
    # Document-level relevance decisions that stop the analysis early.
    LOW_RELEVANCE_DECISIONS = ("no relacionado", "tangencial")

    # (keyword, verdict) pairs checked in order against the lowered decision.
    _VERDICT_KEYWORDS = (
        ("gravemente", "distorsion"),
        ("parcial", "parcial"),
        ("neutral", "neutral"),
    )

    def __init__(self):
        self.relevance = SemanticRelevance()
        self.distortion = DistortionDetectorNLI()
        self.bias = BiasDetector()
        self.mining = MiningFeatures()

    def map_veredicto(self, decision: str):
        """Map a free-text distortion decision to a short verdict label.

        Args:
            decision: Decision text from the distortion detector; may be
                empty or ``None``.

        Returns:
            ``"indefinido"`` for a falsy decision; otherwise the verdict of
            the first keyword found in the lowercased text (``"distorsion"``,
            ``"parcial"`` or ``"neutral"``), or ``"correcto"`` if none match.
        """
        if not decision:
            return "indefinido"
        lowered = decision.lower()
        for keyword, verdict in self._VERDICT_KEYWORDS:
            if keyword in lowered:
                return verdict
        return "correcto"

    @staticmethod
    def _round3(value):
        """Round *value* to three decimals, treating ``None`` as ``0.0``."""
        return round(0.0 if value is None else value, 3)

    @staticmethod
    def _format_biases(bias_result):
        """Convert a bias-detector result into a list of label/score dicts."""
        return [
            {
                "label": s.get("sesgo", s.get("label", "desconocido")),
                "score": s.get("confianza", s.get("score", 0.0)),
            }
            # A missing or None list yields no entries instead of raising.
            for s in (bias_result.get("sesgos_detectados") or [])
        ]

    def _format_contradictions(self, distortion):
        """Convert the distortion details into the response's entry format."""
        formatted = []
        for d in distortion.get("detalles") or []:
            scores = d.get("scores_detail", {}) or {}
            formatted.append(
                {
                    "parrafo": d.get("paragraph", ""),
                    "oracion_usuario": d.get("sentence", ""),
                    "claim_extraido": d.get("cleaned_claim"),
                    # Fall back to the cleaned claim when no transformed
                    # claim is present.
                    "claim_transformado": d.get(
                        "transformed_claim", d.get("cleaned_claim")
                    ),
                    "negacion_detectada": d.get("is_negation", False),
                    "tipo_distorsion": d.get("best_label", "neutral"),
                    "puntaje_principal": self._round3(d.get("best_score", 0.0)),
                    "puntajes_detallados": {
                        "contradiccion": self._round3(
                            scores.get("contradiction", 0.0)
                        ),
                        "neutral": self._round3(scores.get("neutral", 0.0)),
                        "coincidencia": self._round3(
                            scores.get("entailment", 0.0)
                        ),
                    },
                }
            )
        return formatted

    def analyze(self, user_text: str, document_paragraphs: list, title: str):
        """Analyse a user comment against a document.

        Args:
            user_text: The user's comment.
            document_paragraphs: Paragraph dicts; their ``"text"`` and
                ``"type"`` keys are read with defaults.
            title: Document title. The value ``"RAG_CONTEXT"`` switches the
                relevance filter to the RAG score threshold.

        Returns:
            One of three dictionaries:

            * ``{"status": "rag_irrelevante", "relevance", "message"}`` when
              the title is ``"RAG_CONTEXT"`` and the best paragraph score is
              below ``RAG_MIN_SCORE`` (a missing score counts as ``0.0``).
            * ``{"status": "poco_relevante", "relevance"}`` when, for any
              other title, the document decision is one of
              ``LOW_RELEVANCE_DECISIONS``.
            * Otherwise the full result with ``"scraped_content"`` and
              ``"analisis"`` sections.
        """
        # 1. Overall relevance.
        rel = self.relevance.relate(user_text, title, document_paragraphs)

        if title == self.RAG_TITLE:
            # RAG filter: judged by the best paragraph score only. A missing
            # or None paragraph/score is treated as 0.0 rather than raising.
            best_paragraph = rel.get("best_paragraph") or {}
            best_score = best_paragraph.get("score") or 0.0
            if best_score < self.RAG_MIN_SCORE:
                return {
                    "status": "rag_irrelevante",
                    "relevance": rel,
                    "message": "El contexto recuperado por el RAG no es relevante al comentario del usuario.",
                }
        elif rel.get("decision_document") in self.LOW_RELEVANCE_DECISIONS:
            # Regular (scraped) documents: judged by the document decision.
            return {
                "status": "poco_relevante",
                "relevance": rel,
            }

        # 2. Distortion of the document by the user's comment.
        distortion = self.distortion.analyze_user_comment(
            user_text, document_paragraphs
        )

        # 3. Biases in the document (title plus all paragraph texts).
        full_document_text = (
            title + ". " + " ".join(p.get("text", "") for p in document_paragraphs)
        )
        biases_document = self.bias.detect(full_document_text)

        # 4. Biases in the user's comment.
        biases_user = self.bias.detect(user_text)

        # 5. Text mining of the user's comment.
        mining = self.mining.extract(user_text)

        # datetime.utcnow() is deprecated; build the same naive-UTC ISO string
        # from an aware timestamp so the output format does not change.
        collected_at = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        )

        return {
            "scraped_content": {
                "title": title,
                "url": "",
                "fecha_recoleccion": collected_at,
                "segmentos_contenido": [
                    {
                        "type": p.get("type", "p"),
                        "text": p.get("text", ""),
                    }
                    for p in document_paragraphs
                ],
            },
            "analisis": {
                "document_sesgo": {
                    "sesgos_encontrados": self._format_biases(biases_document),
                    "explicacion": "Sesgos detectados en el documento mediante análisis heurístico.",
                },
                "user_sesgo": {
                    "sesgos_encontrados": self._format_biases(biases_user),
                    "explicacion": "Sesgos detectados en el comentario del usuario.",
                },
                "document_distorsion": {
                    "veredicto": self.map_veredicto(distortion.get("decision")),
                    "contradicciones": self._format_contradictions(distortion),
                },
                "mineria": mining,
            },
        }
# return {
# "scraped_content": {
# "title": title,
# "url": "",
# "fecha_recoleccion": datetime.utcnow().isoformat(),
# "segmentos_contenido": [
# {"type": p["type"], "text": p["text"]} for p in document_paragraphs
# ],
# },
# "analisis": {
# "document_sesgo": {
# "sesgos_encontrados": [
# {
# "label": s.get("sesgo", s.get("label", "desconocido")),
# "score": s.get("confianza", s.get("score", 0.0)),
# }
# for s in biases["sesgos_detectados"]
# ],
# "explicacion": "Sesgos detectados mediante análisis heurístico y de objetividad.",
# },
# "document_distorsion": {
# "veredicto": self.map_veredicto(distortion["decision"]),
# "contradicciones": [
# {
# "parrafo": d["paragraph"],
# "oracion_usuario": d["sentence"],
# "claim_extraido": d["cleaned_claim"],
# "claim_transformado": d.get("transformed_claim", d["cleaned_claim"]),
# "negacion_detectada": d["is_negation"],
# "tipo_distorsion": d["best_label"],
# "puntaje_principal": d["best_score"],
# "puntajes_detallados": {
# "contradiccion": round(d["scores_detail"]["contradiction"], 3),
# "neutral": round(d["scores_detail"]["neutral"], 3),
# "coincidencia": round(d["scores_detail"]["entailment"], 3),
# },
# }
# for d in distortion["detalles"]
# ],
# },
# "mineria": mining,
# },
# }