Spaces:

Rodricklw
/

api-sesgos

Sleeping

File size: 7,331 Bytes

f1db1e3

from datetime import datetime, timezone

from src.semantic.relevance import SemanticRelevance
from src.semantic.nli_distortion import DistortionDetectorNLI
from src.sesgos.sesgos import BiasDetector
from src.mineria.mining import MiningFeatures


class AnalysisEngine:
    """Run relevance, distortion, bias and text-mining analysis on a user comment.

    The engine wires together four collaborators created in ``__init__``
    (``SemanticRelevance``, ``DistortionDetectorNLI``, ``BiasDetector`` and
    ``MiningFeatures``) and assembles their outputs into one response dict.
    """

    # Title value that marks the paragraphs as RAG-retrieved context.
    _RAG_TITLE = "RAG_CONTEXT"
    # Minimum best-paragraph score for RAG context to be analysed further.
    _RAG_MIN_SCORE = 0.25
    # Document-level relevance decisions that stop the analysis early.
    _LOW_RELEVANCE_DECISIONS = ("no relacionado", "tangencial")
    # (keyword, verdict) pairs, checked in order against the lowercased decision.
    _VERDICT_KEYWORDS = (
        ("gravemente", "distorsion"),
        ("parcial", "parcial"),
        ("neutral", "neutral"),
    )

    def __init__(self):
        self.relevance = SemanticRelevance()
        self.distortion = DistortionDetectorNLI()
        self.bias = BiasDetector()
        self.mining = MiningFeatures()

    def map_veredicto(self, decision: str) -> str:
        """Map a distortion decision string onto a short verdict label.

        Returns ``"indefinido"`` for an empty or ``None`` decision. Otherwise
        the first keyword found in the lowercased decision wins, in the order
        ``gravemente`` -> ``"distorsion"``, ``parcial`` -> ``"parcial"``,
        ``neutral`` -> ``"neutral"``; any other text yields ``"correcto"``.
        """
        if not decision:
            return "indefinido"

        lowered = decision.lower()
        for keyword, verdict in self._VERDICT_KEYWORDS:
            if keyword in lowered:
                return verdict
        return "correcto"

    @staticmethod
    def _rounded(value, digits: int = 3) -> float:
        """Return ``value`` as a built-in float rounded to ``digits`` places.

        ``None`` and anything ``float()`` rejects become ``0.0`` so that a
        missing score cannot crash the response assembly.
        """
        try:
            return round(float(value), digits)
        except (TypeError, ValueError):
            return 0.0

    @staticmethod
    def _format_biases(bias_result) -> list:
        """Convert a bias-detector result into ``[{"label", "score"}, ...]``.

        Each entry reads ``sesgo``/``confianza`` first and falls back to
        ``label``/``score``. A missing or ``None`` ``sesgos_detectados`` gives
        an empty list.
        """
        detected = (bias_result or {}).get("sesgos_detectados") or []
        return [
            {
                "label": item.get("sesgo", item.get("label", "desconocido")),
                "score": item.get("confianza", item.get("score", 0.0)),
            }
            for item in detected
        ]

    def _format_contradictions(self, distortion) -> list:
        """Convert the distortion ``detalles`` entries into the response shape.

        Missing keys fall back to defaults, and ``None`` scores are reported
        as ``0.0``.
        """
        formatted = []
        for detail in (distortion or {}).get("detalles") or []:
            scores = detail.get("scores_detail") or {}
            formatted.append(
                {
                    "parrafo": detail.get("paragraph", ""),
                    "oracion_usuario": detail.get("sentence", ""),
                    "claim_extraido": detail.get("cleaned_claim"),
                    "claim_transformado": detail.get(
                        "transformed_claim", detail.get("cleaned_claim")
                    ),
                    "negacion_detectada": detail.get("is_negation", False),
                    "tipo_distorsion": detail.get("best_label", "neutral"),
                    "puntaje_principal": self._rounded(detail.get("best_score")),
                    "puntajes_detallados": {
                        "contradiccion": self._rounded(scores.get("contradiction")),
                        "neutral": self._rounded(scores.get("neutral")),
                        "coincidencia": self._rounded(scores.get("entailment")),
                    },
                }
            )
        return formatted

    def analyze(self, user_text: str, document_paragraphs: list, title: str):
        """Analyse ``user_text`` against a document and return a response dict.

        Args:
            user_text: The user's comment.
            document_paragraphs: Dicts read through their ``"text"`` and
                ``"type"`` keys (missing keys default to ``""`` and ``"p"``).
            title: Document title; the value ``"RAG_CONTEXT"`` selects the
                RAG relevance filter instead of the document-level one.

        Returns:
            One of two early-exit dicts, ``{"status": "rag_irrelevante", ...}``
            or ``{"status": "poco_relevante", ...}``, when the relevance gate
            fails. Otherwise a dict with ``"scraped_content"`` and
            ``"analisis"`` sections.
        """
        # 1) Overall relevance.
        rel = self.relevance.relate(user_text, title, document_paragraphs)

        if title == self._RAG_TITLE:
            # RAG filter: a missing best paragraph or score is treated as
            # irrelevant rather than raising.
            best_paragraph = rel.get("best_paragraph") or {}
            best_score = best_paragraph.get("score")
            if best_score is None or best_score < self._RAG_MIN_SCORE:
                return {
                    "status": "rag_irrelevante",
                    "relevance": rel,
                    "message": "El contexto recuperado por el RAG no es relevante al comentario del usuario.",
                }
        elif rel.get("decision_document") in self._LOW_RELEVANCE_DECISIONS:
            # Standard filter for non-RAG documents.
            return {
                "status": "poco_relevante",
                "relevance": rel,
            }

        # 2) Distortion analysis of the comment against the paragraphs.
        distortion = self.distortion.analyze_user_comment(
            user_text, document_paragraphs
        )

        paragraphs = document_paragraphs or []

        # 3) Biases in the document (title plus all paragraph texts).
        body_text = " ".join(p.get("text") or "" for p in paragraphs)
        biases_document = self.bias.detect(f"{title or ''}. {body_text}")

        # 4) Biases in the user's comment.
        biases_user = self.bias.detect(user_text)

        # 5) Text mining over the user's comment.
        mining = self.mining.extract(user_text)

        # Naive UTC timestamp, same string format datetime.utcnow() produced.
        collected_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        return {
            "scraped_content": {
                "title": title,
                "url": "",
                "fecha_recoleccion": collected_at,
                "segmentos_contenido": [
                    {
                        "type": p.get("type", "p"),
                        "text": p.get("text", ""),
                    }
                    for p in paragraphs
                ],
            },
            "analisis": {
                "document_sesgo": {
                    "sesgos_encontrados": self._format_biases(biases_document),
                    "explicacion": "Sesgos detectados en el documento mediante análisis heurístico.",
                },
                "user_sesgo": {
                    "sesgos_encontrados": self._format_biases(biases_user),
                    "explicacion": "Sesgos detectados en el comentario del usuario.",
                },
                "document_distorsion": {
                    "veredicto": self.map_veredicto(
                        (distortion or {}).get("decision")
                    ),
                    "contradicciones": self._format_contradictions(distortion),
                },
                "mineria": mining,
            },
        }

        # return {
        #     "scraped_content": {
        #         "title": title,
        #         "url": "",
        #         "fecha_recoleccion": datetime.utcnow().isoformat(),
        #         "segmentos_contenido": [
        #             {"type": p["type"], "text": p["text"]} for p in document_paragraphs
        #         ],
        #     },
        #     "analisis": {
        #         "document_sesgo": {
        #             "sesgos_encontrados": [
        #                 {
        #                     "label": s.get("sesgo", s.get("label", "desconocido")),
        #                     "score": s.get("confianza", s.get("score", 0.0)),
        #                 }
        #                 for s in biases["sesgos_detectados"]
        #             ],
        #             "explicacion": "Sesgos detectados mediante análisis heurístico y de objetividad.",
        #         },
        #         "document_distorsion": {
        #             "veredicto": self.map_veredicto(distortion["decision"]),
        #             "contradicciones": [
        #                 {
        #                     "parrafo": d["paragraph"],
        #                     "oracion_usuario": d["sentence"],
        #                     "claim_extraido": d["cleaned_claim"],
        #                     "claim_transformado": d.get("transformed_claim", d["cleaned_claim"]),
        #                     "negacion_detectada": d["is_negation"],
        #                     "tipo_distorsion": d["best_label"],
        #                     "puntaje_principal": d["best_score"],
        #                     "puntajes_detallados": {
        #                         "contradiccion": round(d["scores_detail"]["contradiction"], 3),
        #                         "neutral": round(d["scores_detail"]["neutral"], 3),
        #                         "coincidencia": round(d["scores_detail"]["entailment"], 3),
        #                     },
        #                 }
        #                 for d in distortion["detalles"]
        #             ],
        #         },
        #         "mineria": mining,
        #     },
        # }