Spaces:

Rodricklw
/

api-sesgos

Sleeping

api-sesgos / src /engine /analysis_engine.py

RodrickMJ

Add application file

f1db1e3 15 days ago

7.33 kB

	from datetime import datetime, timezone

	from src.mineria.mining import MiningFeatures
	from src.semantic.nli_distortion import DistortionDetectorNLI
	from src.semantic.relevance import SemanticRelevance
	from src.sesgos.sesgos import BiasDetector


	class AnalysisEngine:
	    """Run the comment-versus-document analysis pipeline.

	    The engine chains four collaborators: a relevance scorer, a distortion
	    detector, a bias detector (applied to both the document and the user
	    comment) and a text-mining feature extractor, and assembles their
	    outputs into a single response dictionary.
	    """

	    # Title value that marks paragraphs as retrieved RAG context.
	    _RAG_TITLE = "RAG_CONTEXT"
	    # RAG context whose best paragraph scores below this is rejected.
	    _RAG_MIN_SCORE = 0.25
	    # Document-level relevance decisions that stop the analysis early.
	    _LOW_RELEVANCE_DECISIONS = ("no relacionado", "tangencial")
	    # Ordered (marker, verdict) pairs; the first marker found wins.
	    _VERDICT_BY_MARKER = (
	        ("gravemente", "distorsion"),
	        ("parcial", "parcial"),
	        ("neutral", "neutral"),
	    )

	    def __init__(self):
	        """Instantiate the four analysis components used by ``analyze``."""
	        self.relevance = SemanticRelevance()
	        self.distortion = DistortionDetectorNLI()
	        self.bias = BiasDetector()
	        self.mining = MiningFeatures()

	    def map_veredicto(self, decision: str) -> str:
	        """Map a free-text distortion decision to a short verdict label.

	        Args:
	            decision: Decision text from the distortion detector; may be
	                empty or ``None``.

	        Returns:
	            ``"indefinido"`` for an empty decision; otherwise
	            ``"distorsion"``, ``"parcial"`` or ``"neutral"`` according to
	            the first marker found (case-insensitive), or ``"correcto"``
	            when no marker matches.
	        """
	        if not decision:
	            return "indefinido"

	        lowered = decision.lower()
	        for marker, verdict in self._VERDICT_BY_MARKER:
	            if marker in lowered:
	                return verdict
	        return "correcto"

	    @staticmethod
	    def _round_score(value, digits: int = 3):
	        """Round a score, treating a missing (``None``) value as 0.0."""
	        return round(0.0 if value is None else value, digits)

	    @staticmethod
	    def _format_biases(bias_result: dict) -> list:
	        """Normalize detected biases into ``{"label", "score"}`` dicts."""
	        return [
	            {
	                "label": s.get("sesgo", s.get("label", "desconocido")),
	                "score": s.get("confianza", s.get("score", 0.0)),
	            }
	            for s in (bias_result.get("sesgos_detectados") or [])
	        ]

	    def _format_contradiction(self, detail: dict) -> dict:
	        """Convert one distortion detail into its response representation."""
	        scores = detail.get("scores_detail") or {}
	        return {
	            "parrafo": detail.get("paragraph", ""),
	            "oracion_usuario": detail.get("sentence", ""),
	            "claim_extraido": detail.get("cleaned_claim"),
	            # Fall back to the cleaned claim when no transformation exists.
	            "claim_transformado": detail.get(
	                "transformed_claim", detail.get("cleaned_claim")
	            ),
	            "negacion_detectada": detail.get("is_negation", False),
	            "tipo_distorsion": detail.get("best_label", "neutral"),
	            "puntaje_principal": self._round_score(detail.get("best_score")),
	            "puntajes_detallados": {
	                "contradiccion": self._round_score(scores.get("contradiction")),
	                "neutral": self._round_score(scores.get("neutral")),
	                "coincidencia": self._round_score(scores.get("entailment")),
	            },
	        }

	    def analyze(self, user_text: str, document_paragraphs: list, title: str) -> dict:
	        """Analyze a user comment against a document.

	        Args:
	            user_text: The user's comment.
	            document_paragraphs: Paragraph dicts; the ``"text"`` and
	                ``"type"`` keys are read here.
	            title: Document title, or ``"RAG_CONTEXT"`` when the paragraphs
	                are retrieved RAG context.

	        Returns:
	            One of three dictionaries:

	            * ``{"status": "rag_irrelevante", "relevance", "message"}`` when
	              the title is ``"RAG_CONTEXT"`` and the best paragraph score is
	              missing or below 0.25;
	            * ``{"status": "poco_relevante", "relevance"}`` when, for any
	              other title, the document decision is ``"no relacionado"`` or
	              ``"tangencial"``;
	            * otherwise the full report with ``"scraped_content"`` and
	              ``"analisis"`` sections.
	        """
	        # 1. Overall relevance.
	        # NOTE(review): assumes relate() returns a dict - confirm.
	        rel = self.relevance.relate(user_text, title, document_paragraphs)

	        if title == self._RAG_TITLE:
	            # RAG filter: a missing best paragraph or score counts as
	            # irrelevant instead of raising KeyError/TypeError.
	            best_paragraph = rel.get("best_paragraph") or {}
	            best_score = best_paragraph.get("score")
	            if best_score is None or best_score < self._RAG_MIN_SCORE:
	                return {
	                    "status": "rag_irrelevante",
	                    "relevance": rel,
	                    "message": "El contexto recuperado por el RAG no es relevante al comentario del usuario.",
	                }
	        elif rel["decision_document"] in self._LOW_RELEVANCE_DECISIONS:
	            # Regular filter for scraped documents.
	            return {
	                "status": "poco_relevante",
	                "relevance": rel,
	            }

	        # 2. Distortion of the document by the user comment.
	        distortion = self.distortion.analyze_user_comment(
	            user_text, document_paragraphs
	        )

	        # 3. Biases in the document (title plus all paragraph texts).
	        paragraph_texts = " ".join(
	            p.get("text") or "" for p in document_paragraphs
	        )
	        full_document_text = title + ". " + paragraph_texts
	        biases_document = self.bias.detect(full_document_text)

	        # 4. Biases in the user comment.
	        biases_user = self.bias.detect(user_text)

	        # 5. Text mining features of the user comment.
	        mining = self.mining.extract(user_text)

	        # Final assembly, tolerant of missing or None fields.
	        contradicciones_formateadas = [
	            self._format_contradiction(d)
	            for d in (distortion.get("detalles") or [])
	        ]

	        # utcnow() is deprecated since Python 3.12; take an aware UTC time
	        # and drop tzinfo so the ISO string keeps its naive format.
	        collected_at = datetime.now(timezone.utc).replace(tzinfo=None)

	        return {
	            "scraped_content": {
	                "title": title,
	                "url": "",
	                "fecha_recoleccion": collected_at.isoformat(),
	                "segmentos_contenido": [
	                    {
	                        "type": p.get("type", "p"),
	                        "text": p.get("text", ""),
	                    }
	                    for p in document_paragraphs
	                ],
	            },
	            "analisis": {
	                "document_sesgo": {
	                    "sesgos_encontrados": self._format_biases(biases_document),
	                    "explicacion": "Sesgos detectados en el documento mediante análisis heurístico.",
	                },
	                "user_sesgo": {
	                    "sesgos_encontrados": self._format_biases(biases_user),
	                    "explicacion": "Sesgos detectados en el comentario del usuario.",
	                },
	                "document_distorsion": {
	                    "veredicto": self.map_veredicto(distortion.get("decision")),
	                    "contradicciones": contradicciones_formateadas,
	                },
	                "mineria": mining,
	            },
	        }

	# return {
	# "scraped_content": {
	# "title": title,
	# "url": "",
	# "fecha_recoleccion": datetime.utcnow().isoformat(),
	# "segmentos_contenido": [
	# {"type": p["type"], "text": p["text"]} for p in document_paragraphs
	# ],
	# },
	# "analisis": {
	# "document_sesgo": {
	# "sesgos_encontrados": [
	# {
	# "label": s.get("sesgo", s.get("label", "desconocido")),
	# "score": s.get("confianza", s.get("score", 0.0)),
	# }
	# for s in biases["sesgos_detectados"]
	# ],
	# "explicacion": "Sesgos detectados mediante análisis heurístico y de objetividad.",
	# },
	# "document_distorsion": {
	# "veredicto": self.map_veredicto(distortion["decision"]),
	# "contradicciones": [
	# {
	# "parrafo": d["paragraph"],
	# "oracion_usuario": d["sentence"],
	# "claim_extraido": d["cleaned_claim"],
	# "claim_transformado": d.get("transformed_claim", d["cleaned_claim"]),
	# "negacion_detectada": d["is_negation"],
	# "tipo_distorsion": d["best_label"],
	# "puntaje_principal": d["best_score"],
	# "puntajes_detallados": {
	# "contradiccion": round(d["scores_detail"]["contradiction"], 3),
	# "neutral": round(d["scores_detail"]["neutral"], 3),
	# "coincidencia": round(d["scores_detail"]["entailment"], 3),
	# },
	# }
	# for d in distortion["detalles"]
	# ],
	# },
	# "mineria": mining,
	# },
	# }