A-I-C-A committed on
Commit c5e2a8f · verified · 1 Parent(s): 04407a2

Upload bias_prediction_engine.py

Files changed (1)
  1. bias_prediction_engine.py +612 -0
bias_prediction_engine.py ADDED
@@ -0,0 +1,612 @@
"""
Bias Detection and Outcome Prediction Engine using InLegalBERT
================================================================

This module provides:
1. Document/Text bias detection (gender, region, caste, etc.)
2. RAG output bias detection (tone, interpretive bias)
3. Systemic/Statistical bias analysis
4. Legal outcome prediction with confidence scores

Model: InLegalBERT (Hugging Face model pretrained on Indian legal text)
"""

import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
from typing import Dict, List, Any, Optional, Union
import re
from collections import Counter
import json
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# MODEL INITIALIZATION
# ============================================================================

class InLegalBERTEngine:
    """
    Main engine for bias detection and outcome prediction using InLegalBERT
    """

    def __init__(self, model_name: str = "law-ai/InLegalBERT"):
        """
        Initialize the InLegalBERT model and tokenizer

        Args:
            model_name: HuggingFace model identifier (use your fine-tuned model path)
        """
        print(f"Loading InLegalBERT model: {model_name}")

        # Load tokenizer and base model for embeddings
        # TODO: Replace "law-ai/InLegalBERT" with your fine-tuned model path
        # Example: "your-username/inlegalbert-bias-finetuned"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.base_model = AutoModel.from_pretrained(model_name)

        # Set device
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.base_model.to(self.device)
        self.base_model.eval()

        # Bias detection keywords (Indian legal context)
        self.bias_keywords = {
            'gender': [
                'woman', 'women', 'girl', 'female', 'lady', 'wife', 'mother',
                'man', 'men', 'boy', 'male', 'husband', 'father', 'manhood', 'womanhood'
            ],
            'caste': [
                'scheduled caste', 'sc', 'st', 'scheduled tribe', 'obc', 'backward class',
                'dalit', 'brahmin', 'upper caste', 'lower caste', 'caste', 'jati'
            ],
            'religion': [
                'hindu', 'muslim', 'christian', 'sikh', 'buddhist', 'jain',
                'religious', 'communal', 'minority', 'majority community'
            ],
            'region': [
                'north', 'south', 'east', 'west', 'rural', 'urban', 'tribal',
                'metropolitan', 'village', 'city', 'state', 'region'
            ],
            'socioeconomic': [
                'poor', 'rich', 'wealthy', 'poverty', 'income', 'economically',
                'below poverty line', 'bpl', 'weaker section', 'privileged'
            ],
            'age': [
                'minor', 'juvenile', 'child', 'elderly', 'senior citizen', 'youth',
                'old', 'young', 'aged'
            ]
        }

        print(f"Model loaded successfully on {self.device}")

    # ========================================================================
    # UTILITY FUNCTIONS
    # ========================================================================

    def get_embeddings(self, text: str) -> torch.Tensor:
        """
        Get BERT embeddings for input text

        Args:
            text: Input text string

        Returns:
            torch.Tensor: Embedding vector
        """
        # Tokenize
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True
        ).to(self.device)

        # Get embeddings
        with torch.no_grad():
            outputs = self.base_model(**inputs)
            # Use CLS token embedding (first token)
            embeddings = outputs.last_hidden_state[:, 0, :]

        return embeddings

    def compute_bias_score(self, text: str, bias_type: str) -> float:
        """
        Compute bias score for a specific bias type using keyword frequency
        and contextual analysis

        Args:
            text: Input text
            bias_type: Type of bias (gender, caste, etc.)

        Returns:
            float: Bias score between 0 and 1
        """
        text_lower = text.lower()
        keywords = self.bias_keywords.get(bias_type, [])

        # Count keyword occurrences
        keyword_count = sum(text_lower.count(keyword) for keyword in keywords)

        # Normalize by text length (words)
        word_count = len(text.split())
        if word_count == 0:
            return 0.0

        # Calculate frequency-based score
        frequency_score = min(keyword_count / word_count * 10, 1.0)

        # Get contextual score using embeddings (simplified)
        # In production, use a fine-tuned classifier
        contextual_score = frequency_score * 0.8  # Simplified

        return round(contextual_score, 3)

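    # Worked example (hypothetical numbers, for illustration only): a 200-word judgment
    # containing 3 gender-keyword hits gives
    #   frequency_score  = min(3 / 200 * 10, 1.0) = 0.15
    #   contextual_score = 0.15 * 0.8             = 0.12
    # which stays below the default 0.15 flag threshold used by detect_document_bias.
    # A fine-tuned classifier would replace the fixed 0.8 contextual factor.
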
    # ========================================================================
    # 1. DOCUMENT/TEXT BIAS DETECTION
    # ========================================================================

    def detect_document_bias(self, text: str, threshold: float = 0.15) -> Dict[str, Any]:
        """
        Detect various biases in legal documents/FIRs/judgments

        Args:
            text: Legal document text
            threshold: Minimum score to flag a bias (default 0.15)

        Returns:
            Dict containing bias flags and detailed scores
        """
        bias_scores = {}
        bias_flags = []

        # Analyze each bias type
        for bias_type in self.bias_keywords.keys():
            score = self.compute_bias_score(text, bias_type)
            bias_scores[bias_type] = score

            if score >= threshold:
                bias_flags.append(bias_type)

        # Determine severity levels
        bias_details = []
        for bias_type, score in bias_scores.items():
            if score >= threshold:
                severity = "high" if score >= 0.4 else "medium" if score >= 0.25 else "low"
                bias_details.append({
                    "type": bias_type,
                    "severity": severity,
                    "score": score,
                    "description": f"{bias_type.capitalize()} bias detected based on keyword analysis and context"
                })

        return {
            "biasFlags_text": bias_flags,
            "bias_scores": bias_scores,
            "bias_details": bias_details,
            # float() keeps np.mean's numpy scalar JSON-serializable
            "overall_bias_score": round(float(np.mean(list(bias_scores.values()))), 3),
            "analysis_timestamp": datetime.now().isoformat()
        }

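    # Shape of the returned dict, with purely hypothetical scores for illustration:
    # {
    #   "biasFlags_text": ["gender", "caste"],
    #   "bias_scores": {"gender": 0.31, "caste": 0.18, "religion": 0.0, ...},
    #   "bias_details": [{"type": "gender", "severity": "medium", "score": 0.31, ...}, ...],
    #   "overall_bias_score": 0.082,
    #   "analysis_timestamp": "2025-01-01T12:00:00"
    # }
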
    # ========================================================================
    # 2. RAG OUTPUT BIAS DETECTION
    # ========================================================================

    def detect_rag_output_bias(self,
                               rag_summary: str,
                               source_documents: List[str]) -> Dict[str, Any]:
        """
        Detect bias in AI-generated RAG summaries/reasoning

        Args:
            rag_summary: AI-generated summary or reasoning
            source_documents: Original source documents used for RAG

        Returns:
            Dict containing RAG-specific bias flags
        """
        bias_flags = []
        bias_details = []

        # Get embeddings
        summary_emb = self.get_embeddings(rag_summary)
        source_embs = [self.get_embeddings(doc) for doc in source_documents[:5]]  # Limit to 5

        # 1. TONE BIAS - Check if summary tone differs from sources
        if source_embs:
            avg_source_emb = torch.mean(torch.stack(source_embs), dim=0)
            # Cosine similarity
            similarity = torch.nn.functional.cosine_similarity(summary_emb, avg_source_emb)

            if similarity < 0.7:  # Low similarity indicates tone shift
                bias_flags.append("tone_bias")
                bias_details.append({
                    "type": "tone_bias",
                    "severity": "medium",
                    "score": round(1 - similarity.item(), 3),
                    "description": "AI summary tone differs significantly from source documents"
                })

        # 2. INTERPRETIVE BIAS - Check for subjective language
        subjective_words = [
            'clearly', 'obviously', 'undoubtedly', 'certainly', 'definitely',
            'surely', 'apparently', 'seemingly', 'arguably', 'presumably'
        ]
        summary_lower = rag_summary.lower()
        subjective_count = sum(summary_lower.count(word) for word in subjective_words)

        if subjective_count > 2:
            bias_flags.append("interpretive_bias")
            bias_details.append({
                "type": "interpretive_bias",
                "severity": "medium" if subjective_count > 4 else "low",
                "score": round(min(subjective_count / 10, 1.0), 3),
                "description": f"Summary contains {subjective_count} subjective/interpretive terms"
            })

        # 3. SELECTIVITY BIAS - Check if summary over-represents certain aspects
        # Count mentions of different legal aspects
        aspects = {
            'procedural': ['procedure', 'process', 'filing', 'hearing', 'appeal'],
            'substantive': ['law', 'statute', 'provision', 'section', 'act'],
            'factual': ['fact', 'evidence', 'witness', 'testimony', 'statement']
        }

        aspect_counts = {k: sum(summary_lower.count(w) for w in v) for k, v in aspects.items()}
        max_count = max(aspect_counts.values()) if aspect_counts.values() else 1

        if max_count > 5 and any(count < max_count * 0.3 for count in aspect_counts.values()):
            bias_flags.append("selectivity_bias")
            bias_details.append({
                "type": "selectivity_bias",
                "severity": "low",
                "score": 0.4,
                "description": "Summary may over-emphasize certain legal aspects"
            })

        return {
            "biasFlags_output": bias_flags,
            "bias_details": bias_details,
            "analysis_timestamp": datetime.now().isoformat()
        }

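    # Worked example (hypothetical numbers, for illustration only): if the cosine
    # similarity between the summary's CLS embedding and the averaged source embeddings
    # is 0.62 (< 0.7), "tone_bias" is flagged with score round(1 - 0.62, 3) = 0.38.
    # A summary using "clearly", "obviously" and "undoubtedly" (3 subjective terms > 2)
    # is also flagged for "interpretive_bias" with score min(3 / 10, 1.0) = 0.3, severity "low".
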
    # ========================================================================
    # 3. SYSTEMIC/STATISTICAL BIAS DETECTION
    # ========================================================================

    def detect_systemic_bias(self,
                             historical_cases: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze systemic and statistical biases from historical case data

        Args:
            historical_cases: List of case dictionaries with keys:
                - outcome: str (e.g., "conviction", "acquittal")
                - gender: str (optional)
                - region: str (optional)
                - caste: str (optional)
                - case_type: str
                - year: int

        Returns:
            Dict containing systemic bias metrics and dashboard data
        """
        if not historical_cases:
            return {"error": "No historical cases provided"}

        # Initialize analytics
        outcome_by_gender = {}
        outcome_by_region = {}
        outcome_by_caste = {}
        outcome_by_year = {}

        # Process cases
        for case in historical_cases:
            outcome = case.get('outcome', 'unknown')

            # Gender analysis
            if 'gender' in case:
                gender = case['gender']
                if gender not in outcome_by_gender:
                    outcome_by_gender[gender] = []
                outcome_by_gender[gender].append(outcome)

            # Region analysis
            if 'region' in case:
                region = case['region']
                if region not in outcome_by_region:
                    outcome_by_region[region] = []
                outcome_by_region[region].append(outcome)

            # Caste analysis
            if 'caste' in case:
                caste = case['caste']
                if caste not in outcome_by_caste:
                    outcome_by_caste[caste] = []
                outcome_by_caste[caste].append(outcome)

            # Temporal analysis
            if 'year' in case:
                year = case['year']
                if year not in outcome_by_year:
                    outcome_by_year[year] = []
                outcome_by_year[year].append(outcome)

        # Calculate disparity metrics
        def calculate_disparity(outcome_dict: Dict) -> Dict:
            """Calculate outcome disparities"""
            disparity_data = {}
            for category, outcomes in outcome_dict.items():
                total = len(outcomes)
                if total > 0:
                    conviction_rate = outcomes.count('conviction') / total
                    disparity_data[category] = {
                        'total_cases': total,
                        'conviction_rate': round(conviction_rate, 3),
                        'acquittal_rate': round(outcomes.count('acquittal') / total, 3)
                    }
            return disparity_data

        gender_disparity = calculate_disparity(outcome_by_gender)
        region_disparity = calculate_disparity(outcome_by_region)
        caste_disparity = calculate_disparity(outcome_by_caste)

        # Detect significant disparities
        bias_flags = []

        if gender_disparity:
            rates = [d['conviction_rate'] for d in gender_disparity.values()]
            if max(rates) - min(rates) > 0.15:
                bias_flags.append("gender_disparity")

        if region_disparity:
            rates = [d['conviction_rate'] for d in region_disparity.values()]
            if max(rates) - min(rates) > 0.15:
                bias_flags.append("regional_disparity")

        if caste_disparity:
            rates = [d['conviction_rate'] for d in caste_disparity.values()]
            if max(rates) - min(rates) > 0.15:
                bias_flags.append("caste_disparity")

        # Generate dashboard-ready data
        dashboard_data = {
            "summary_metrics": {
                "total_cases_analyzed": len(historical_cases),
                "overall_conviction_rate": round(
                    sum(1 for c in historical_cases if c.get('outcome') == 'conviction') / len(historical_cases),
                    3
                ),
                "bias_flags_detected": len(bias_flags)
            },
            "gender_analysis": {
                "disparity_data": gender_disparity,
                "chart_data": [
                    {"category": k, "conviction_rate": v['conviction_rate']}
                    for k, v in gender_disparity.items()
                ]
            },
            "regional_analysis": {
                "disparity_data": region_disparity,
                "chart_data": [
                    {"category": k, "conviction_rate": v['conviction_rate']}
                    for k, v in region_disparity.items()
                ]
            },
            "caste_analysis": {
                "disparity_data": caste_disparity,
                "chart_data": [
                    {"category": k, "conviction_rate": v['conviction_rate']}
                    for k, v in caste_disparity.items()
                ]
            },
            "temporal_trends": {
                "by_year": {
                    year: {
                        'total': len(outcomes),
                        'conviction_rate': round(outcomes.count('conviction') / len(outcomes), 3)
                    }
                    for year, outcomes in outcome_by_year.items()
                }
            }
        }

        return {
            "systemic_bias_flags": bias_flags,
            "biasDashboardData": dashboard_data,
            "analysis_timestamp": datetime.now().isoformat()
        }

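    # Worked example (hypothetical rates, for illustration only): if conviction rates by
    # gender come out as {"male": 0.62, "female": 0.40}, the gap is 0.62 - 0.40 = 0.22,
    # which exceeds the 0.15 threshold and flags "gender_disparity"; a regional gap of
    # 0.10 would stay below the threshold and raise no flag.
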
    # ========================================================================
    # 4. OUTCOME PREDICTION
    # ========================================================================

    def predict_outcome(self,
                        case_text: str,
                        case_metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Predict legal case outcome using InLegalBERT embeddings and heuristics

        Args:
            case_text: Full case text (FIR, facts, arguments, etc.)
            case_metadata: Optional metadata (case_type, jurisdiction, etc.)

        Returns:
            Dict containing prediction, confidence, and justification
        """
        # Get text embeddings
        embeddings = self.get_embeddings(case_text)

        # Keyword-based prediction (simplified - in production use fine-tuned classifier)
        conviction_keywords = [
            'guilty', 'convicted', 'evidence proves', 'beyond reasonable doubt',
            'establish', 'proven', 'corroborated', 'substantiated'
        ]
        acquittal_keywords = [
            'not guilty', 'acquitted', 'benefit of doubt', 'insufficient evidence',
            'failed to prove', 'contradictory', 'unreliable', 'doubt'
        ]

        text_lower = case_text.lower()
        conviction_score = sum(text_lower.count(kw) for kw in conviction_keywords)
        acquittal_score = sum(text_lower.count(kw) for kw in acquittal_keywords)

        # Calculate prediction
        total_score = conviction_score + acquittal_score
        if total_score == 0:
            # No strong indicators, use neutral prediction
            predicted_outcome = "uncertain"
            confidence_score = 0.5
            justification = "Insufficient textual indicators for confident prediction"
        else:
            conviction_prob = conviction_score / total_score

            if conviction_prob > 0.6:
                predicted_outcome = "conviction"
                confidence_score = round(conviction_prob, 3)
                justification = f"Text analysis shows {conviction_score} conviction indicators vs {acquittal_score} acquittal indicators"
            elif conviction_prob < 0.4:
                predicted_outcome = "acquittal"
                confidence_score = round(1 - conviction_prob, 3)
                justification = f"Text analysis shows {acquittal_score} acquittal indicators vs {conviction_score} conviction indicators"
            else:
                predicted_outcome = "uncertain"
                confidence_score = 0.5
                justification = "Mixed indicators suggest uncertain outcome"

        # Adjust for metadata if provided
        if case_metadata:
            case_type = case_metadata.get('case_type', '').lower()

            # Example adjustments (customize based on domain knowledge)
            if 'bail' in case_type:
                if predicted_outcome == "conviction":
                    predicted_outcome = "bail_denied"
                    justification += "; Bail application context considered"
                elif predicted_outcome == "acquittal":
                    predicted_outcome = "bail_granted"
                    justification += "; Bail application context considered"

        # Confidence level categorization
        if confidence_score >= 0.75:
            confidence_level = "high"
        elif confidence_score >= 0.5:
            confidence_level = "medium"
        else:
            confidence_level = "low"

        return {
            "predictedOutcome": predicted_outcome,
            "confidenceScore": confidence_score,
            "confidenceLevel": confidence_level,
            "justification": justification,
            "embedding_norm": float(torch.norm(embeddings).item()),
            "analysis_timestamp": datetime.now().isoformat()
        }

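    # Worked example (hypothetical counts, for illustration only): 5 conviction indicators
    # vs 2 acquittal indicators gives conviction_prob = 5 / 7 ≈ 0.714 > 0.6, so the result
    # is "conviction" with confidenceScore 0.714 and confidenceLevel "medium"
    # (0.5 <= 0.714 < 0.75); a 3 vs 3 split gives conviction_prob = 0.5, which falls in
    # the 0.4-0.6 band and returns "uncertain".
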
# ============================================================================
# API INTERFACE FUNCTIONS
# ============================================================================

# Global model instance (loaded once)
_model_instance = None

def get_model() -> InLegalBERTEngine:
    """Get or create model instance (singleton pattern)"""
    global _model_instance
    if _model_instance is None:
        _model_instance = InLegalBERTEngine()
    return _model_instance


def analyze_legal_case(
    case_text: str,
    rag_summary: Optional[str] = None,
    source_documents: Optional[List[str]] = None,
    historical_cases: Optional[List[Dict]] = None,
    case_metadata: Optional[Dict] = None
) -> Dict[str, Any]:
    """
    Main API function for comprehensive legal case analysis

    Args:
        case_text: Legal document/FIR/judgment text
        rag_summary: AI-generated summary (for RAG bias detection)
        source_documents: Source docs used for RAG (for RAG bias detection)
        historical_cases: Historical case data (for systemic bias analysis)
        case_metadata: Case metadata for outcome prediction

    Returns:
        JSON-serializable dict with all analysis results
    """
    model = get_model()

    results = {
        "status": "success",
        "analysis_id": f"analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
        "timestamp": datetime.now().isoformat()
    }

    # 1. Document bias detection
    if case_text:
        results["document_bias"] = model.detect_document_bias(case_text)

    # 2. RAG output bias detection
    if rag_summary and source_documents:
        results["rag_bias"] = model.detect_rag_output_bias(rag_summary, source_documents)

    # 3. Systemic bias analysis
    if historical_cases:
        results["systemic_bias"] = model.detect_systemic_bias(historical_cases)

    # 4. Outcome prediction
    if case_text:
        results["outcome_prediction"] = model.predict_outcome(case_text, case_metadata)

    return results


# ============================================================================
# EXAMPLE USAGE
# ============================================================================

if __name__ == "__main__":
    # Example legal case text
    sample_case = """
    The accused, a 35-year-old woman from rural Maharashtra, was charged under
    Section 302 IPC for alleged murder. The prosecution's case relies heavily on
    circumstantial evidence. The witness testimonies are contradictory, and the
    forensic evidence is inconclusive. The accused belongs to a scheduled caste
    community. The defense argues that there is insufficient evidence to establish
    guilt beyond reasonable doubt.
    """

    # Example RAG summary
    sample_rag_summary = """
    Clearly, the evidence points toward acquittal. The case obviously lacks
    substantial proof of guilt.
    """

    # Example historical cases
    sample_historical = [
        {"outcome": "conviction", "gender": "male", "region": "urban", "year": 2020},
        {"outcome": "acquittal", "gender": "female", "region": "rural", "year": 2020},
        {"outcome": "conviction", "gender": "male", "region": "urban", "year": 2021},
        {"outcome": "conviction", "gender": "female", "region": "urban", "year": 2021},
    ]

    # Run comprehensive analysis
    print("Running comprehensive legal analysis...\n")
    results = analyze_legal_case(
        case_text=sample_case,
        rag_summary=sample_rag_summary,
        source_documents=[sample_case],
        historical_cases=sample_historical,
        case_metadata={"case_type": "criminal", "jurisdiction": "Maharashtra"}
    )

    # Print results
    print(json.dumps(results, indent=2))