Adityahulk committed on
Commit
e46a637
Β·
1 Parent(s): 5368281

integrating reflexion agent

Browse files
.env.example CHANGED
@@ -3,8 +3,35 @@ PROMPT_SCENE_GEN_MODEL=groq/llama-3.3-70b-versatile
3
  PDF_SCENE_GEN_MODEL=gemini/gemini-1.5-flash
4
  PDF_RETRY_MODEL=gemini/gemini-2.0-flash-exp #Optional, only if you want to retry the PDF generation
5
  CODE_GEN_MODEL=openrouter/deepseek/deepseek-chat:free
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # Use the LiteLLM convention of naming the API keys depending on the models you choose
8
  GROQ_API_KEY=
9
  OPENROUTER_API_KEY=
10
- GEMINI_API_KEY=
 
 
 
 
 
 
 
3
  PDF_SCENE_GEN_MODEL=gemini/gemini-1.5-flash
4
  PDF_RETRY_MODEL=gemini/gemini-2.0-flash-exp #Optional, only if you want to retry the PDF generation
5
  CODE_GEN_MODEL=openrouter/deepseek/deepseek-chat:free
6
+ CODE_GEN_FALLBACK_MODEL=openrouter/anthropic/claude-sonnet-4 # Optional fallback model
7
 
8
+ # ============================================================================
9
+ # Reflexion Agent Configuration
10
+ # ============================================================================
11
+ # The Reflexion Agent uses an Actor-Critic-Reflector loop for improved code quality.
12
+ # It generates code, critiques it for issues, and fixes problems before rendering.
13
+
14
+ # Enable/disable Reflexion Agent (default: true)
15
+ REFLEXION_ENABLED=true
16
+
17
+ # Critic model - used for code review (stronger model recommended)
18
+ # This model analyzes code for visual overlaps, API misuse, logic errors, etc.
19
+ REFLEXION_CRITIC_MODEL=openrouter/anthropic/claude-sonnet-4
20
+
21
+ # Maximum iterations for reflexion loop (default: 2)
22
+ # Each iteration: generate -> critique -> fix
23
+ REFLEXION_MAX_ITERATIONS=2
24
+
25
+ # ============================================================================
26
+ # API Keys
27
+ # ============================================================================
28
  # Use the LiteLLM convention of naming the API keys depending on the models you choose
29
  GROQ_API_KEY=
30
  OPENROUTER_API_KEY=
31
+ GEMINI_API_KEY=
32
+
33
+ # Internal API key for securing endpoints (generate a random UUID)
34
+ INTERNAL_API_KEY=
35
+
36
+ # ElevenLabs API key for voiceover generation
37
+ ELEVENLABS_API_KEY=
api_server.py CHANGED
@@ -327,7 +327,8 @@ class VideoGenerator:
327
  code = generate_animation_response(
328
  input_data=job.get("input_data", ""),
329
  input_type=job["input_type"],
330
- category=job["category"]
 
331
  )
332
 
333
  logger.info(f"βœ… Code generation complete for job {job_id[:8]}...")
 
327
  code = generate_animation_response(
328
  input_data=job.get("input_data", ""),
329
  input_type=job["input_type"],
330
+ category=job["category"],
331
+ job_id=job_id # Pass job_id for Reflexion logging
332
  )
333
 
334
  logger.info(f"βœ… Code generation complete for job {job_id[:8]}...")
manimator/agents/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agents module for Manimator
3
+
4
+ Contains intelligent agents for code generation and improvement.
5
+ """
6
+
7
+ from .reflexion_agent import ReflexionAgent, ReflectionState
8
+
9
+ __all__ = ["ReflexionAgent", "ReflectionState"]
manimator/agents/reflexion_agent.py ADDED
@@ -0,0 +1,533 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reflexion Agent for Manim Code Generation
3
+
4
+ 2-iteration self-improvement loop:
5
+ - Actor: Generates code using primary model
6
+ - Critic: Reviews code for issues (uses stronger model)
7
+ - Reflector: Fixes issues based on critique
8
+
9
+ Integrated with existing Manimator architecture.
10
+ """
11
+
12
+ import json
13
+ import logging
14
+ import re
15
+ import os
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ from typing import List, Optional, Tuple
19
+ import litellm
20
+
21
+ from ..utils.system_prompts import get_system_prompt
22
+ from ..utils.code_postprocessor import post_process_code
23
+ from ..utils.code_validator import CodeValidator
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
@dataclass
class Issue:
    """One code problem reported by the critic model.

    Fields are filled in from the critic's JSON response; anything the
    critic omits keeps its default.
    """
    severity: str  # "low" | "medium" | "high"
    category: str  # "OVERLAP" | "API_MISUSE" | "LOGIC_ERROR" | "BEST_PRACTICE"
    # (start_line, end_line) within the reviewed code, when identifiable
    line_range: Optional[Tuple[int, int]] = None
    description: str = ""  # what is wrong
    suggestion: str = ""  # how to fix it
36
+
37
+
38
@dataclass
class CritiqueResult:
    """Structured outcome of one critic review pass."""
    has_issues: bool
    overall_severity: str  # "none" | "low" | "medium" | "high"
    # Individual findings; each instance gets its own fresh list
    issues: "List[Issue]" = field(default_factory=list)
    summary: str = ""  # one-line human-readable recap
45
+
46
+
47
@dataclass
class Reflection:
    """A lesson distilled from fixing one critique, fed into the next attempt."""
    iteration: int  # which reflexion iteration produced this lesson
    original_issue: str  # the critic's description of the problem
    root_cause: str  # issue category treated as the root cause
    lesson: str  # actionable advice injected into the next prompt
    # Recorded automatically at creation time
    timestamp: datetime = field(default_factory=datetime.now)
55
+
56
+
57
@dataclass
class ReflectionState:
    """Mutable state threaded through one run of the reflexion loop."""
    job_id: str
    goal: str  # processed user prompt
    category: str  # animation category
    iteration: int = 0  # current loop iteration (1-based once running)
    max_iterations: int = 2

    current_code: str = ""  # latest generated/fixed code
    # Lifecycle: GENERATING | CRITIQUING | FIXING | COMPLETE
    status: str = "GENERATING"

    critique: "Optional[CritiqueResult]" = None  # most recent critic verdict
    reflection_memory: "List[Reflection]" = field(default_factory=list)

    # Metrics collected across all iterations
    total_issues_found: int = 0
    total_issues_fixed: int = 0
    generation_time_ms: int = 0
76
+
77
+
78
class ReflexionAgent:
    """
    Reflexion-based code generation with self-improvement.

    Actor-Critic-Reflector loop:
      - Actor generates Manim code (CODE_GEN_MODEL).
      - Critic reviews it for overlaps, API misuse, logic errors, and best
        practices (REFLEXION_CRITIC_MODEL; a stronger model is recommended).
      - Reflector rewrites the code to address the critique and records a
        lesson that is injected into the next generation attempt.

    The loop is capped at REFLEXION_MAX_ITERATIONS (default 2).
    """

    def __init__(self):
        """Read model names and loop configuration from the environment."""
        # Actor model (for generation and fixing) - uses existing CODE_GEN_MODEL
        self.actor_model = os.getenv("CODE_GEN_MODEL", "openrouter/anthropic/claude-sonnet-4")

        # Critic model (for code review) - a stronger model gives better critiques
        self.critic_model = os.getenv("REFLEXION_CRITIC_MODEL", "openrouter/anthropic/claude-sonnet-4")

        # Loop configuration
        self.max_iterations = int(os.getenv("REFLEXION_MAX_ITERATIONS", "2"))
        self.enabled = os.getenv("REFLEXION_ENABLED", "true").lower() == "true"

        # Validator for syntax checks
        self.validator = CodeValidator()

        # Plain string (was an f-string with no placeholders)
        logger.info("πŸ”„ ReflexionAgent initialized")
        logger.info(f"   Actor model: {self.actor_model}")
        logger.info(f"   Critic model: {self.critic_model}")
        logger.info(f"   Max iterations: {self.max_iterations}")
        logger.info(f"   Enabled: {self.enabled}")

    def generate_with_reflection(
        self,
        goal: str,
        category: str,
        job_id: str = "unknown"
    ) -> "Tuple[str, ReflectionState]":
        """
        Main reflexion loop - synchronous version for integration.

        Args:
            goal: User's animation request (processed prompt)
            category: Animation category (tech_system, product_startup, mathematical)
            job_id: Job identifier for logging

        Returns:
            Tuple of (final_code, ReflectionState with stats)
        """
        import time
        start_time = time.time()

        state = ReflectionState(
            job_id=job_id,
            goal=goal,
            category=category,
            max_iterations=self.max_iterations
        )

        # Slicing is safe for any string length; no conditional needed
        logger.info(f"πŸ”„ Starting Reflexion for job {job_id[:8]}")
        logger.info(f"   Max iterations: {self.max_iterations}")

        for iteration in range(1, self.max_iterations + 1):
            state.iteration = iteration
            logger.info(f"\n{'='*60}")
            logger.info(f"REFLEXION ITERATION {iteration}/{self.max_iterations}")
            logger.info(f"{'='*60}")

            # [1] ACTOR: Generate code, applying lessons from earlier iterations
            state.status = "GENERATING"
            logger.info("πŸ€– [ACTOR] Generating code...")

            state.current_code = self._generate_code(
                goal=goal,
                category=category,
                reflection_memory=state.reflection_memory
            )

            # Apply post-processing
            state.current_code = post_process_code(state.current_code)

            code_lines = state.current_code.count('\n') + 1
            logger.info(f"   βœ… Generated {code_lines} lines of code")

            # [2] CRITIC: Review code
            state.status = "CRITIQUING"
            logger.info("πŸ” [CRITIC] Reviewing code...")

            critique = self._critique_code(state.current_code, category)
            state.critique = critique
            state.total_issues_found += len(critique.issues)

            if not critique.has_issues or critique.overall_severity == "none":
                logger.info("   βœ… Critic: Code looks good!")
                state.status = "COMPLETE"
                break

            if critique.overall_severity == "low":
                # Minor issues are not worth another model round-trip
                logger.info(f"   βœ… Critic: Minor issues only ({len(critique.issues)}), acceptable")
                state.status = "COMPLETE"
                break

            # Log the top issues (cap at 5 to keep logs readable)
            logger.warning(f"   ⚠️ Critic found {len(critique.issues)} issues:")
            for i, issue in enumerate(critique.issues[:5], 1):
                severity_emoji = {"high": "πŸ”΄", "medium": "🟑", "low": "🟒"}.get(issue.severity, "βšͺ")
                logger.warning(f"   {i}. {severity_emoji} [{issue.category}] {issue.description[:80]}...")

            # [3] REFLECTOR: Fix issues (only if another iteration remains)
            if iteration < self.max_iterations:
                state.status = "FIXING"
                logger.info("πŸ”§ [REFLECTOR] Fixing issues...")

                fixed_code, reflection = self._fix_code(
                    original_code=state.current_code,
                    critique=critique,
                    iteration=iteration
                )

                # Apply post-processing to fixed code
                state.current_code = post_process_code(fixed_code)
                state.reflection_memory.append(reflection)
                # NOTE: optimistic count - assumes the reflector addressed every
                # reported issue; the next critique pass is the real check.
                state.total_issues_fixed += len(critique.issues)

                logger.info(f"   βœ… Fixed. Lesson: {reflection.lesson[:80]}...")
            else:
                logger.warning("   ⚠️ Max iterations reached, using best attempt")
                state.status = "COMPLETE"

        # Calculate metrics
        state.generation_time_ms = int((time.time() - start_time) * 1000)

        logger.info(f"\n{'='*60}")
        logger.info("πŸ“Š REFLEXION COMPLETE")
        logger.info(f"{'='*60}")
        logger.info(f"   Total iterations: {state.iteration}")
        logger.info(f"   Issues found: {state.total_issues_found}")
        logger.info(f"   Issues fixed: {state.total_issues_fixed}")
        logger.info(f"   Lessons learned: {len(state.reflection_memory)}")
        logger.info(f"   Total time: {state.generation_time_ms}ms")
        logger.info(f"   Final status: {state.status}")

        return state.current_code, state

    def _generate_code(
        self,
        goal: str,
        category: str,
        reflection_memory: "List[Reflection]"
    ) -> str:
        """Generate Manim code, including reflection lessons if available."""

        # Build system prompt with lessons from memory
        system_prompt = get_system_prompt(category)

        if reflection_memory:
            lessons = "\n".join([
                f"- {r.lesson}" for r in reflection_memory
            ])
            system_prompt += f"""

## 🚨 CRITICAL LESSONS FROM PREVIOUS ATTEMPT:
{lessons}

YOU MUST APPLY THESE LESSONS IN YOUR CODE! Do not repeat these mistakes.
"""

        # Build user message
        user_message = f"""Create a video about:

{goal}

CRITICAL REMINDERS:
1. NO BLANK SCREENS: Keep the screen populated. If a voiceover is playing, show something.
2. NO OVERLAPS: Ensure text and objects do not overlap. Use `next_to` and `arrange` with proper `buff`.
3. CLEAN TRANSITIONS: Fade out old content before showing new content.
4. VARIED ANIMATIONS: Use a mix of Write, FadeIn, GrowFromCenter, etc.
5. STAY ON SCREEN: Ensure all text and objects are within screen boundaries.
"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message}
        ]

        try:
            response = litellm.completion(
                model=self.actor_model,
                messages=messages,
                num_retries=2
            )

            content = response.choices[0].message.content
            code = self._extract_code(content)

            return code

        except Exception as e:
            logger.error(f"Actor generation failed: {e}")
            raise

    def _critique_code(self, code: str, category: str) -> "CritiqueResult":
        """Critique code and return structured issues."""

        critique_prompt = f"""You are an expert Manim code reviewer. Analyze this {category} animation code for potential issues.

CODE TO REVIEW:
```python
{code}
```

Review specifically for:

1. **Visual Overlaps** (CRITICAL):
   - VGroup misuse (arranging mixed types together)
   - Objects placed at same position without offset
   - Text stacking on top of other text
   - Elements not using next_to() or arrange() properly

2. **Manim API Misuse**:
   - Invalid parameters (corner_radius on Rectangle, etc.)
   - Deprecated methods
   - Incorrect animation calls

3. **Logic Errors**:
   - Objects used before definition
   - Animations on removed objects
   - Incorrect loop logic

4. **Best Practices**:
   - Blank screens during voiceover
   - Missing cleanup (FadeOut before new content)
   - Objects going off-screen
   - Poor animation variety

For EACH issue found, provide:
- severity: "low" | "medium" | "high"
- category: "OVERLAP" | "API_MISUSE" | "LOGIC_ERROR" | "BEST_PRACTICE"
- line_range: [start_line, end_line] if identifiable
- description: What's wrong
- suggestion: How to fix it

If the code is well-written with no significant issues, respond with:
{{"has_issues": false, "overall_severity": "none", "issues": [], "summary": "Code is well-structured"}}

Respond ONLY with valid JSON in this exact format:
```json
{{
  "has_issues": true,
  "overall_severity": "high",
  "issues": [
    {{
      "severity": "high",
      "category": "OVERLAP",
      "line_range": [65, 68],
      "description": "VGroup(boxes, labels).arrange() treats these as 2 items, causing all boxes to overlap",
      "suggestion": "Create individual VGroup pairs: [VGroup(box, label) for box, label in zip(boxes, labels)], then arrange pairs"
    }}
  ],
  "summary": "Found 1 critical overlap issue"
}}
```
"""

        try:
            response = litellm.completion(
                model=self.critic_model,
                messages=[{"role": "user", "content": critique_prompt}],
                num_retries=2
            )

            content = response.choices[0].message.content
            critique = self._parse_critique(content)

            return critique

        except Exception as e:
            logger.error(f"Critic review failed: {e}")
            # Return safe default (no issues) to allow code to proceed
            return CritiqueResult(
                has_issues=False,
                overall_severity="none",
                issues=[],
                summary=f"Critique failed: {str(e)}"
            )

    def _fix_code(
        self,
        original_code: str,
        critique: "CritiqueResult",
        iteration: int
    ) -> "Tuple[str, Reflection]":
        """Fix code based on critique; return fixed code plus the lesson learned."""

        # Build detailed fix prompt
        issues_summary = "\n".join([
            f"- [{i.severity.upper()}] {i.category} (Lines {i.line_range if i.line_range else 'N/A'}): {i.description}"
            for i in critique.issues
        ])

        suggestions = "\n".join([
            f"- {i.suggestion}"
            for i in critique.issues if i.suggestion
        ])

        fix_prompt = f"""Fix this Manim code based on the expert code review.

ORIGINAL CODE:
```python
{original_code}
```

ISSUES IDENTIFIED:
{issues_summary}

SPECIFIC FIX SUGGESTIONS:
{suggestions}

INSTRUCTIONS:
1. Apply ALL suggested fixes
2. Preserve all working parts of the code
3. Ensure no new issues are introduced
4. Keep the same class name and overall structure

Return the COMPLETE fixed Python code.
"""

        try:
            response = litellm.completion(
                model=self.actor_model,
                messages=[{"role": "user", "content": fix_prompt}],
                num_retries=2
            )

            content = response.choices[0].message.content
            fixed_code = self._extract_code(content)

            # Create reflection/lesson from the top (first) issue
            top_issue = critique.issues[0] if critique.issues else None
            reflection = Reflection(
                iteration=iteration,
                original_issue=top_issue.description if top_issue else "Unknown issue",
                root_cause=top_issue.category if top_issue else "UNKNOWN",
                lesson=top_issue.suggestion if top_issue else "No specific lesson",
                timestamp=datetime.now()
            )

            return fixed_code, reflection

        except Exception as e:
            logger.error(f"Reflector fix failed: {e}")
            # Return original code if fix fails
            reflection = Reflection(
                iteration=iteration,
                original_issue="Fix attempt failed",
                root_cause="ERROR",
                lesson=f"Fix failed: {str(e)}",
                timestamp=datetime.now()
            )
            return original_code, reflection

    def _extract_code(self, content: str) -> str:
        """Extract Python code from a (possibly markdown-fenced) LLM response."""
        # Prefer an explicitly Python-tagged fence (```python or ```py)
        match = re.search(r'```(?:python|py)\n(.*?)```', content, re.DOTALL)
        if match:
            return match.group(1).strip()

        # Any other fenced block, with or without a language tag
        match = re.search(r'```[a-zA-Z]*\n(.*?)```', content, re.DOTALL)
        if match:
            return match.group(1).strip()

        # Fallback: return as-is (might already be bare code)
        return content.strip()

    def _parse_critique(self, content: str) -> "CritiqueResult":
        """
        Parse the critic's JSON response into a CritiqueResult.

        Tries a ```json fence first, then any generic fence, then the
        outermost {...} span in the raw text (tolerates prose around the
        JSON), before falling back to a keyword heuristic.
        """
        try:
            # Try to extract JSON from a fenced block
            match = re.search(r'```json\s*\n(.*?)```', content, re.DOTALL)
            if not match:
                match = re.search(r'```\s*\n(.*?)```', content, re.DOTALL)
            if match:
                json_str = match.group(1)
            else:
                # No fence: take the outermost brace-delimited span if present
                start = content.find('{')
                end = content.rfind('}')
                json_str = content[start:end + 1] if 0 <= start < end else content

            # Clean up common issues
            json_str = json_str.strip()

            data = json.loads(json_str)

            issues = []
            for i in data.get("issues", []):
                line_range = None
                if i.get("line_range"):
                    try:
                        lr = i["line_range"]
                        if isinstance(lr, list) and len(lr) == 2:
                            line_range = (int(lr[0]), int(lr[1]))
                    except (ValueError, TypeError):
                        pass  # malformed line range is non-fatal

                issues.append(Issue(
                    severity=i.get("severity", "medium"),
                    category=i.get("category", "UNKNOWN"),
                    line_range=line_range,
                    description=i.get("description", ""),
                    suggestion=i.get("suggestion", "")
                ))

            return CritiqueResult(
                has_issues=data.get("has_issues", False),
                overall_severity=data.get("overall_severity", "low"),
                issues=issues,
                summary=data.get("summary", f"Found {len(issues)} issues" if issues else "No issues")
            )

        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse critique as JSON: {e}")
            logger.debug(f"Content: {content[:500]}")

            # Heuristic: any mention of issue/error/problem implies low-severity issues
            lowered = content.lower()
            has_issues = "issue" in lowered or "error" in lowered or "problem" in lowered

            return CritiqueResult(
                has_issues=has_issues,
                overall_severity="low" if has_issues else "none",
                issues=[],
                summary="Could not parse structured critique"
            )
        except Exception as e:
            logger.error(f"Critique parsing error: {e}")
            return CritiqueResult(
                has_issues=False,
                overall_severity="none",
                issues=[],
                summary=f"Parse error: {str(e)}"
            )
513
+
514
+
515
# Convenience function for direct use
def generate_with_reflexion(
    goal: str,
    category: str,
    job_id: str = "unknown"
) -> Tuple[str, ReflectionState]:
    """
    Run a one-off Reflexion generation without managing an agent instance.

    Args:
        goal: User's animation request
        category: Animation category
        job_id: Job identifier

    Returns:
        Tuple of (code, state)
    """
    result = ReflexionAgent().generate_with_reflection(goal, category, job_id)
    return result
manimator/api/animation_generation.py CHANGED
@@ -7,42 +7,53 @@ from ..utils.system_prompts import get_system_prompt
7
  from ..utils.code_postprocessor import post_process_code
8
  from ..utils.code_validator import CodeValidator
9
  from ..utils.code_fixer import CodeFixer
10
- # from ..utils.theme_injector import inject_theme_setup # Legacy theme injection removed
11
  from ..inputs.processor import InputProcessor
12
 
13
  logger = logging.getLogger(__name__)
14
 
15
 
16
- def generate_animation_response(
17
- input_data: str,
18
- input_type: str = "text",
19
- category: str = "mathematical",
20
- max_attempts: int = 3
21
- ) -> str:
22
- """Generate Manim animation code from input with validation and auto-fixing.
23
-
24
  Args:
25
- input_data (str): User's input (text, URL, or PDF path)
26
- input_type (str): Type of input ('text', 'url', 'pdf')
27
- category (str): Animation category (tech_system, product_startup, mathematical)
28
- max_attempts (int): Maximum generation attempts
29
-
30
  Returns:
31
- str: Generated Manim animation code (validated and post-processed)
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- Raises:
34
- HTTPException: If code generation fails after all attempts
 
 
 
 
 
 
 
 
 
 
 
35
  """
36
  validator = CodeValidator()
37
  fixer = CodeFixer()
38
 
39
- # Process input to get the actual prompt text
40
- try:
41
- prompt = InputProcessor.process(input_type, input_data)
42
- except Exception as e:
43
- logger.error(f"Input processing failed: {e}")
44
- raise HTTPException(status_code=400, detail=f"Input processing failed: {str(e)}")
45
-
46
  primary_model = os.getenv("CODE_GEN_MODEL")
47
  fallback_model = os.getenv("CODE_GEN_FALLBACK_MODEL", primary_model)
48
 
@@ -85,9 +96,6 @@ def generate_animation_response(
85
  # Post-process the code to fix common issues
86
  processed_code = post_process_code(raw_code)
87
 
88
- # Legacy theme injection removed - themes are now handled by system prompts
89
- # processed_code = inject_theme_setup(processed_code, category)
90
-
91
  # Validate code
92
  is_valid, errors = validator.validate(processed_code)
93
 
@@ -114,6 +122,8 @@ def generate_animation_response(
114
 
115
  logger.info(f"Retrying code generation (attempt {attempt + 2}/{max_attempts})")
116
 
 
 
117
  except Exception as e:
118
  logger.error(f"Error in code generation attempt {attempt + 1}: {str(e)}")
119
  if attempt == max_attempts - 1:
@@ -127,3 +137,64 @@ def generate_animation_response(
127
  status_code=500,
128
  detail="Failed to generate valid animation code after all attempts"
129
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from ..utils.code_postprocessor import post_process_code
8
  from ..utils.code_validator import CodeValidator
9
  from ..utils.code_fixer import CodeFixer
 
10
  from ..inputs.processor import InputProcessor
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
 
15
def _generate_with_reflexion(prompt: str, category: str, job_id: str = "unknown") -> str:
    """
    Generate code through the Reflexion Agent's actor-critic loop.

    Args:
        prompt: Processed user prompt
        category: Animation category
        job_id: Job identifier for logging

    Returns:
        Generated and refined code
    """
    # Imported lazily so the agents package is only loaded when Reflexion runs
    from ..agents.reflexion_agent import ReflexionAgent

    reflexion_agent = ReflexionAgent()
    refined_code, run_state = reflexion_agent.generate_with_reflection(
        goal=prompt,
        category=category,
        job_id=job_id
    )

    logger.info(
        f"πŸ”„ Reflexion complete: {run_state.iteration} iterations, "
        f"{run_state.total_issues_found} issues found, {run_state.total_issues_fixed} fixed"
    )

    return refined_code
39
 
40
+
41
+ def _generate_legacy(prompt: str, category: str, max_attempts: int = 3) -> str:
42
+ """
43
+ Legacy code generation without Reflexion.
44
+ Used as fallback when Reflexion is disabled or fails.
45
+
46
+ Args:
47
+ prompt: Processed user prompt
48
+ category: Animation category
49
+ max_attempts: Maximum generation attempts
50
+
51
+ Returns:
52
+ Generated code
53
  """
54
  validator = CodeValidator()
55
  fixer = CodeFixer()
56
 
 
 
 
 
 
 
 
57
  primary_model = os.getenv("CODE_GEN_MODEL")
58
  fallback_model = os.getenv("CODE_GEN_FALLBACK_MODEL", primary_model)
59
 
 
96
  # Post-process the code to fix common issues
97
  processed_code = post_process_code(raw_code)
98
 
 
 
 
99
  # Validate code
100
  is_valid, errors = validator.validate(processed_code)
101
 
 
122
 
123
  logger.info(f"Retrying code generation (attempt {attempt + 2}/{max_attempts})")
124
 
125
+ except HTTPException:
126
+ raise
127
  except Exception as e:
128
  logger.error(f"Error in code generation attempt {attempt + 1}: {str(e)}")
129
  if attempt == max_attempts - 1:
 
137
  status_code=500,
138
  detail="Failed to generate valid animation code after all attempts"
139
  )
140
+
141
+
142
def generate_animation_response(
    input_data: str,
    input_type: str = "text",
    category: str = "mathematical",
    max_attempts: int = 3,
    job_id: str = "unknown"
) -> str:
    """Generate Manim animation code from input with validation and auto-fixing.

    Uses the Reflexion Agent when enabled (REFLEXION_ENABLED=true) for improved
    code quality through self-critique and iterative improvement; otherwise
    (or if Reflexion raises) falls back to the legacy single-pass generator.

    Args:
        input_data (str): User's input (text, URL, or PDF path)
        input_type (str): Type of input ('text', 'url', 'pdf')
        category (str): Animation category (tech_system, product_startup, mathematical)
        max_attempts (int): Maximum generation attempts (for legacy mode)
        job_id (str): Job identifier for logging

    Returns:
        str: Generated Manim animation code (validated and post-processed)

    Raises:
        HTTPException: 400 if input processing fails; 500 if code generation
            fails after all attempts (legacy path)
    """
    # Process input to get the actual prompt text
    try:
        prompt = InputProcessor.process(input_type, input_data)
    except Exception as e:
        logger.error(f"Input processing failed: {e}")
        raise HTTPException(status_code=400, detail=f"Input processing failed: {str(e)}")

    # Check if Reflexion is enabled (on by default)
    reflexion_enabled = os.getenv("REFLEXION_ENABLED", "true").lower() == "true"

    if reflexion_enabled:
        logger.info("πŸ”„ Using Reflexion Agent for code generation")
        try:
            code = _generate_with_reflexion(prompt, category, job_id)

            # Final validation; only build a fixer when the code actually needs it
            validator = CodeValidator()
            is_valid, errors = validator.validate(code)
            if not is_valid:
                logger.warning(f"Reflexion code has {len(errors)} validation errors, attempting auto-fix")
                code, is_fixed, _ = CodeFixer().fix_and_validate(code, max_attempts=2)
                if not is_fixed:
                    # Don't silently hand back broken code - surface the failure
                    # in the logs; the render step will report the concrete error.
                    logger.warning("Auto-fix could not fully repair Reflexion output")

            return code

        except Exception as e:
            logger.error(f"Reflexion failed, falling back to legacy: {e}")
            # Fall through to legacy generation
    else:
        logger.info("πŸ“ Using legacy code generation (Reflexion disabled)")

    # Legacy generation (fallback or when Reflexion is disabled)
    return _generate_legacy(prompt, category, max_attempts)