Adityahulk committed on
Commit
e46a637
Β·
1 Parent(s): 5368281

integrating reflexion agent

Browse files
.env.example CHANGED
@@ -3,8 +3,35 @@ PROMPT_SCENE_GEN_MODEL=groq/llama-3.3-70b-versatile
3
  PDF_SCENE_GEN_MODEL=gemini/gemini-1.5-flash
4
  PDF_RETRY_MODEL=gemini/gemini-2.0-flash-exp #Optional, only if you want to retry the PDF generation
5
  CODE_GEN_MODEL=openrouter/deepseek/deepseek-chat:free
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # Use the LiteLLM convention of naming the API keys depending on the models you choose
8
  GROQ_API_KEY=
9
  OPENROUTER_API_KEY=
10
- GEMINI_API_KEY=
 
 
 
 
 
 
 
3
  PDF_SCENE_GEN_MODEL=gemini/gemini-1.5-flash
4
  PDF_RETRY_MODEL=gemini/gemini-2.0-flash-exp #Optional, only if you want to retry the PDF generation
5
  CODE_GEN_MODEL=openrouter/deepseek/deepseek-chat:free
6
+ CODE_GEN_FALLBACK_MODEL=openrouter/anthropic/claude-sonnet-4 # Optional fallback model
7
 
8
+ # ============================================================================
9
+ # Reflexion Agent Configuration
10
+ # ============================================================================
11
+ # The Reflexion Agent uses an Actor-Critic-Reflector loop for improved code quality.
12
+ # It generates code, critiques it for issues, and fixes problems before rendering.
13
+
14
+ # Enable/disable Reflexion Agent (default: true)
15
+ REFLEXION_ENABLED=true
16
+
17
+ # Critic model - used for code review (stronger model recommended)
18
+ # This model analyzes code for visual overlaps, API misuse, logic errors, etc.
19
+ REFLEXION_CRITIC_MODEL=openrouter/anthropic/claude-sonnet-4
20
+
21
+ # Maximum iterations for reflexion loop (default: 2)
22
+ # Each iteration: generate -> critique -> fix
23
+ REFLEXION_MAX_ITERATIONS=2
24
+
25
+ # ============================================================================
26
+ # API Keys
27
+ # ============================================================================
28
  # Use the LiteLLM convention of naming the API keys depending on the models you choose
29
  GROQ_API_KEY=
30
  OPENROUTER_API_KEY=
31
+ GEMINI_API_KEY=
32
+
33
+ # Internal API key for securing endpoints (generate a random UUID)
34
+ INTERNAL_API_KEY=
35
+
36
+ # ElevenLabs API key for voiceover generation
37
+ ELEVENLABS_API_KEY=
api_server.py CHANGED
@@ -327,7 +327,8 @@ class VideoGenerator:
327
  code = generate_animation_response(
328
  input_data=job.get("input_data", ""),
329
  input_type=job["input_type"],
330
- category=job["category"]
 
331
  )
332
 
333
  logger.info(f"βœ… Code generation complete for job {job_id[:8]}...")
 
327
  code = generate_animation_response(
328
  input_data=job.get("input_data", ""),
329
  input_type=job["input_type"],
330
+ category=job["category"],
331
+ job_id=job_id # Pass job_id for Reflexion logging
332
  )
333
 
334
  logger.info(f"βœ… Code generation complete for job {job_id[:8]}...")
manimator/agents/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agents module for Manimator
3
+
4
+ Contains intelligent agents for code generation and improvement.
5
+ """
6
+
7
+ from .reflexion_agent import ReflexionAgent, ReflectionState
8
+
9
+ __all__ = ["ReflexionAgent", "ReflectionState"]
manimator/agents/reflexion_agent.py ADDED
@@ -0,0 +1,533 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reflexion Agent for Manim Code Generation
3
+
4
+ 2-iteration self-improvement loop:
5
+ - Actor: Generates code using primary model
6
+ - Critic: Reviews code for issues (uses stronger model)
7
+ - Reflector: Fixes issues based on critique
8
+
9
+ Integrated with existing Manimator architecture.
10
+ """
11
+
12
+ import json
13
+ import logging
14
+ import re
15
+ import os
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ from typing import List, Optional, Tuple
19
+ import litellm
20
+
21
+ from ..utils.system_prompts import get_system_prompt
22
+ from ..utils.code_postprocessor import post_process_code
23
+ from ..utils.code_validator import CodeValidator
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
@dataclass
class Issue:
    """One code problem reported by the critic model.

    Fields are filled in from the critic's JSON response; anything the
    critic omits keeps its default.
    """
    severity: str  # "low" | "medium" | "high"
    category: str  # "OVERLAP" | "API_MISUSE" | "LOGIC_ERROR" | "BEST_PRACTICE"
    # (start_line, end_line) within the reviewed code, when identifiable
    line_range: Optional[Tuple[int, int]] = None
    description: str = ""  # what is wrong
    suggestion: str = ""  # how to fix it
36
+
37
+
38
@dataclass
class CritiqueResult:
    """Structured outcome of one critic review pass."""
    has_issues: bool
    overall_severity: str  # "none" | "low" | "medium" | "high"
    # Individual findings; each instance gets its own fresh list
    issues: "List[Issue]" = field(default_factory=list)
    summary: str = ""  # one-line human-readable recap
45
+
46
+
47
@dataclass
class Reflection:
    """A lesson distilled from fixing one critique, fed into the next attempt."""
    iteration: int  # which reflexion iteration produced this lesson
    original_issue: str  # the critic's description of the problem
    root_cause: str  # issue category treated as the root cause
    lesson: str  # actionable advice injected into the next prompt
    # Recorded automatically at creation time
    timestamp: datetime = field(default_factory=datetime.now)
55
+
56
+
57
@dataclass
class ReflectionState:
    """Mutable state threaded through one run of the reflexion loop."""
    job_id: str
    goal: str  # processed user prompt
    category: str  # animation category
    iteration: int = 0  # current loop iteration (1-based once running)
    max_iterations: int = 2

    current_code: str = ""  # latest generated/fixed code
    # Lifecycle: GENERATING | CRITIQUING | FIXING | COMPLETE
    status: str = "GENERATING"

    critique: "Optional[CritiqueResult]" = None  # most recent critic verdict
    reflection_memory: "List[Reflection]" = field(default_factory=list)

    # Metrics collected across all iterations
    total_issues_found: int = 0
    total_issues_fixed: int = 0
    generation_time_ms: int = 0
76
+
77
+
78
class ReflexionAgent:
    """
    Reflexion-based code generation with self-improvement.

    Actor-Critic-Reflector loop:
      - Actor generates Manim code (CODE_GEN_MODEL).
      - Critic reviews it for overlaps, API misuse, logic errors, and best
        practices (REFLEXION_CRITIC_MODEL; a stronger model is recommended).
      - Reflector rewrites the code to address the critique and records a
        lesson that is injected into the next generation attempt.

    The loop is capped at REFLEXION_MAX_ITERATIONS (default 2).
    """

    def __init__(self):
        """Read model names and loop configuration from the environment."""
        # Actor model (for generation and fixing) - uses existing CODE_GEN_MODEL
        self.actor_model = os.getenv("CODE_GEN_MODEL", "openrouter/anthropic/claude-sonnet-4")

        # Critic model (for code review) - a stronger model gives better critiques
        self.critic_model = os.getenv("REFLEXION_CRITIC_MODEL", "openrouter/anthropic/claude-sonnet-4")

        # Loop configuration
        self.max_iterations = int(os.getenv("REFLEXION_MAX_ITERATIONS", "2"))
        self.enabled = os.getenv("REFLEXION_ENABLED", "true").lower() == "true"

        # Validator for syntax checks
        self.validator = CodeValidator()

        # Plain string (was an f-string with no placeholders)
        logger.info("πŸ”„ ReflexionAgent initialized")
        logger.info(f"   Actor model: {self.actor_model}")
        logger.info(f"   Critic model: {self.critic_model}")
        logger.info(f"   Max iterations: {self.max_iterations}")
        logger.info(f"   Enabled: {self.enabled}")

    def generate_with_reflection(
        self,
        goal: str,
        category: str,
        job_id: str = "unknown"
    ) -> "Tuple[str, ReflectionState]":
        """
        Main reflexion loop - synchronous version for integration.

        Args:
            goal: User's animation request (processed prompt)
            category: Animation category (tech_system, product_startup, mathematical)
            job_id: Job identifier for logging

        Returns:
            Tuple of (final_code, ReflectionState with stats)
        """
        import time
        start_time = time.time()

        state = ReflectionState(
            job_id=job_id,
            goal=goal,
            category=category,
            max_iterations=self.max_iterations
        )

        # Slicing is safe for any string length; no conditional needed
        logger.info(f"πŸ”„ Starting Reflexion for job {job_id[:8]}")
        logger.info(f"   Max iterations: {self.max_iterations}")

        for iteration in range(1, self.max_iterations + 1):
            state.iteration = iteration
            logger.info(f"\n{'='*60}")
            logger.info(f"REFLEXION ITERATION {iteration}/{self.max_iterations}")
            logger.info(f"{'='*60}")

            # [1] ACTOR: Generate code, applying lessons from earlier iterations
            state.status = "GENERATING"
            logger.info("πŸ€– [ACTOR] Generating code...")

            state.current_code = self._generate_code(
                goal=goal,
                category=category,
                reflection_memory=state.reflection_memory
            )

            # Apply post-processing
            state.current_code = post_process_code(state.current_code)

            code_lines = state.current_code.count('\n') + 1
            logger.info(f"   βœ… Generated {code_lines} lines of code")

            # [2] CRITIC: Review code
            state.status = "CRITIQUING"
            logger.info("πŸ” [CRITIC] Reviewing code...")

            critique = self._critique_code(state.current_code, category)
            state.critique = critique
            state.total_issues_found += len(critique.issues)

            if not critique.has_issues or critique.overall_severity == "none":
                logger.info("   βœ… Critic: Code looks good!")
                state.status = "COMPLETE"
                break

            if critique.overall_severity == "low":
                # Minor issues are not worth another model round-trip
                logger.info(f"   βœ… Critic: Minor issues only ({len(critique.issues)}), acceptable")
                state.status = "COMPLETE"
                break

            # Log the top issues (cap at 5 to keep logs readable)
            logger.warning(f"   ⚠️ Critic found {len(critique.issues)} issues:")
            for i, issue in enumerate(critique.issues[:5], 1):
                severity_emoji = {"high": "πŸ”΄", "medium": "🟑", "low": "🟒"}.get(issue.severity, "βšͺ")
                logger.warning(f"   {i}. {severity_emoji} [{issue.category}] {issue.description[:80]}...")

            # [3] REFLECTOR: Fix issues (only if another iteration remains)
            if iteration < self.max_iterations:
                state.status = "FIXING"
                logger.info("πŸ”§ [REFLECTOR] Fixing issues...")

                fixed_code, reflection = self._fix_code(
                    original_code=state.current_code,
                    critique=critique,
                    iteration=iteration
                )

                # Apply post-processing to fixed code
                state.current_code = post_process_code(fixed_code)
                state.reflection_memory.append(reflection)
                # NOTE: optimistic count - assumes the reflector addressed every
                # reported issue; the next critique pass is the real check.
                state.total_issues_fixed += len(critique.issues)

                logger.info(f"   βœ… Fixed. Lesson: {reflection.lesson[:80]}...")
            else:
                logger.warning("   ⚠️ Max iterations reached, using best attempt")
                state.status = "COMPLETE"

        # Calculate metrics
        state.generation_time_ms = int((time.time() - start_time) * 1000)

        logger.info(f"\n{'='*60}")
        logger.info("πŸ“Š REFLEXION COMPLETE")
        logger.info(f"{'='*60}")
        logger.info(f"   Total iterations: {state.iteration}")
        logger.info(f"   Issues found: {state.total_issues_found}")
        logger.info(f"   Issues fixed: {state.total_issues_fixed}")
        logger.info(f"   Lessons learned: {len(state.reflection_memory)}")
        logger.info(f"   Total time: {state.generation_time_ms}ms")
        logger.info(f"   Final status: {state.status}")

        return state.current_code, state

    def _generate_code(
        self,
        goal: str,
        category: str,
        reflection_memory: "List[Reflection]"
    ) -> str:
        """Generate Manim code, including reflection lessons if available."""

        # Build system prompt with lessons from memory
        system_prompt = get_system_prompt(category)

        if reflection_memory:
            lessons = "\n".join([
                f"- {r.lesson}" for r in reflection_memory
            ])
            system_prompt += f"""

## 🚨 CRITICAL LESSONS FROM PREVIOUS ATTEMPT:
{lessons}

YOU MUST APPLY THESE LESSONS IN YOUR CODE! Do not repeat these mistakes.
"""

        # Build user message
        user_message = f"""Create a video about:

{goal}

CRITICAL REMINDERS:
1. NO BLANK SCREENS: Keep the screen populated. If a voiceover is playing, show something.
2. NO OVERLAPS: Ensure text and objects do not overlap. Use `next_to` and `arrange` with proper `buff`.
3. CLEAN TRANSITIONS: Fade out old content before showing new content.
4. VARIED ANIMATIONS: Use a mix of Write, FadeIn, GrowFromCenter, etc.
5. STAY ON SCREEN: Ensure all text and objects are within screen boundaries.
"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message}
        ]

        try:
            response = litellm.completion(
                model=self.actor_model,
                messages=messages,
                num_retries=2
            )

            content = response.choices[0].message.content
            code = self._extract_code(content)

            return code

        except Exception as e:
            logger.error(f"Actor generation failed: {e}")
            raise

    def _critique_code(self, code: str, category: str) -> "CritiqueResult":
        """Critique code and return structured issues."""

        critique_prompt = f"""You are an expert Manim code reviewer. Analyze this {category} animation code for potential issues.

CODE TO REVIEW:
```python
{code}
```

Review specifically for:

1. **Visual Overlaps** (CRITICAL):
   - VGroup misuse (arranging mixed types together)
   - Objects placed at same position without offset
   - Text stacking on top of other text
   - Elements not using next_to() or arrange() properly

2. **Manim API Misuse**:
   - Invalid parameters (corner_radius on Rectangle, etc.)
   - Deprecated methods
   - Incorrect animation calls

3. **Logic Errors**:
   - Objects used before definition
   - Animations on removed objects
   - Incorrect loop logic

4. **Best Practices**:
   - Blank screens during voiceover
   - Missing cleanup (FadeOut before new content)
   - Objects going off-screen
   - Poor animation variety

For EACH issue found, provide:
- severity: "low" | "medium" | "high"
- category: "OVERLAP" | "API_MISUSE" | "LOGIC_ERROR" | "BEST_PRACTICE"
- line_range: [start_line, end_line] if identifiable
- description: What's wrong
- suggestion: How to fix it

If the code is well-written with no significant issues, respond with:
{{"has_issues": false, "overall_severity": "none", "issues": [], "summary": "Code is well-structured"}}

Respond ONLY with valid JSON in this exact format:
```json
{{
  "has_issues": true,
  "overall_severity": "high",
  "issues": [
    {{
      "severity": "high",
      "category": "OVERLAP",
      "line_range": [65, 68],
      "description": "VGroup(boxes, labels).arrange() treats these as 2 items, causing all boxes to overlap",
      "suggestion": "Create individual VGroup pairs: [VGroup(box, label) for box, label in zip(boxes, labels)], then arrange pairs"
    }}
  ],
  "summary": "Found 1 critical overlap issue"
}}
```
"""

        try:
            response = litellm.completion(
                model=self.critic_model,
                messages=[{"role": "user", "content": critique_prompt}],
                num_retries=2
            )

            content = response.choices[0].message.content
            critique = self._parse_critique(content)

            return critique

        except Exception as e:
            logger.error(f"Critic review failed: {e}")
            # Return safe default (no issues) to allow code to proceed
            return CritiqueResult(
                has_issues=False,
                overall_severity="none",
                issues=[],
                summary=f"Critique failed: {str(e)}"
            )

    def _fix_code(
        self,
        original_code: str,
        critique: "CritiqueResult",
        iteration: int
    ) -> "Tuple[str, Reflection]":
        """Fix code based on critique; return fixed code plus the lesson learned."""

        # Build detailed fix prompt
        issues_summary = "\n".join([
            f"- [{i.severity.upper()}] {i.category} (Lines {i.line_range if i.line_range else 'N/A'}): {i.description}"
            for i in critique.issues
        ])

        suggestions = "\n".join([
            f"- {i.suggestion}"
            for i in critique.issues if i.suggestion
        ])

        fix_prompt = f"""Fix this Manim code based on the expert code review.

ORIGINAL CODE:
```python
{original_code}
```

ISSUES IDENTIFIED:
{issues_summary}

SPECIFIC FIX SUGGESTIONS:
{suggestions}

INSTRUCTIONS:
1. Apply ALL suggested fixes
2. Preserve all working parts of the code
3. Ensure no new issues are introduced
4. Keep the same class name and overall structure

Return the COMPLETE fixed Python code.
"""

        try:
            response = litellm.completion(
                model=self.actor_model,
                messages=[{"role": "user", "content": fix_prompt}],
                num_retries=2
            )

            content = response.choices[0].message.content
            fixed_code = self._extract_code(content)

            # Create reflection/lesson from the top (first) issue
            top_issue = critique.issues[0] if critique.issues else None
            reflection = Reflection(
                iteration=iteration,
                original_issue=top_issue.description if top_issue else "Unknown issue",
                root_cause=top_issue.category if top_issue else "UNKNOWN",
                lesson=top_issue.suggestion if top_issue else "No specific lesson",
                timestamp=datetime.now()
            )

            return fixed_code, reflection

        except Exception as e:
            logger.error(f"Reflector fix failed: {e}")
            # Return original code if fix fails
            reflection = Reflection(
                iteration=iteration,
                original_issue="Fix attempt failed",
                root_cause="ERROR",
                lesson=f"Fix failed: {str(e)}",
                timestamp=datetime.now()
            )
            return original_code, reflection

    def _extract_code(self, content: str) -> str:
        """Extract Python code from a (possibly markdown-fenced) LLM response."""
        # Prefer an explicitly Python-tagged fence (```python or ```py)
        match = re.search(r'```(?:python|py)\n(.*?)```', content, re.DOTALL)
        if match:
            return match.group(1).strip()

        # Any other fenced block, with or without a language tag
        match = re.search(r'```[a-zA-Z]*\n(.*?)```', content, re.DOTALL)
        if match:
            return match.group(1).strip()

        # Fallback: return as-is (might already be bare code)
        return content.strip()

    def _parse_critique(self, content: str) -> "CritiqueResult":
        """
        Parse the critic's JSON response into a CritiqueResult.

        Tries a ```json fence first, then any generic fence, then the
        outermost {...} span in the raw text (tolerates prose around the
        JSON), before falling back to a keyword heuristic.
        """
        try:
            # Try to extract JSON from a fenced block
            match = re.search(r'```json\s*\n(.*?)```', content, re.DOTALL)
            if not match:
                match = re.search(r'```\s*\n(.*?)```', content, re.DOTALL)
            if match:
                json_str = match.group(1)
            else:
                # No fence: take the outermost brace-delimited span if present
                start = content.find('{')
                end = content.rfind('}')
                json_str = content[start:end + 1] if 0 <= start < end else content

            # Clean up common issues
            json_str = json_str.strip()

            data = json.loads(json_str)

            issues = []
            for i in data.get("issues", []):
                line_range = None
                if i.get("line_range"):
                    try:
                        lr = i["line_range"]
                        if isinstance(lr, list) and len(lr) == 2:
                            line_range = (int(lr[0]), int(lr[1]))
                    except (ValueError, TypeError):
                        pass  # malformed line range is non-fatal

                issues.append(Issue(
                    severity=i.get("severity", "medium"),
                    category=i.get("category", "UNKNOWN"),
                    line_range=line_range,
                    description=i.get("description", ""),
                    suggestion=i.get("suggestion", "")
                ))

            return CritiqueResult(
                has_issues=data.get("has_issues", False),
                overall_severity=data.get("overall_severity", "low"),
                issues=issues,
                summary=data.get("summary", f"Found {len(issues)} issues" if issues else "No issues")
            )

        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse critique as JSON: {e}")
            logger.debug(f"Content: {content[:500]}")

            # Heuristic: any mention of issue/error/problem implies low-severity issues
            lowered = content.lower()
            has_issues = "issue" in lowered or "error" in lowered or "problem" in lowered

            return CritiqueResult(
                has_issues=has_issues,
                overall_severity="low" if has_issues else "none",
                issues=[],
                summary="Could not parse structured critique"
            )
        except Exception as e:
            logger.error(f"Critique parsing error: {e}")
            return CritiqueResult(
                has_issues=False,
                overall_severity="none",
                issues=[],
                summary=f"Parse error: {str(e)}"
            )
513
+
514
+
515
# Convenience function for direct use
def generate_with_reflexion(
    goal: str,
    category: str,
    job_id: str = "unknown"
) -> Tuple[str, ReflectionState]:
    """
    Run a one-off Reflexion generation without managing an agent instance.

    Args:
        goal: User's animation request
        category: Animation category
        job_id: Job identifier

    Returns:
        Tuple of (code, state)
    """
    result = ReflexionAgent().generate_with_reflection(goal, category, job_id)
    return result
manimator/api/animation_generation.py CHANGED
@@ -7,42 +7,53 @@ from ..utils.system_prompts import get_system_prompt
7
  from ..utils.code_postprocessor import post_process_code
8
  from ..utils.code_validator import CodeValidator
9
  from ..utils.code_fixer import CodeFixer
10
- # from ..utils.theme_injector import inject_theme_setup # Legacy theme injection removed
11
  from ..inputs.processor import InputProcessor
12
 
13
  logger = logging.getLogger(__name__)
14
 
15
 
16
- def generate_animation_response(
17
- input_data: str,
18
- input_type: str = "text",
19
- category: str = "mathematical",
20
- max_attempts: int = 3
21
- ) -> str:
22
- """Generate Manim animation code from input with validation and auto-fixing.
23
-
24
  Args:
25
- input_data (str): User's input (text, URL, or PDF path)
26
- input_type (str): Type of input ('text', 'url', 'pdf')
27
- category (str): Animation category (tech_system, product_startup, mathematical)
28
- max_attempts (int): Maximum generation attempts
29
-
30
  Returns:
31
- str: Generated Manim animation code (validated and post-processed)
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- Raises:
34
- HTTPException: If code generation fails after all attempts
 
 
 
 
 
 
 
 
 
 
 
35
  """
36
  validator = CodeValidator()
37
  fixer = CodeFixer()
38
 
39
- # Process input to get the actual prompt text
40
- try:
41
- prompt = InputProcessor.process(input_type, input_data)
42
- except Exception as e:
43
- logger.error(f"Input processing failed: {e}")
44
- raise HTTPException(status_code=400, detail=f"Input processing failed: {str(e)}")
45
-
46
  primary_model = os.getenv("CODE_GEN_MODEL")
47
  fallback_model = os.getenv("CODE_GEN_FALLBACK_MODEL", primary_model)
48
 
@@ -85,9 +96,6 @@ def generate_animation_response(
85
  # Post-process the code to fix common issues
86
  processed_code = post_process_code(raw_code)
87
 
88
- # Legacy theme injection removed - themes are now handled by system prompts
89
- # processed_code = inject_theme_setup(processed_code, category)
90
-
91
  # Validate code
92
  is_valid, errors = validator.validate(processed_code)
93
 
@@ -114,6 +122,8 @@ def generate_animation_response(
114
 
115
  logger.info(f"Retrying code generation (attempt {attempt + 2}/{max_attempts})")
116
 
 
 
117
  except Exception as e:
118
  logger.error(f"Error in code generation attempt {attempt + 1}: {str(e)}")
119
  if attempt == max_attempts - 1:
@@ -127,3 +137,64 @@ def generate_animation_response(
127
  status_code=500,
128
  detail="Failed to generate valid animation code after all attempts"
129
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from ..utils.code_postprocessor import post_process_code
8
  from ..utils.code_validator import CodeValidator
9
  from ..utils.code_fixer import CodeFixer
 
10
  from ..inputs.processor import InputProcessor
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
 
15
def _generate_with_reflexion(prompt: str, category: str, job_id: str = "unknown") -> str:
    """
    Generate code through the Reflexion Agent's actor-critic loop.

    Args:
        prompt: Processed user prompt
        category: Animation category
        job_id: Job identifier for logging

    Returns:
        Generated and refined code
    """
    # Imported lazily so the agents package is only loaded when Reflexion runs
    from ..agents.reflexion_agent import ReflexionAgent

    reflexion_agent = ReflexionAgent()
    refined_code, run_state = reflexion_agent.generate_with_reflection(
        goal=prompt,
        category=category,
        job_id=job_id
    )

    logger.info(
        f"πŸ”„ Reflexion complete: {run_state.iteration} iterations, "
        f"{run_state.total_issues_found} issues found, {run_state.total_issues_fixed} fixed"
    )

    return refined_code
39
 
40
+
41
+ def _generate_legacy(prompt: str, category: str, max_attempts: int = 3) -> str:
42
+ """
43
+ Legacy code generation without Reflexion.
44
+ Used as fallback when Reflexion is disabled or fails.
45
+
46
+ Args:
47
+ prompt: Processed user prompt
48
+ category: Animation category
49
+ max_attempts: Maximum generation attempts
50
+
51
+ Returns:
52
+ Generated code
53
  """
54
  validator = CodeValidator()
55
  fixer = CodeFixer()
56
 
 
 
 
 
 
 
 
57
  primary_model = os.getenv("CODE_GEN_MODEL")
58
  fallback_model = os.getenv("CODE_GEN_FALLBACK_MODEL", primary_model)
59
 
 
96
  # Post-process the code to fix common issues
97
  processed_code = post_process_code(raw_code)
98
 
 
 
 
99
  # Validate code
100
  is_valid, errors = validator.validate(processed_code)
101
 
 
122
 
123
  logger.info(f"Retrying code generation (attempt {attempt + 2}/{max_attempts})")
124
 
125
+ except HTTPException:
126
+ raise
127
  except Exception as e:
128
  logger.error(f"Error in code generation attempt {attempt + 1}: {str(e)}")
129
  if attempt == max_attempts - 1:
 
137
  status_code=500,
138
  detail="Failed to generate valid animation code after all attempts"
139
  )
140
+
141
+
142
def generate_animation_response(
    input_data: str,
    input_type: str = "text",
    category: str = "mathematical",
    max_attempts: int = 3,
    job_id: str = "unknown"
) -> str:
    """Generate Manim animation code from input with validation and auto-fixing.

    Uses the Reflexion Agent when enabled (REFLEXION_ENABLED=true) for improved
    code quality through self-critique and iterative improvement; otherwise
    (or if Reflexion raises) falls back to the legacy single-pass generator.

    Args:
        input_data (str): User's input (text, URL, or PDF path)
        input_type (str): Type of input ('text', 'url', 'pdf')
        category (str): Animation category (tech_system, product_startup, mathematical)
        max_attempts (int): Maximum generation attempts (for legacy mode)
        job_id (str): Job identifier for logging

    Returns:
        str: Generated Manim animation code (validated and post-processed)

    Raises:
        HTTPException: 400 if input processing fails; 500 if code generation
            fails after all attempts (legacy path)
    """
    # Process input to get the actual prompt text
    try:
        prompt = InputProcessor.process(input_type, input_data)
    except Exception as e:
        logger.error(f"Input processing failed: {e}")
        raise HTTPException(status_code=400, detail=f"Input processing failed: {str(e)}")

    # Check if Reflexion is enabled (on by default)
    reflexion_enabled = os.getenv("REFLEXION_ENABLED", "true").lower() == "true"

    if reflexion_enabled:
        logger.info("πŸ”„ Using Reflexion Agent for code generation")
        try:
            code = _generate_with_reflexion(prompt, category, job_id)

            # Final validation; only build a fixer when the code actually needs it
            validator = CodeValidator()
            is_valid, errors = validator.validate(code)
            if not is_valid:
                logger.warning(f"Reflexion code has {len(errors)} validation errors, attempting auto-fix")
                code, is_fixed, _ = CodeFixer().fix_and_validate(code, max_attempts=2)
                if not is_fixed:
                    # Don't silently hand back broken code - surface the failure
                    # in the logs; the render step will report the concrete error.
                    logger.warning("Auto-fix could not fully repair Reflexion output")

            return code

        except Exception as e:
            logger.error(f"Reflexion failed, falling back to legacy: {e}")
            # Fall through to legacy generation
    else:
        logger.info("πŸ“ Using legacy code generation (Reflexion disabled)")

    # Legacy generation (fallback or when Reflexion is disabled)
    return _generate_legacy(prompt, category, max_attempts)