Adityahulk committed on
Commit
c648277
·
1 Parent(s): 12fe8d7

free voice and improvements

Browse files
manimator/agents/reflexion_agent.py CHANGED
@@ -308,7 +308,7 @@ self.play(items[2].animate.scale(1.1).set_color(GREEN))
308
  def _critique_code(self, code: str, category: str) -> CritiqueResult:
309
  """Critique code and return structured issues"""
310
 
311
- critique_prompt = f"""You are an expert Manim code reviewer. Analyze this {category} animation code for potential issues.
312
 
313
  CODE TO REVIEW:
314
  ```python
@@ -316,105 +316,108 @@ CODE TO REVIEW:
316
  ```
317
 
318
  # ============================================================================
319
- # CRITICAL REVIEW CATEGORIES - CHECK ALL CAREFULLY
320
  # ============================================================================
321
 
322
- ## 1. 🚨 SCREEN BOUNDARY ISSUES (HIGH PRIORITY)
 
 
 
 
323
 
324
- Check if content will GO OFF SCREEN:
325
- - Count items in VGroups - if 5+ items are arranged vertically, is there `scale_to_fit_height()`?
326
- - Are there multiple items stacked without proper scaling?
327
- - Is `config.frame_height` or `config.frame_width` used for boundary checks?
328
- - **RED FLAG**: VGroup with 4+ items arranged(DOWN) WITHOUT scale_to_fit_height = CRITICAL ERROR
329
- - **RED FLAG**: Large groups not using safe margins (buff < 0.5)
330
 
331
- **Expected pattern for 4+ items:**
332
- ```python
333
- group.scale_to_fit_height(config.frame_height - 2.5)
334
- ```
 
 
 
335
 
336
- ## 2. 🎬 ANIMATION VARIETY & ENGAGEMENT (HIGH PRIORITY)
 
 
 
337
 
338
- Check if video will be STATIC/BORING:
339
- - Count animation types used - are there at least 3-4 different types?
340
- - **RED FLAG**: Only using `Write()` for all animations
341
- - **RED FLAG**: No emphasis animations (Indicate, Circumscribe, Flash, etc.)
342
- - **RED FLAG**: No LaggedStart for list animations
343
- - **RED FLAG**: Long `self.wait()` calls (> 1 second) without visual activity
344
- - **RED FLAG**: Simple FadeIn/FadeOut without shift parameters
345
 
346
- **Good animations to look for:**
347
- - `FadeIn(obj, shift=UP/DOWN/LEFT/RIGHT)` ✓
348
- - `LaggedStart(*[...], lag_ratio=0.2)` ✓
349
- - `Indicate()`, `Circumscribe()`, `Flash()` ✓
350
- - `obj.animate.scale(1.1).set_color(YELLOW)` ✓
351
- - `GrowFromCenter()`, `DrawBorderThenFill()` ✓
352
 
353
- ## 3. 📐 VISUAL OVERLAPS
354
 
355
- - VGroup misuse (arranging mixed types together)
356
- - Objects placed at same position without offset
357
- - Text stacking on top of other text
358
- - Elements not using next_to() or arrange() properly
359
 
360
- ## 4. 🔧 MANIM API MISUSE
 
 
361
 
362
- - Invalid parameters (corner_radius on Rectangle, etc.)
363
- - Deprecated methods
364
- - Incorrect animation calls
 
365
 
366
- ## 5. 💡 LOGIC ERRORS
367
 
368
- - Objects used before definition
369
- - Animations on removed objects
370
- - Incorrect loop logic
 
 
371
 
372
- ## 6. ✨ BEST PRACTICES
 
 
373
 
374
- - Blank screens during voiceover (no visuals while talking)
375
- - Missing cleanup (FadeOut before new content)
376
- - Poor visual hierarchy
377
- - No transitions between sections (just FadeOut/FadeIn without motion)
378
 
379
  # ============================================================================
 
 
 
 
 
 
 
380
 
381
- For EACH issue found, provide:
382
- - severity: "low" | "medium" | "high"
383
- - category: "OFF_SCREEN" | "STATIC_VIDEO" | "OVERLAP" | "API_MISUSE" | "LOGIC_ERROR" | "BEST_PRACTICE"
384
- - line_range: [start_line, end_line] if identifiable
385
- - description: What's wrong
386
- - suggestion: How to fix it
387
 
388
- **SEVERITY GUIDE:**
389
- - HIGH: Content goes off-screen, only Write() animations used, major overlaps
390
- - MEDIUM: Missing emphasis animations, no LaggedStart for lists, long waits
391
- - LOW: Minor styling issues, could be slightly more dynamic
392
 
393
- If the code is well-written with no significant issues, respond with:
394
- {{"has_issues": false, "overall_severity": "none", "issues": [], "summary": "Code is well-structured"}}
395
 
396
  Respond ONLY with valid JSON in this exact format:
397
  ```json
398
  {{
399
  "has_issues": true,
400
- "overall_severity": "high",
401
  "issues": [
402
  {{
403
- "severity": "high",
404
- "category": "OFF_SCREEN",
405
- "line_range": [45, 52],
406
- "description": "VGroup with 6 items arranged vertically without scale_to_fit_height - content will go off bottom of screen",
407
- "suggestion": "Add: group.scale_to_fit_height(config.frame_height - 2.5) after arrange()"
408
  }},
409
  {{
410
- "severity": "high",
411
- "category": "STATIC_VIDEO",
412
- "line_range": [1, 100],
413
- "description": "Only Write() and FadeIn() animations used - video will feel static and boring",
414
- "suggestion": "Add emphasis animations: Indicate(), Circumscribe(). Use LaggedStart for lists. Add .animate chains for motion."
415
  }}
416
  ],
417
- "summary": "Found 2 critical issues: content goes off-screen and animations lack variety"
418
  }}
419
  ```
420
  """
@@ -460,26 +463,44 @@ Respond ONLY with valid JSON in this exact format:
460
  for i in critique.issues if i.suggestion
461
  ])
462
 
463
- fix_prompt = f"""Fix this Manim code based on the expert code review.
464
 
465
  ORIGINAL CODE:
466
  ```python
467
  {original_code}
468
  ```
469
 
470
- ISSUES IDENTIFIED:
471
  {issues_summary}
472
 
473
- SPECIFIC FIX SUGGESTIONS:
474
  {suggestions}
475
 
476
- INSTRUCTIONS:
477
- 1. Apply ALL suggested fixes
478
- 2. Preserve all working parts of the code
479
- 3. Ensure no new issues are introduced
480
- 4. Keep the same class name and overall structure
481
 
482
- Return the COMPLETE fixed Python code.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
  """
484
 
485
  try:
 
308
  def _critique_code(self, code: str, category: str) -> CritiqueResult:
309
  """Critique code and return structured issues"""
310
 
311
+ critique_prompt = f"""You are a CREATIVE ENHANCEMENT advisor for Manim animations. Your job is to make animations MORE beautiful, dynamic, and engaging - NOT to simplify them.
312
 
313
  CODE TO REVIEW:
314
  ```python
 
316
  ```
317
 
318
  # ============================================================================
319
+ # YOUR ROLE: ENHANCE CREATIVITY, NOT RESTRICT IT
320
  # ============================================================================
321
 
322
+ You are here to IMPROVE animations, not simplify them. Focus on:
323
+ 1. Adding MORE visual interest, not removing it
324
+ 2. Suggesting ADDITIONAL animations to make it more engaging
325
+ 3. Only flag ACTUAL bugs that will cause crashes
326
+ 4. PRESERVE all creative animations - do NOT suggest removing them
327
 
328
+ # ============================================================================
329
+ # WHAT TO CHECK (IN ORDER OF PRIORITY)
330
+ # ============================================================================
 
 
 
331
 
332
+ ## 1. 🐛 ACTUAL BUGS (Only flag if they will CRASH the code)
333
+
334
+ These are the ONLY high-severity issues:
335
+ - Invalid Manim parameters that don't exist (corner_radius on Rectangle)
336
+ - Objects used before they are defined
337
+ - Animating objects that have been removed/cleared
338
+ - Syntax errors
339
 
340
+ **DO NOT flag as bugs:**
341
+ - Things that "might" go off screen (the code handles this)
342
+ - Animation choices you disagree with (respect the creativity)
343
+ - Use of specific animation types
344
 
345
+ ## 2. 🎨 ENHANCEMENT SUGGESTIONS (Help make it MORE beautiful)
 
 
 
 
 
 
346
 
347
+ Suggest ADDITIONS to make animations more impressive:
348
+ - "Consider adding Circumscribe() after showing key concepts"
349
+ - "The list would look more dynamic with LaggedStart"
350
+ - "Add a subtle pulse animation while explaining"
351
+ - "Use GrowFromCenter for more dramatic reveal"
352
+ - "Add color transitions with .animate.set_color()"
353
 
354
+ **These should be LOW severity - suggestions, not requirements.**
355
 
356
+ ## 3. 📐 DEFINITE OVERLAPS (Only if objects are DEFINITELY at the same position)
 
 
 
357
 
358
+ Only flag overlaps if:
359
+ - Two Text objects are created at ORIGIN without any positioning
360
+ - Objects are explicitly placed at the same coordinates
361
 
362
+ **DO NOT flag as overlaps:**
363
+ - Objects using arrange() or next_to() - these handle spacing
364
+ - VGroups - they handle their own layout
365
+ - Anything using .to_edge() or similar
366
 
367
+ ## 4. ⚡ ANIMATION VARIETY SUGGESTIONS (Encourage MORE, not less)
368
 
369
+ If animations seem basic, suggest ADDING:
370
+ - "Add Indicate() to highlight important elements"
371
+ - "Use Flash() for emphasis on key points"
372
+ - "Consider Wiggle() for playful moments"
373
+ - "Add subtle scale animations during explanations"
374
 
375
+ # ============================================================================
376
+ # SEVERITY GUIDE (BE LENIENT)
377
+ # ============================================================================
378
 
379
+ - **HIGH**: ONLY for code that will CRASH (invalid API, undefined variables)
380
+ - **MEDIUM**: Definite overlaps (same exact position without spacing)
381
+ - **LOW**: Suggestions to enhance (add more animations, make more dynamic)
 
382
 
383
  # ============================================================================
384
+ # IMPORTANT: PRESERVE CREATIVITY
385
+ # ============================================================================
386
+
387
+ - If the code uses creative animations, PRAISE them and suggest additions
388
+ - NEVER suggest simplifying complex animations
389
+ - NEVER suggest removing animations that work
390
+ - Your goal is to make the video MORE impressive, not safer
391
 
392
+ # ============================================================================
 
 
 
 
 
393
 
394
+ If the code is creative and well-animated, respond with:
395
+ {{"has_issues": false, "overall_severity": "none", "issues": [], "summary": "Excellent creative code! animations are dynamic and engaging."}}
 
 
396
 
397
+ For enhancement suggestions (LOW severity), use category "ENHANCEMENT".
 
398
 
399
  Respond ONLY with valid JSON in this exact format:
400
  ```json
401
  {{
402
  "has_issues": true,
403
+ "overall_severity": "low",
404
  "issues": [
405
  {{
406
+ "severity": "low",
407
+ "category": "ENHANCEMENT",
408
+ "line_range": [50, 55],
409
+ "description": "The component reveals could be more dramatic",
410
+ "suggestion": "Add GrowFromCenter() or SpinInFromNothing() for a more impressive reveal effect"
411
  }},
412
  {{
413
+ "severity": "low",
414
+ "category": "ENHANCEMENT",
415
+ "line_range": [80, 85],
416
+ "description": "Key concepts could use more emphasis",
417
+ "suggestion": "Add Circumscribe() or Flash() after revealing important elements to draw attention"
418
  }}
419
  ],
420
+ "summary": "Good creative code! Suggested 2 ways to make it even more impressive."
421
  }}
422
  ```
423
  """
 
463
  for i in critique.issues if i.suggestion
464
  ])
465
 
466
+ fix_prompt = f"""ENHANCE this Manim animation code based on the creative suggestions.
467
 
468
  ORIGINAL CODE:
469
  ```python
470
  {original_code}
471
  ```
472
 
473
+ ENHANCEMENT SUGGESTIONS:
474
  {issues_summary}
475
 
476
+ SPECIFIC IMPROVEMENTS TO ADD:
477
  {suggestions}
478
 
479
+ # ============================================================================
480
+ # CRITICAL INSTRUCTIONS - ENHANCE, DON'T SIMPLIFY
481
+ # ============================================================================
 
 
482
 
483
+ 1. **PRESERVE ALL EXISTING ANIMATIONS** - Do NOT remove any working animations
484
+ 2. **ADD the suggested enhancements** - More animations = better
485
+ 3. **Keep all creative elements** - Complex animations are GOOD
486
+ 4. **Maintain the same structure** - Same class name, same voiceovers
487
+ 5. **Add MORE visual interest** - Additional effects, emphasis, transitions
488
+
489
+ **EXAMPLES OF GOOD ENHANCEMENTS:**
490
+ - Add `self.play(Indicate(obj, color=YELLOW))` after important reveals
491
+ - Add `self.play(Circumscribe(obj))` to highlight key concepts
492
+ - Use `LaggedStart` for revealing lists: `LaggedStart(*[FadeIn(x, shift=UP) for x in items], lag_ratio=0.15)`
493
+ - Add subtle animations during explanations: `self.play(obj.animate.scale(1.05), run_time=2)`
494
+ - Use `Flash(obj)` for emphasis moments
495
+ - Add `GrowFromCenter` or `SpinInFromNothing` for dramatic reveals
496
+
497
+ **DO NOT:**
498
+ - Remove any animations that work
499
+ - Simplify complex animation sequences
500
+ - Reduce visual effects
501
+ - Make the code "safer" by removing creativity
502
+
503
+ Return the COMPLETE enhanced Python code with MORE impressive animations.
504
  """
505
 
506
  try:
manimator/services/voiceover.py CHANGED
@@ -3,15 +3,29 @@ import hashlib
3
  import json
4
  import logging
5
  import requests
 
6
  from pathlib import Path
7
  from typing import Optional, Dict, Any
8
 
9
  logger = logging.getLogger(__name__)
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  class SimpleElevenLabsService:
12
  """
13
  A simple, robust service for generating voiceovers using ElevenLabs API.
14
- Bypasses the complex inheritance of manim-voiceover to avoid path type errors.
15
  """
16
 
17
  DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM" # Rachel
@@ -28,9 +42,10 @@ class SimpleElevenLabsService:
28
  def __init__(self, voice_id: str = DEFAULT_VOICE_ID, cache_dir: Optional[Path] = None):
29
  # Resolve voice ID if it's a name
30
  self.voice_id = self.VOICE_MAPPING.get(voice_id, voice_id)
 
31
  self.api_key = os.getenv("ELEVENLABS_API_KEY")
32
  if not self.api_key:
33
- logger.warning("ELEVENLABS_API_KEY not set. Voiceover generation will fail.")
34
 
35
  # Use provided cache_dir or default
36
  if cache_dir:
@@ -60,8 +75,8 @@ class SimpleElevenLabsService:
60
 
61
  try:
62
  if not self.api_key:
63
- logger.warning("ELEVENLABS_API_KEY missing, falling back to gTTS")
64
- return self._generate_with_gtts(text)
65
 
66
  # Call ElevenLabs API
67
  url = f"{self.BASE_URL}/text-to-speech/{self.voice_id}"
@@ -91,17 +106,61 @@ class SimpleElevenLabsService:
91
  return output_path
92
 
93
  except Exception as e:
94
- logger.error(f"ElevenLabs generation failed: {str(e)}. Falling back to gTTS.")
95
- return self._generate_with_gtts(text)
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def _generate_with_gtts(self, text: str) -> Path:
98
  """
99
- Fallback generation using Google Text-to-Speech (free).
100
  """
101
  try:
102
  from gtts import gTTS
103
 
104
- # Use a separate cache for gTTS to avoid hash collisions if we switch back
105
  gtts_cache_dir = Path("media/voiceover/gtts")
106
  gtts_cache_dir.mkdir(parents=True, exist_ok=True)
107
 
@@ -121,4 +180,5 @@ class SimpleElevenLabsService:
121
 
122
  except Exception as e:
123
  logger.error(f"gTTS fallback failed: {str(e)}")
124
- raise RuntimeError(f"Voiceover generation failed (ElevenLabs and gTTS): {str(e)}")
 
 
3
  import json
4
  import logging
5
  import requests
6
+ import asyncio
7
  from pathlib import Path
8
  from typing import Optional, Dict, Any
9
 
10
  logger = logging.getLogger(__name__)
11
 
12
+
13
+ # Edge-TTS Voice mapping - high quality neural voices
14
+ EDGE_TTS_VOICES = {
15
+ "Rachel": "en-US-JennyNeural", # Female, clear and professional
16
+ "Adam": "en-US-GuyNeural", # Male, professional
17
+ "Bella": "en-US-AriaNeural", # Female, warm and friendly
18
+ "Josh": "en-US-ChristopherNeural", # Male, deep voice
19
+ "Indian": "en-IN-NeerjaNeural", # Indian English female
20
+ "British": "en-GB-SoniaNeural", # British female
21
+ "Australian": "en-AU-NatashaNeural", # Australian female
22
+ }
23
+
24
+
25
  class SimpleElevenLabsService:
26
  """
27
  A simple, robust service for generating voiceovers using ElevenLabs API.
28
+ Falls back to Edge TTS (Microsoft neural voices) if ElevenLabs fails.
29
  """
30
 
31
  DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM" # Rachel
 
42
  def __init__(self, voice_id: str = DEFAULT_VOICE_ID, cache_dir: Optional[Path] = None):
43
  # Resolve voice ID if it's a name
44
  self.voice_id = self.VOICE_MAPPING.get(voice_id, voice_id)
45
+ self.voice_name = voice_id # Store the voice name for edge-tts fallback
46
  self.api_key = os.getenv("ELEVENLABS_API_KEY")
47
  if not self.api_key:
48
+ logger.warning("ELEVENLABS_API_KEY not set. Will use Edge TTS (free).")
49
 
50
  # Use provided cache_dir or default
51
  if cache_dir:
 
75
 
76
  try:
77
  if not self.api_key:
78
+ logger.warning("ELEVENLABS_API_KEY missing, using Edge TTS")
79
+ return self._generate_with_edge_tts(text)
80
 
81
  # Call ElevenLabs API
82
  url = f"{self.BASE_URL}/text-to-speech/{self.voice_id}"
 
106
  return output_path
107
 
108
  except Exception as e:
109
+ logger.error(f"ElevenLabs generation failed: {str(e)}. Falling back to Edge TTS.")
110
+ return self._generate_with_edge_tts(text)
111
 
112
+ def _generate_with_edge_tts(self, text: str) -> Path:
113
+ """
114
+ Fallback generation using Microsoft Edge TTS (free, high quality).
115
+ Uses neural voices that sound natural and professional.
116
+ """
117
+ try:
118
+ import edge_tts
119
+
120
+ # Use a separate cache for edge-tts
121
+ edge_cache_dir = Path("media/voiceover/edge_tts")
122
+ edge_cache_dir.mkdir(parents=True, exist_ok=True)
123
+
124
+ # Map the voice name to edge-tts voice
125
+ edge_voice = EDGE_TTS_VOICES.get(self.voice_name, "en-US-JennyNeural")
126
+
127
+ content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
128
+ output_path = edge_cache_dir / f"{content_hash}.mp3"
129
+
130
+ if output_path.exists() and output_path.stat().st_size > 0:
131
+ logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
132
+ return output_path
133
+
134
+ logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:30]}...")
135
+
136
+ # Edge-tts is async, so we need to run it in an event loop
137
+ async def _generate():
138
+ communicate = edge_tts.Communicate(text, edge_voice)
139
+ await communicate.save(str(output_path))
140
+
141
+ # Run the async function
142
+ try:
143
+ loop = asyncio.get_event_loop()
144
+ except RuntimeError:
145
+ loop = asyncio.new_event_loop()
146
+ asyncio.set_event_loop(loop)
147
+
148
+ loop.run_until_complete(_generate())
149
+
150
+ logger.info(f"Edge TTS voiceover saved to {output_path}")
151
+ return output_path
152
+
153
+ except Exception as e:
154
+ logger.error(f"Edge TTS failed: {str(e)}. Falling back to gTTS.")
155
+ return self._generate_with_gtts(text)
156
+
157
  def _generate_with_gtts(self, text: str) -> Path:
158
  """
159
+ Last resort fallback using Google Text-to-Speech.
160
  """
161
  try:
162
  from gtts import gTTS
163
 
 
164
  gtts_cache_dir = Path("media/voiceover/gtts")
165
  gtts_cache_dir.mkdir(parents=True, exist_ok=True)
166
 
 
180
 
181
  except Exception as e:
182
  logger.error(f"gTTS fallback failed: {str(e)}")
183
+ raise RuntimeError(f"All TTS methods failed: {str(e)}")
184
+
requirements.txt CHANGED
@@ -11,4 +11,5 @@ streamlit
11
  requests
12
  beautifulsoup4>=4.12.0
13
  lxml>=4.9.0
14
- readability-lxml>=0.8.1
 
 
11
  requests
12
  beautifulsoup4>=4.12.0
13
  lxml>=4.9.0
14
+ readability-lxml>=0.8.1
15
+ edge-tts>=6.1.0