# VidSimplify/manimator/scene/voiceover_scene.py
# Adityahulk
# integrating free voice
# 60e9dd0
import logging
from pathlib import Path
from manim import Scene
from manimator.services.voiceover import SimpleElevenLabsService
logger = logging.getLogger(__name__)
class VoiceoverTracker:
    """
    Minimal stand-in for the tracker object exposed by manim-voiceover.

    An instance is what ``with self.voiceover(...) as tracker:`` binds to
    ``tracker``, so code inside the block can read ``tracker.duration``.
    """

    def __init__(self, duration: float):
        # Length of the narration clip, stored exactly as handed in.
        self.duration = duration
class VoiceoverScene(Scene):
    """
    A robust base class for scenes with voiceovers.

    Replaces the fragile manim-voiceover library with a direct implementation:
    speech is generated through ``self.speech_service``, attached to the scene
    with ``add_sound``, and the measured clip duration is handed to the
    context manager returned by :meth:`voiceover`.
    """

    # Last-resort clip length (seconds) when no duration can be determined.
    _DEFAULT_DURATION = 2.0
    # Byte rate of a 128 kbps stream (128000 bits/s / 8 = 16000 bytes/s),
    # used for the size-based duration estimate.
    _ESTIMATE_BYTES_PER_SECOND = 16000.0

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.speech_service = SimpleElevenLabsService()

    def set_speech_service(self, service):
        """
        Set the speech service.

        Kept for compatibility with generated code patterns. Only
        ``SimpleElevenLabsService`` instances are accepted; any other object
        is ignored (a warning is logged) and the current service is kept.
        """
        if isinstance(service, SimpleElevenLabsService):
            self.speech_service = service
            return
        # Make the fallback visible instead of silently dropping the request.
        logger.warning(
            "Ignoring unsupported speech service %s; keeping %s",
            type(service).__name__,
            type(self.speech_service).__name__,
        )

    def voiceover(self, text: str) -> "_VoiceoverContext":
        """
        Context manager for voiceovers.

        Generates audio for ``text``, adds it to the scene at the current
        point in time, and returns a context manager that yields a tracker
        carrying the clip duration.

        Raises:
            FileNotFoundError: if the speech service did not produce a
                regular file at the path it returned.
        """
        audio_path = self.speech_service.generate_from_text(text)
        absolute_audio_path = Path(audio_path).resolve()

        # is_file() rather than exists(): an empty path resolves to the
        # current directory, which exists but is not playable audio.
        if not absolute_audio_path.is_file():
            logger.error("Audio file not found: %s", absolute_audio_path)
            raise FileNotFoundError(
                f"Generated audio file not found: {absolute_audio_path}"
            )

        logger.info(
            "Adding audio to scene: %s (size: %s bytes)",
            absolute_audio_path,
            absolute_audio_path.stat().st_size,
        )
        self.add_sound(str(absolute_audio_path))

        duration = self._get_audio_duration(absolute_audio_path)
        return _VoiceoverContext(self, duration)

    def _get_audio_duration(self, file_path: Path) -> float:
        """
        Get the duration of an mp3 file in seconds.

        Uses mutagen when it is importable. If mutagen is missing or cannot
        read the file, the duration is estimated from the file size, which
        still scales with the clip length unlike a fixed constant.
        """
        try:
            from mutagen.mp3 import MP3
        except ImportError:
            logger.warning("mutagen not found, estimating duration based on file size")
            return self._estimate_duration_from_size(file_path)

        try:
            return MP3(file_path).info.length
        except Exception as e:  # best-effort: any read/parse failure falls back
            logger.error("Error reading audio duration: %s", e)
            return self._estimate_duration_from_size(file_path)

    def _estimate_duration_from_size(self, file_path: Path) -> float:
        """
        Estimate the duration from the file size, assuming a 128 kbps stream.

        This is a rough fallback and might be inaccurate. Returns the fixed
        default duration if the file size cannot be read.
        """
        try:
            size_bytes = file_path.stat().st_size
        except OSError as e:
            logger.error("Could not read size of %s: %s", file_path, e)
            return self._DEFAULT_DURATION
        return size_bytes / self._ESTIMATE_BYTES_PER_SECOND
class _VoiceoverContext:
    """
    Context manager that keeps the scene running until a voiceover clip ends.

    On entry it records the renderer's clock; on a clean exit it compares the
    time spent inside the ``with`` block against the clip duration and makes
    the scene wait for whatever is left.
    """

    def __init__(self, scene: Scene, duration: float):
        self.tracker = VoiceoverTracker(duration)
        self.duration = duration
        self.scene = scene

    def __enter__(self):
        # Remember where the renderer clock stood when the block began.
        self.start_time = self.scene.renderer.time
        return self.tracker

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None on every path, so an exception raised inside the block
        # propagates to the caller; no padding wait is added in that case.
        if not exc_type:
            spent = self.scene.renderer.time - self.start_time
            leftover = self.duration - spent
            # Only pad when the animations finished before the audio did.
            # No extra pause is inserted between consecutive voiceovers.
            if leftover > 0:
                self.scene.wait(leftover)