Spaces:
Sleeping
Sleeping
File size: 4,327 Bytes
6fc3143 60e9dd0 84591f9 60e9dd0 84591f9 6fc3143 60e9dd0 1110dbd 6fc3143 1110dbd 6fc3143 1110dbd 6fc3143 1110dbd 6fc3143 1110dbd 6fc3143 1110dbd 6fc3143 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import logging
from pathlib import Path
from manim import Scene
from manimator.services.voiceover import SimpleElevenLabsService
# Module-level logger, named after this module so records can be filtered per module.
logger = logging.getLogger(__name__)
class VoiceoverTracker:
    """
    Lightweight stand-in for the tracker object of manim-voiceover.

    An instance is what the ``with self.voiceover(...) as tracker:`` statement
    binds to ``tracker``; it only carries the duration it was created with.
    """

    def __init__(self, duration: float) -> None:
        """Store *duration* so code inside the ``with`` block can read it."""
        self.duration: float = duration
class VoiceoverScene(Scene):
    """
    A robust base class for scenes with voiceovers.

    Replaces the fragile manim-voiceover library with a direct implementation:
    ``voiceover(text)`` asks ``self.speech_service`` for an audio file, adds it
    to the scene, and returns a context manager that waits for whatever part
    of the audio is still left when the ``with`` block finishes.
    """

    def __init__(self, **kwargs):
        """Initialize the scene and install a default SimpleElevenLabsService."""
        super().__init__(**kwargs)
        self.speech_service = SimpleElevenLabsService()

    def set_speech_service(self, service):
        """
        Set the speech service.

        Kept for compatibility with generated code patterns. Only
        ``SimpleElevenLabsService`` instances are accepted; any other object is
        ignored and the currently installed service stays in place. The
        rejection is logged as a warning so the mismatch is not silent.
        """
        if isinstance(service, SimpleElevenLabsService):
            self.speech_service = service
            return
        logger.warning(
            "Ignoring unsupported speech service of type %s; "
            "keeping the currently configured service",
            type(service).__name__,
        )

    def voiceover(self, text: str):
        """
        Generate the voiceover for *text* and return its context manager.

        The audio is generated and added to the scene when this method is
        called (i.e. before the ``with`` block body runs). The returned
        ``_VoiceoverContext`` yields a tracker exposing ``duration``.

        Raises:
            FileNotFoundError: if the path returned by the speech service
                does not exist on disk.
        """
        audio_path = self.speech_service.generate_from_text(text)

        # Resolve to an absolute path before checking existence and handing
        # it to add_sound.
        absolute_audio_path = Path(audio_path).resolve()
        if not absolute_audio_path.exists():
            logger.error("Audio file not found: %s", absolute_audio_path)
            raise FileNotFoundError(
                f"Generated audio file not found: {absolute_audio_path}"
            )

        logger.info(
            "Adding audio to scene: %s (size: %s bytes)",
            absolute_audio_path,
            absolute_audio_path.stat().st_size,
        )
        self.add_sound(str(absolute_audio_path))

        duration = self._get_audio_duration(absolute_audio_path)
        return _VoiceoverContext(self, duration)

    def _get_audio_duration(self, file_path: Path) -> float:
        """
        Return the duration of an mp3 file in seconds.

        Uses mutagen when it can be imported. If mutagen is missing, the
        duration is estimated from the file size assuming a 128 kbps stream.
        Any other failure is logged and a fixed 2.0 second default is
        returned, so a bad file does not abort the render.
        """
        try:
            # Imported lazily so mutagen stays an optional dependency.
            from mutagen.mp3 import MP3

            audio = MP3(file_path)
            return audio.info.length
        except ImportError:
            logger.warning("mutagen not found, estimating duration based on file size")
            # 128 kbps = 16 KB/s (roughly 1 MB per minute). This is only a
            # fallback and may be inaccurate for other bitrates.
            size_bytes = file_path.stat().st_size
            return size_bytes / 16000.0
        except Exception as e:
            logger.error("Error reading audio duration: %s", e)
            return 2.0  # Safe default fallback
class _VoiceoverContext:
    """
    Context manager returned by ``VoiceoverScene.voiceover``.

    On entry it records the renderer's current time; on a clean exit it makes
    the scene wait for however much of the audio duration has not yet been
    covered by the animations played inside the ``with`` block.
    """

    def __init__(self, scene: Scene, duration: float):
        """Remember the owning scene and the audio duration, and build the tracker."""
        self.tracker = VoiceoverTracker(duration)
        self.duration = duration
        self.scene = scene

    def __enter__(self):
        """Record the renderer time at block entry and hand out the tracker."""
        # Reference point for measuring how much scene time the block consumes.
        self.start_time = self.scene.renderer.time
        return self.tracker

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Wait out the rest of the audio unless the block raised."""
        if not exc_type:
            # Audio duration minus the scene time spent inside the block.
            leftover = self.duration - (self.scene.renderer.time - self.start_time)
            # Only pad when the animations ended before the audio did; no
            # extra pause is inserted between consecutive voiceovers.
            if leftover > 0:
                self.scene.wait(leftover)
        # Falling through returns None, so an exception raised inside the
        # block propagates to the caller.
|