File size: 4,327 Bytes
6fc3143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60e9dd0
84591f9
 
 
 
 
 
 
 
 
60e9dd0
84591f9
6fc3143
60e9dd0
1110dbd
6fc3143
 
 
1110dbd
6fc3143
 
1110dbd
6fc3143
 
 
 
1110dbd
6fc3143
1110dbd
 
 
 
 
 
6fc3143
 
1110dbd
6fc3143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import logging
from pathlib import Path
from manim import Scene
from manimator.services.voiceover import SimpleElevenLabsService

logger = logging.getLogger(__name__)

class VoiceoverTracker:
    """
    Minimal stand-in for the tracker object of manim-voiceover.

    An instance is what a ``with self.voiceover(...) as tracker:`` block
    receives; it carries nothing but the duration it was constructed with.
    """

    def __init__(self, duration: float) -> None:
        # Stored unchanged; animations inside the block can read
        # ``tracker.duration`` (e.g. as a run_time) to pace themselves.
        self.duration = duration

class VoiceoverScene(Scene):
    """
    Base class for scenes that play generated speech alongside animations.

    Replaces the manim-voiceover library with a direct implementation: the
    text is turned into an audio file by the speech service, the file is
    attached to the scene with ``add_sound``, and the context manager
    returned by ``voiceover`` waits out whatever part of the clip the
    enclosed animations did not cover.
    """

    def __init__(self, **kwargs):
        """Initialise the underlying Scene and create the default speech service."""
        super().__init__(**kwargs)
        self.speech_service = SimpleElevenLabsService()

    def set_speech_service(self, service):
        """
        Replace the speech service used by ``voiceover``.

        Kept for compatibility with generated code patterns. Only
        ``SimpleElevenLabsService`` instances are accepted; anything else is
        ignored (the current service stays in place) and a warning is logged
        so the mismatch is visible instead of silently dropped.

        Args:
            service: The candidate speech service object.
        """
        if isinstance(service, SimpleElevenLabsService):
            self.speech_service = service
            return

        logger.warning(
            "Ignoring unsupported speech service of type %s; "
            "keeping the current speech service",
            type(service).__name__,
        )

    def voiceover(self, text: str) -> "_VoiceoverContext":
        """
        Generate speech for *text*, attach it to the scene and return a
        context manager that handles timing.

        The audio is generated and added when this method is called, i.e.
        when the ``with`` statement is evaluated and before its body runs.

        Args:
            text: The text passed to the speech service.

        Returns:
            A context manager whose ``__enter__`` yields a tracker exposing
            the clip duration.

        Raises:
            FileNotFoundError: If the path returned by the speech service
                does not exist on disk.
        """
        audio_path = self.speech_service.generate_from_text(text)

        # Resolve to an absolute path before checking it and handing it on.
        absolute_audio_path = Path(audio_path).resolve()

        if not absolute_audio_path.exists():
            logger.error("Audio file not found: %s", absolute_audio_path)
            raise FileNotFoundError(f"Generated audio file not found: {absolute_audio_path}")

        logger.info(
            "Adding audio to scene: %s (size: %s bytes)",
            absolute_audio_path,
            absolute_audio_path.stat().st_size,
        )

        self.add_sound(str(absolute_audio_path))

        duration = self._get_audio_duration(absolute_audio_path)

        return _VoiceoverContext(self, duration)

    def _get_audio_duration(self, file_path: Path) -> float:
        """
        Return the duration of an mp3 file.

        Uses mutagen when it can be imported. Without mutagen the duration
        is estimated from the file size; if reading the file fails for any
        other reason a fixed default of 2.0 is returned so rendering can
        continue.

        Args:
            file_path: Path of the mp3 file to inspect.

        Returns:
            The length reported by mutagen, the size-based estimate, or 2.0.
        """
        try:
            from mutagen.mp3 import MP3
            audio = MP3(file_path)
            return audio.info.length
        except ImportError:
            logger.warning("mutagen not found, estimating duration based on file size")
            # Rough estimate assuming a 128 kbps mp3: 128 kbps = 16 KB/s,
            # i.e. roughly 1 MB per minute. This may be inaccurate for
            # files encoded at a different bitrate.
            size_bytes = file_path.stat().st_size
            return size_bytes / 16000.0
        except Exception as e:
            # Deliberate best-effort: an unreadable file must not abort the
            # render, so fall back to a short fixed duration.
            logger.error("Error reading audio duration: %s", e)
            return 2.0

class _VoiceoverContext:
    """
    Context manager helper handed back by a voiceover call.

    On entry it yields a tracker carrying the clip duration; on a clean exit
    it makes the scene wait for whatever part of the clip the enclosed
    animations did not already cover.
    """

    def __init__(self, scene: Scene, duration: float):
        self.tracker = VoiceoverTracker(duration)
        self.duration = duration
        self.scene = scene

    def __enter__(self):
        # Snapshot the renderer clock so __exit__ can measure how much scene
        # time the animations inside the block consumed.
        self.start_time = self.scene.renderer.time
        return self.tracker

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing is returned on any path, so an exception raised inside the
        # block propagates; in that case no waiting is done.
        if not exc_type:
            played = self.scene.renderer.time - self.start_time
            leftover = self.duration - played

            # Animations shorter than the audio: hold the scene for exactly
            # the remaining clip time (no extra padding is added).
            if leftover > 0:
                self.scene.wait(leftover)