File size: 9,271 Bytes
83e35a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
"""
Select the most engaging frames for comic generation
Focuses on visual quality and storytelling, not showing emotion labels
"""

import os
import cv2
import srt
import json  # πŸ‘ˆ ADD THIS LINE
from typing import List, Dict, Tuple
import numpy as np
from backend.enhanced_emotion_matcher import EnhancedEmotionMatcher
from backend.eye_state_detector import EyeStateDetector
from backend.emotion_aware_comic import FacialExpressionAnalyzer

def generate_keyframes_engaging(video_path: str, story_subs: List, max_frames: int = 48):
    """
    Select the most engaging frames for comic generation
    
    Criteria:
    1. Facial expression matches dialogue mood
    2. Eyes are open (no blinking)
    3. Good composition (face visible, not blurry)
    4. Dramatic/interesting moments
    """
    
    print(f"🎬 Selecting most engaging frames for comic generation...")
    print(f"πŸ“Š Processing {len(story_subs)} story moments")
    
    # Initialize analyzers (used internally, not shown to user)
    emotion_matcher = EnhancedEmotionMatcher()
    face_analyzer = FacialExpressionAnalyzer()
    eye_detector = EyeStateDetector()
    
    # Ensure output directory exists
    final_dir = "frames/final"
    os.makedirs(final_dir, exist_ok=True)
    
    # Clear existing frames
    for f in os.listdir(final_dir):
        if f.endswith('.png'):
            os.remove(os.path.join(final_dir, f))
    
    # Open video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"❌ Failed to open video: {video_path}")
        return False
    
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    print(f"πŸ“Ή Analyzing video: {fps:.1f} fps, {total_frames} frames")
    print(f"πŸ” Finding best frames for each story moment...")
    
    # Track frame filename -> original timestamp
    frame_metadata = {}
    
    # Process each subtitle
    selected_count = 0
    
    for idx, sub in enumerate(story_subs[:max_frames]):
        # Don't show emotion analysis to user, just use it internally
        text_emotions = emotion_matcher.analyze_text_emotion(sub.content)
        target_mood = max(text_emotions.items(), 
                         key=lambda x: x[1] if x[0] != 'intensity' else 0)[0]
        
        # Progress indicator (simple, not technical)
        if idx % 5 == 0:
            print(f"  Processing moments {idx+1}-{min(idx+5, len(story_subs))}...")
        
        # Find the most engaging frame for this moment
        best_frame = find_most_engaging_frame(
            cap, sub, fps, 
            face_analyzer, eye_detector,
            target_mood, text_emotions
        )
        
        if best_frame is not None:
            # Save the selected frame with consistent naming
            filename = f"frame_{selected_count:03d}.png"
            output_path = os.path.join(final_dir, filename)
            
            # Apply any visual enhancements for comic style
            enhanced_frame = enhance_for_comic(best_frame['image'])
            cv2.imwrite(output_path, enhanced_frame)
            
            # Store original timestamp (midpoint of subtitle)
            original_timestamp = sub.start.total_seconds() + (sub.end.total_seconds() - sub.start.total_seconds()) / 2
            frame_metadata[filename] = original_timestamp
            
            selected_count += 1
        else:
            # Fallback: get a decent frame from the middle
            fallback_frame = get_decent_frame(cap, sub, fps)
            if fallback_frame is not None:
                filename = f"frame_{selected_count:03d}.png"
                output_path = os.path.join(final_dir, filename)
                enhanced_frame = enhance_for_comic(fallback_frame)
                cv2.imwrite(output_path, enhanced_frame)
                
                # Store fallback timestamp
                original_timestamp = sub.start.total_seconds() + (sub.end.total_seconds() - sub.start.total_seconds()) / 2
                frame_metadata[filename] = original_timestamp
                
                selected_count += 1
    
    cap.release()
    
    # Save metadata for regeneration (critical for video-based regenerate)
    with open("frames/frame_metadata.json", "w") as f:
        json.dump(frame_metadata, f, indent=2)
    
    print(f"\nβœ… Selected {selected_count} engaging frames for comic")
    print(f"πŸ“ Frames saved to: {final_dir}")
    print(f"πŸ’Ύ Frame metadata saved to: frames/frame_metadata.json")
    
    return selected_count > 0


def find_most_engaging_frame(cap, subtitle, fps, face_analyzer, eye_detector, 
                            target_mood, text_emotions):
    """
    Find the most visually engaging frame for this subtitle
    
    Scoring based on:
    - Expression matching dialogue (internal, not shown)
    - Eye quality (open, alert)
    - Visual composition
    - Sharpness/clarity
    """
    
    # Time window to search
    start_time = subtitle.start.total_seconds()
    end_time = subtitle.end.total_seconds()
    duration = end_time - start_time
    
    # Extend search window slightly for better options
    search_start = max(0, start_time - 0.5)
    search_end = end_time + 0.5
    
    start_frame = int(search_start * fps)
    end_frame = int(search_end * fps)
    
    # Sample frames intelligently
    num_samples = min(15, end_frame - start_frame)
    if num_samples <= 0:
        num_samples = 5
    
    frame_step = max(1, (end_frame - start_frame) // num_samples)
    
    best_frame = None
    best_score = -1
    
    for frame_num in range(start_frame, end_frame, frame_step):
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = cap.read()
        
        if not ret or frame is None:
            continue
        
        # Calculate engagement score
        score = calculate_engagement_score(
            frame, face_analyzer, eye_detector, 
            target_mood, text_emotions
        )
        
        if score > best_score:
            best_score = score
            best_frame = {
                'image': frame.copy(),
                'score': score,
                'frame_num': frame_num
            }
    
    return best_frame


def calculate_engagement_score(frame, face_analyzer, eye_detector, 
                              target_mood, text_emotions):
    """
    Calculate how engaging/suitable this frame is for the comic
    
    High scores for:
    - Good facial expressions
    - Open eyes
    - Clear image
    - Good composition
    """
    
    score = 0.0
    
    # Save temp for analysis
    temp_path = "temp_frame_analysis.png"
    cv2.imwrite(temp_path, frame)
    
    try:
        # 1. Eye quality (most important for comics)
        eye_state = eye_detector.check_eyes_state(temp_path)
        if eye_state['state'] == 'open':
            score += 3.0
        elif eye_state['state'] == 'partially_open':
            score += 1.5
        elif eye_state['state'] == 'unknown':
            score += 1.0  # No face, might be okay
        else:  # closed or half_closed
            score += 0.0  # Strong penalty
        
        # 2. Expression quality (internal matching)
        face_emotions = face_analyzer.analyze_expression(temp_path)
        
        # Check if expression matches mood
        if target_mood in face_emotions and face_emotions[target_mood] > 0.3:
            score += 2.0 * face_emotions[target_mood]
        
        # General expressiveness (any strong emotion is interesting)
        max_emotion = max(face_emotions.values())
        if max_emotion > 0.5:
            score += 1.0
        
        # 3. Image quality
        sharpness = calculate_sharpness(frame)
        score += sharpness * 0.5
        
        # 4. Composition (face detection confidence)
        if eye_state.get('confidence', 0) > 0.7:
            score += 0.5
        
    finally:
        # Clean up
        if os.path.exists(temp_path):
            os.remove(temp_path)
    
    return score


def calculate_sharpness(frame):
    """Calculate image sharpness using Laplacian variance"""
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    variance = laplacian.var()
    
    # Normalize to 0-1 range
    normalized = min(variance / 500.0, 1.0)
    return normalized


def enhance_for_comic(frame):
    """Apply subtle enhancements to make frame more comic-like"""
    lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    l = clahe.apply(l)
    enhanced = cv2.merge([l, a, b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR)
    return enhanced


def get_decent_frame(cap, subtitle, fps):
    """Get a decent fallback frame"""
    positions = [0.5, 0.3, 0.7, 0.2, 0.8]
    duration = subtitle.end.total_seconds() - subtitle.start.total_seconds()
    for pos in positions:
        time_offset = subtitle.start.total_seconds() + (duration * pos)
        frame_num = int(time_offset * fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = cap.read()
        if ret and frame is not None:
            if calculate_sharpness(frame) > 0.3:
                return frame
    return None