""" Smart keyframe generation with eye detection and emotion matching """ import os import cv2 import srt from typing import List import numpy as np from backend.eye_state_detector import EyeStateDetector, enhance_frame_selection from backend.utils import copy_and_rename_file def generate_keyframes_smart(video_path: str, story_subs: List, max_frames: int = 48): """ Generate keyframes with smart selection (no half-closed eyes) Args: video_path: Path to video file story_subs: List of subtitle objects for key story moments max_frames: Maximum number of frames to extract (default 48) """ print(f"šŸŽÆ Generating {len(story_subs)} smart keyframes (avoiding closed eyes)") # Initialize eye detector eye_detector = EyeStateDetector() # Ensure output directory exists final_dir = "frames/final" os.makedirs(final_dir, exist_ok=True) # Clear existing frames for f in os.listdir(final_dir): if f.endswith('.png'): os.remove(os.path.join(final_dir, f)) # Open video cap = cv2.VideoCapture(video_path) if not cap.isOpened(): print(f"āŒ Failed to open video: {video_path}") return False fps = cap.get(cv2.CAP_PROP_FPS) total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) print(f"šŸ“¹ Video: {fps} fps, {total_frames} total frames") print(f"šŸ‘ļø Smart frame selection enabled (avoiding half-closed eyes)") # Extract frames extracted_count = 0 for i, sub in enumerate(story_subs[:max_frames]): try: print(f"\nšŸ“ Processing segment {i+1}/{min(len(story_subs), max_frames)}: {sub.content[:40]}...") # Extract multiple candidate frames for this subtitle candidates = extract_candidate_frames( cap, sub, fps, num_candidates=5 # Extract 5 frames to choose from ) if candidates: # Select best frame (no half-closed eyes) best_frame, eye_state = select_best_candidate(candidates, eye_detector) if best_frame is not None: output_path = os.path.join(final_dir, f"frame{extracted_count:03d}.png") cv2.imwrite(output_path, best_frame) extracted_count += 1 print(f" āœ… Selected frame with {eye_state['state']} eyes (confidence: {eye_state['confidence']:.2f})") else: print(f" āš ļø No suitable frame found (all had closed/half-closed eyes)") else: print(f" āš ļø Failed to extract candidate frames") except Exception as e: print(f" āŒ Error processing segment {i+1}: {e}") cap.release() # If we didn't get enough frames, extract more with relaxed criteria if extracted_count < max_frames and extracted_count < 10: print(f"\nāš ļø Only extracted {extracted_count} frames, extracting more with relaxed criteria...") _extract_additional_frames(video_path, final_dir, extracted_count, max_frames) # Final count final_frames = len([f for f in os.listdir(final_dir) if f.endswith('.png')]) print(f"\nāœ… Total frames extracted: {final_frames}") print(f"šŸ‘ļø All frames checked for eye quality") return final_frames > 0 def extract_candidate_frames(cap, subtitle, fps, num_candidates=5): """Extract multiple candidate frames from a subtitle segment""" candidates = [] # Calculate time range start_time = subtitle.start.total_seconds() end_time = subtitle.end.total_seconds() duration = end_time - start_time # If duration is very short, just get middle frame if duration < 0.5: num_candidates = 1 # Extract frames evenly distributed across the duration for i in range(num_candidates): # Calculate timestamp (avoid very start/end to reduce motion blur) if num_candidates == 1: time_offset = duration / 2 else: # Distribute between 20% and 80% of duration time_offset = 0.2 * duration + (i / (num_candidates - 1)) * 0.6 * duration timestamp = start_time + time_offset frame_num = int(timestamp * fps) # Extract frame cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num) ret, frame = cap.read() if ret and frame is not None: candidates.append(frame) return candidates def select_best_candidate(candidates: List[np.ndarray], eye_detector: EyeStateDetector): """Select the best frame from candidates based on eye state""" best_frame = None best_score = -1 best_state = None for i, frame in enumerate(candidates): # Save temp frame for analysis temp_path = f"temp_candidate_{i}.png" cv2.imwrite(temp_path, frame) # Check eye state eye_state = eye_detector.check_eyes_state(temp_path) # Calculate score score = calculate_frame_score(eye_state) # Update best if this is better if score > best_score: best_score = score best_frame = frame best_state = eye_state # Clean up temp file if os.path.exists(temp_path): os.remove(temp_path) return best_frame, best_state def calculate_frame_score(eye_state): """Calculate a quality score for a frame based on eye state""" score = 0.0 # Eye state scoring (most important) if eye_state['state'] == 'open': score += 10.0 elif eye_state['state'] == 'partially_open': score += 7.0 elif eye_state['state'] == 'unknown': score += 5.0 # Might be okay (no face detected) elif eye_state['state'] == 'half_closed': score += 2.0 else: # closed score += 0.0 # Confidence bonus score += eye_state['confidence'] * 3.0 # Suitability check if eye_state['suitable_for_comic']: score += 5.0 return score def _extract_additional_frames(video_path: str, output_dir: str, start_count: int, target_count: int): """Extract additional frames with relaxed eye criteria""" cap = cv2.VideoCapture(video_path) if not cap.isOpened(): return eye_detector = EyeStateDetector() total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) needed = target_count - start_count step = total_frames / needed if needed > 0 else 1 count = start_count attempts = 0 max_attempts = needed * 3 # Try up to 3x frames to find good ones while count < target_count and attempts < max_attempts: frame_num = int((attempts * step) % total_frames) cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num) ret, frame = cap.read() if ret: # Check eye state with relaxed criteria temp_path = f"temp_check_{attempts}.png" cv2.imwrite(temp_path, frame) eye_state = eye_detector.check_eyes_state(temp_path) # Accept if not completely closed if eye_state['state'] not in ['closed', 'half_closed']: output_path = os.path.join(output_dir, f"frame{count:03d}.png") cv2.imwrite(output_path, frame) count += 1 print(f" āœ… Added frame {count} ({eye_state['state']} eyes)") # Clean up if os.path.exists(temp_path): os.remove(temp_path) attempts += 1 cap.release() print(f" āœ… Extracted {count - start_count} additional frames")