lsatone / backend /keyframes /keyframes_smart.py
3v324v23's picture
Update Comic123 with local comic folder files
83e35a7
"""
Smart keyframe generation with eye detection and emotion matching
"""
import os
import cv2
import srt
from typing import List
import numpy as np
from backend.eye_state_detector import EyeStateDetector, enhance_frame_selection
from backend.utils import copy_and_rename_file
def generate_keyframes_smart(video_path: str, story_subs: List, max_frames: int = 48):
    """
    Generate keyframes with smart selection (no half-closed eyes)

    For each of the first ``max_frames`` subtitles, candidate frames are
    sampled from the subtitle's time span and the best-scoring one is written
    to ``frames/final/frameNNN.png``. Existing ``.png`` files in that directory
    are deleted first. If fewer than 10 frames (and fewer than ``max_frames``)
    were written, ``_extract_additional_frames`` is called to add more.

    Args:
        video_path: Path to video file
        story_subs: List of subtitle objects for key story moments; each is
            read for ``content`` here and passed on to
            ``extract_candidate_frames``
        max_frames: Maximum number of frames to extract (default 48)

    Returns:
        True if at least one ``.png`` is present in ``frames/final`` at the
        end, False if the video could not be opened or nothing was written.
    """
    print(f"🎯 Generating {len(story_subs)} smart keyframes (avoiding closed eyes)")

    # Initialize eye detector
    eye_detector = EyeStateDetector()

    # Ensure output directory exists
    final_dir = "frames/final"
    os.makedirs(final_dir, exist_ok=True)

    # Clear existing frames
    for f in os.listdir(final_dir):
        if f.endswith('.png'):
            os.remove(os.path.join(final_dir, f))

    # Open video
    cap = cv2.VideoCapture(video_path)
    extracted_count = 0
    try:
        if not cap.isOpened():
            print(f"❌ Failed to open video: {video_path}")
            return False

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"📹 Video: {fps} fps, {total_frames} total frames")
        print("👁️ Smart frame selection enabled (avoiding half-closed eyes)")

        # Extract frames
        segment_total = min(len(story_subs), max_frames)
        for i, sub in enumerate(story_subs[:max_frames]):
            try:
                print(f"\n📝 Processing segment {i+1}/{segment_total}: {sub.content[:40]}...")

                # Extract multiple candidate frames for this subtitle
                candidates = extract_candidate_frames(
                    cap, sub, fps,
                    num_candidates=5  # Extract 5 frames to choose from
                )
                if not candidates:
                    print(" ⚠️ Failed to extract candidate frames")
                    continue

                # Select best frame (no half-closed eyes)
                best_frame, eye_state = select_best_candidate(candidates, eye_detector)
                if best_frame is None:
                    print(" ⚠️ No suitable frame found (all had closed/half-closed eyes)")
                    continue

                output_path = os.path.join(final_dir, f"frame{extracted_count:03d}.png")
                # cv2.imwrite reports failure through its return value; only
                # count the frame once it is on disk so numbering has no gaps
                if not cv2.imwrite(output_path, best_frame):
                    print(f" ❌ Failed to write frame to {output_path}")
                    continue

                extracted_count += 1
                print(f" ✅ Selected frame with {eye_state['state']} eyes (confidence: {eye_state['confidence']:.2f})")
            except Exception as e:
                # Best effort: one bad segment must not abort the whole run
                print(f" ❌ Error processing segment {i+1}: {e}")
    finally:
        # Release the capture on every exit path, including interrupts
        cap.release()

    # If we didn't get enough frames, extract more with relaxed criteria
    if extracted_count < max_frames and extracted_count < 10:
        print(f"\n⚠️ Only extracted {extracted_count} frames, extracting more with relaxed criteria...")
        _extract_additional_frames(video_path, final_dir, extracted_count, max_frames)

    # Final count
    final_frames = sum(1 for f in os.listdir(final_dir) if f.endswith('.png'))
    print(f"\n✅ Total frames extracted: {final_frames}")
    print("👁️ All frames checked for eye quality")
    return final_frames > 0
def extract_candidate_frames(cap, subtitle, fps, num_candidates=5):
    """Extract multiple candidate frames from a subtitle segment

    Sample timestamps are spread evenly between 20% and 80% of the subtitle's
    duration; a segment shorter than 0.5 seconds contributes only its midpoint.

    Args:
        cap: Video capture object; ``set(prop, value)`` and ``read()`` are
            called on it
        subtitle: Object whose ``start`` and ``end`` provide ``total_seconds()``
        fps: Frame rate used to convert timestamps to frame indices. If it is
            not a positive number, seeking is done by milliseconds instead.
        num_candidates: Number of frames to sample from the segment

    Returns:
        List of the frames that were read successfully (possibly empty).
    """
    candidates = []

    # Calculate time range
    start_time = subtitle.start.total_seconds()
    end_time = subtitle.end.total_seconds()
    duration = end_time - start_time

    # If duration is very short, just get middle frame
    if duration < 0.5:
        num_candidates = 1

    # With an fps of 0 every timestamp would collapse onto frame 0, and NaN
    # would make int() raise. NaN compares False here, so both cases take the
    # millisecond-seek path below.
    fps_usable = fps > 0

    # Extract frames evenly distributed across the duration
    for i in range(num_candidates):
        # Calculate timestamp (avoid very start/end to reduce motion blur)
        if num_candidates == 1:
            time_offset = duration / 2
        else:
            # Distribute between 20% and 80% of duration
            time_offset = 0.2 * duration + (i / (num_candidates - 1)) * 0.6 * duration
        timestamp = start_time + time_offset

        # Seek to the sample position
        if fps_usable:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(timestamp * fps))
        else:
            cap.set(cv2.CAP_PROP_POS_MSEC, timestamp * 1000.0)

        # Extract frame
        ret, frame = cap.read()
        if ret and frame is not None:
            candidates.append(frame)

    return candidates
def select_best_candidate(candidates: List[np.ndarray], eye_detector: EyeStateDetector):
    """Select the best frame from candidates based on eye state

    Each candidate is written to a temporary PNG in the current working
    directory, passed to ``eye_detector.check_eyes_state`` and scored with
    ``calculate_frame_score``. The first candidate with the highest score wins.

    Args:
        candidates: Frames to compare
        eye_detector: Detector whose ``check_eyes_state(path)`` result is
            scored

    Returns:
        ``(best_frame, best_state)``; both are None when no candidate scored
        above -1 (in particular when ``candidates`` is empty).
    """
    best_frame = None
    best_score = -1
    best_state = None

    for i, frame in enumerate(candidates):
        # Save temp frame for analysis
        temp_path = f"temp_candidate_{i}.png"
        try:
            cv2.imwrite(temp_path, frame)
            # Check eye state
            eye_state = eye_detector.check_eyes_state(temp_path)
        finally:
            # Clean up temp file even when the write or the detector raises,
            # so failed segments do not litter the working directory
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass

        # Calculate score
        score = calculate_frame_score(eye_state)

        # Update best if this is better (strict '>' keeps the earliest on ties)
        if score > best_score:
            best_score = score
            best_frame = frame
            best_state = eye_state

    return best_frame, best_state
def calculate_frame_score(eye_state):
    """Return a quality score for a frame from its eye-state report

    The score is the sum of three parts: base points for
    ``eye_state['state']``, three times ``eye_state['confidence']``, and a
    flat 5 points when ``eye_state['suitable_for_comic']`` is truthy.
    Higher is better.
    """
    # Base points per state (most important part of the score); any state
    # not listed here, such as closed, earns nothing
    state_points = {
        'open': 10.0,
        'partially_open': 7.0,
        'unknown': 5.0,  # Might be okay (no face detected)
        'half_closed': 2.0,
    }
    total = 0.0 + state_points.get(eye_state['state'], 0.0)

    # Confidence bonus
    total += eye_state['confidence'] * 3.0

    # Suitability bonus
    suitability_bonus = 5.0 if eye_state['suitable_for_comic'] else None
    if suitability_bonus is not None:
        total += suitability_bonus

    return total
def _extract_additional_frames(video_path: str, output_dir: str, start_count: int, target_count: int):
    """Extract additional frames with relaxed eye criteria

    Samples frames spread across the whole video and saves those whose eye
    state is neither 'closed' nor 'half_closed' as
    ``<output_dir>/frameNNN.png``, numbering from ``start_count`` until
    ``target_count`` is reached or the attempt budget (three sweeps of
    ``target_count - start_count`` positions) is used up.

    Args:
        video_path: Path to video file
        output_dir: Directory the accepted frames are written to
        start_count: Number of frames already present; the first new frame
            gets this index
        target_count: Frame count to stop at

    Returns:
        None. Returns silently if the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    count = start_count
    try:
        if not cap.isOpened():
            return

        eye_detector = EyeStateDetector()
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        needed = target_count - start_count

        if total_frames <= 0:
            # Without a frame count there is nothing to spread samples over;
            # bail out instead of dividing by zero when computing positions
            print(" ⚠️ Video reports no frame count, cannot sample additional frames")
        elif needed > 0:
            step = total_frames / needed
            sweeps = 3  # Try up to 3x frames to find good ones
            max_attempts = needed * sweeps
            attempts = 0

            while count < target_count and attempts < max_attempts:
                # The first sweep visits slot * step. Later sweeps shift every
                # slot by a fraction of the step so retries look at new frames
                # instead of wrapping around onto ones already examined.
                sweep, slot = divmod(attempts, needed)
                position = (slot + sweep / sweeps) * step
                frame_num = min(int(position), total_frames - 1)

                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret, frame = cap.read()

                if ret:
                    # Check eye state with relaxed criteria
                    temp_path = f"temp_check_{attempts}.png"
                    try:
                        cv2.imwrite(temp_path, frame)
                        eye_state = eye_detector.check_eyes_state(temp_path)
                    finally:
                        # Clean up even if the write or the detector raises
                        try:
                            os.remove(temp_path)
                        except FileNotFoundError:
                            pass

                    # Accept if not closed or half-closed
                    if eye_state['state'] not in ['closed', 'half_closed']:
                        output_path = os.path.join(output_dir, f"frame{count:03d}.png")
                        # Count the frame only if it was actually written
                        if cv2.imwrite(output_path, frame):
                            count += 1
                            print(f" ✅ Added frame {count} ({eye_state['state']} eyes)")

                attempts += 1
    finally:
        # Release the capture on every exit path
        cap.release()

    print(f" ✅ Extracted {count - start_count} additional frames")