"""
Emotion-Aware Comic Generation

Creates comics that match facial expressions with dialogue emotions.
"""

import json
import os
from typing import Dict, List, Optional

import cv2
import numpy as np
import srt


class FacialExpressionAnalyzer:
    """Analyze facial expressions in frames using OpenCV Haar cascades."""

    def __init__(self):
        # Pre-trained Haar cascades shipped with OpenCV.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.eye_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_eye.xml')
        self.smile_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_smile.xml')

    def analyze_expression(self, image_path: str) -> Dict[str, float]:
        """Analyze the facial expression in an image."""
        img = cv2.imread(image_path)
        if img is None:
            return self._default_expression()

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        faces = self.face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
        if len(faces) == 0:
            return self._default_expression()

        # Analyze the largest detected face.
        x, y, w, h = max(faces, key=lambda f: f[2] * f[3])
        face_roi = gray[y:y + h, x:x + w]

        eyes = self.eye_cascade.detectMultiScale(face_roi, scaleFactor=1.1, minNeighbors=5)
        smiles = self.smile_cascade.detectMultiScale(face_roi, scaleFactor=1.8, minNeighbors=20)

        expression = self._analyze_features(face_roi, eyes, smiles)
        expression['intensity'] = self._analyze_intensity(face_roi)
        return expression

    def _analyze_features(self, face_roi, eyes, smiles) -> Dict[str, float]:
        """Heuristically map detected features to an expression distribution."""
        expression = {
            'happy': 0.0,
            'sad': 0.0,
            'angry': 0.0,
            'surprised': 0.0,
            'neutral': 0.5,
        }

        # A detected smile is the strongest available signal of happiness.
        if len(smiles) > 0:
            expression['happy'] = 0.7
            expression['neutral'] = 0.3

        if len(eyes) >= 2:
            # High variance in the upper half of the face suggests wide-open
            # eyes (surprise); very low variance suggests narrowed eyes.
            eye_region = face_roi[:face_roi.shape[0] // 2, :]
            eye_variance = np.var(eye_region)

            if eye_variance > 1000:
                expression['surprised'] = 0.6
            elif eye_variance < 500:
                expression['angry'] = 0.4
        else:
            # Fewer than two visible eyes often means squinting or a turned head.
            expression['sad'] = 0.3
            expression['angry'] = 0.3

        # Normalize so the scores form a probability-like distribution.
        total = sum(expression.values())
        if total > 0:
            expression = {k: v / total for k, v in expression.items()}

        return expression

def _analyze_intensity(self, face_roi) -> float: |
|
|
"""Analyze expression intensity""" |
|
|
|
|
|
edges = cv2.Canny(face_roi, 50, 150) |
|
|
edge_density = np.sum(edges > 0) / edges.size |
|
|
|
|
|
|
|
|
intensity = min(edge_density * 5, 1.0) |
|
|
return intensity |
|
|
|
|
|
    def _default_expression(self) -> Dict[str, float]:
        """Fallback expression when no face is detected."""
        return {
            'neutral': 1.0,
            'happy': 0.0,
            'sad': 0.0,
            'angry': 0.0,
            'surprised': 0.0,
            'intensity': 0.5,
        }


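# Usage sketch for the analyzer above (the frame path is hypothetical; any
# image file works). When a face is found, the non-'intensity' scores are
# normalized to sum to 1.0:
#
#   analyzer = FacialExpressionAnalyzer()
#   scores = analyzer.analyze_expression('frames/frame_0001.png')
#   # e.g. {'happy': 0.7, 'sad': 0.0, ..., 'intensity': 0.4}

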
class DialogueEmotionAnalyzer:
    """Analyze emotions in dialogue text with keyword and punctuation heuristics."""

    def __init__(self):
        # Keyword lexicons; matching is substring-based (see analyze_dialogue).
        self.emotion_words = {
            'happy': {
                'words': ['happy', 'joy', 'love', 'great', 'wonderful', 'amazing',
                          'fantastic', 'excellent', 'beautiful', 'laugh', 'smile', 'fun'],
                'weight': 1.0
            },
            'sad': {
                'words': ['sad', 'cry', 'tear', 'sorry', 'miss', 'lonely',
                          'depressed', 'hurt', 'pain', 'loss', 'grief'],
                'weight': 1.0
            },
            'angry': {
                'words': ['angry', 'mad', 'furious', 'hate', 'stupid', 'idiot',
                          'damn', 'hell', 'rage', 'annoyed'],
                'weight': 1.2
            },
            'surprised': {
                'words': ['wow', 'oh', 'what', 'really', 'seriously',
                          'unbelievable', 'amazing', 'shocked', 'surprised'],
                'weight': 0.8
            },
            'fear': {
                'words': ['afraid', 'scared', 'fear', 'terrified', 'nervous',
                          'worry', 'panic', 'help', 'danger'],
                'weight': 1.0
            }
        }

        # Punctuation patterns carry emotional cues of their own.
        self.punctuation_emotions = {
            '!': {'surprised': 0.3, 'happy': 0.2, 'angry': 0.2},
            '?': {'surprised': 0.4, 'confused': 0.3},
            '...': {'sad': 0.3, 'thoughtful': 0.3},
            '?!': {'surprised': 0.6},
            '!!!': {'angry': 0.4, 'excited': 0.4}
        }

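    # Note on the lexicons above: a word can appear in more than one list
    # ('amazing' is in both 'happy' and 'surprised'), so a single token can
    # feed several emotions at once; the weights then arbitrate between them.
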
    def analyze_dialogue(self, text: str) -> Dict[str, float]:
        """Analyze the emotion of a line of dialogue."""
        if not text:
            return {'neutral': 1.0}

        text_lower = text.lower()
        emotions = {'neutral': 0.2}

        # Score keyword hits per emotion (substring match, one hit per keyword).
        for emotion, data in self.emotion_words.items():
            score = 0
            for word in data['words']:
                if word in text_lower:
                    score += data['weight']
            if score > 0:
                emotions[emotion] = score

        # Add punctuation cues on top of the keyword scores.
        for pattern, emotion_scores in self.punctuation_emotions.items():
            if pattern in text:
                for emotion, score in emotion_scores.items():
                    emotions[emotion] = emotions.get(emotion, 0) + score

        # Shouting (mostly upper-case text) raises the intensity.
        caps_ratio = sum(1 for c in text if c.isupper()) / len(text)
        emotions['intensity'] = 0.8 if caps_ratio > 0.5 else 0.5

        # Normalize everything except 'intensity' into a distribution.
        emotion_sum = sum(v for k, v in emotions.items() if k != 'intensity')
        if emotion_sum > 0:
            for k in emotions:
                if k != 'intensity':
                    emotions[k] = emotions[k] / emotion_sum

        return emotions


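# Worked example of the heuristic above, computed from the tables as defined:
# for "WOW, REALLY?!", 'wow' and 'really' each add 0.8 to 'surprised', and the
# '!', '?', and '?!' patterns add 0.3 + 0.4 + 0.6 more; the caps ratio exceeds
# 0.5, so intensity is 0.8. After normalization, 'surprised' dominates at
# roughly 0.76. Note the substring matching is loose: 'oh' also fires inside
# words like "ohio".
#
#   DialogueEmotionAnalyzer().analyze_dialogue("WOW, REALLY?!")

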
class StoryCondenser:
    """Condense long stories into key moments."""

    def __init__(self):
        self.min_panels = 10
        self.max_panels = 15

    def identify_key_moments(self, subtitles: List[srt.Subtitle]) -> List[int]:
        """Identify the indices of key story moments."""
        if len(subtitles) <= self.max_panels:
            return list(range(len(subtitles)))

        key_indices = []

        # Always keep the opening and closing lines.
        key_indices.extend([0, len(subtitles) - 1])

        # Collect candidates: turning points, emotional peaks, action beats.
        key_indices.extend(self._find_turning_points(subtitles))
        key_indices.extend(self._find_emotional_peaks(subtitles))
        key_indices.extend(self._find_action_moments(subtitles))

        # Deduplicate and sort.
        key_indices = sorted(set(key_indices))

        # Trim to the panel budget, or pad sparse selections with transitions.
        if len(key_indices) > self.max_panels:
            key_indices = self._select_most_important(subtitles, key_indices)
        if len(key_indices) < self.min_panels:
            key_indices = self._add_transitions(subtitles, key_indices)

        return sorted(key_indices)[:self.max_panels]

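    # For example, with 40 subtitles and the defaults above, this returns at
    # most 15 (and typically at least 10) sorted indices: line 0, line 39, and
    # whichever turning points, emotional peaks, and action beats score highest.
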
    def _find_turning_points(self, subtitles: List[srt.Subtitle]) -> List[int]:
        """Find story turning points signalled by transition words."""
        turning_words = ['but', 'however', 'suddenly', 'then', 'meanwhile', 'later', 'finally']
        indices = []

        for i, sub in enumerate(subtitles):
            text_lower = sub.content.lower()
            if any(word in text_lower for word in turning_words):
                indices.append(i)

        return indices

    def _find_emotional_peaks(self, subtitles: List[srt.Subtitle]) -> List[int]:
        """Find emotional peaks in the dialogue."""
        analyzer = DialogueEmotionAnalyzer()
        emotion_scores = []

        for i, sub in enumerate(subtitles):
            emotions = analyzer.analyze_dialogue(sub.content)
            # Peak strength is the strongest non-neutral signal; default to 0.0
            # so empty lines (which yield only {'neutral': 1.0}) cannot crash.
            intensity = max((v for k, v in emotions.items() if k != 'neutral'),
                            default=0.0)
            emotion_scores.append((i, intensity))

        # Keep the five strongest peaks that clear the threshold.
        emotion_scores.sort(key=lambda x: x[1], reverse=True)
        return [idx for idx, score in emotion_scores[:5] if score > 0.5]

    def _find_action_moments(self, subtitles: List[srt.Subtitle]) -> List[int]:
        """Find action moments signalled by action verbs."""
        action_words = ['run', 'fight', 'escape', 'attack', 'save', 'help', 'stop', 'go', 'move', 'quick']
        indices = []

        for i, sub in enumerate(subtitles):
            text_lower = sub.content.lower()
            if any(word in text_lower for word in action_words):
                indices.append(i)

        return indices

    def _select_most_important(self, subtitles: List[srt.Subtitle], indices: List[int]) -> List[int]:
        """Select the most important moments from the candidates."""
        scored_indices = []

        for idx in indices:
            score = self._calculate_importance_score(subtitles[idx], idx, len(subtitles))
            scored_indices.append((idx, score))

        scored_indices.sort(key=lambda x: x[1], reverse=True)
        return [idx for idx, score in scored_indices[:self.max_panels]]

    def _calculate_importance_score(self, subtitle: srt.Subtitle, index: int, total: int) -> float:
        """Calculate an importance score for a subtitle."""
        score = 1.0

        # Openings, endings, and midpoints tend to carry the story.
        position_ratio = index / total
        if position_ratio < 0.1 or position_ratio > 0.9:
            score += 0.5
        elif 0.4 < position_ratio < 0.6:
            score += 0.3

        # Longer lines carry more content, capped at +0.5.
        word_count = len(subtitle.content.split())
        score += min(word_count * 0.1, 0.5)

        # Exclamations and questions suggest dramatic beats.
        if '!' in subtitle.content:
            score += 0.3
        if '?' in subtitle.content:
            score += 0.2

        return score

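    # Worked example: "Run! We have to go!" at index 2 of 50 scores
    # 1.0 (base) + 0.5 (position ratio 0.04 < 0.1) + 0.5 (five words, capped)
    # + 0.3 (exclamation) = 2.3.
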
    def _add_transitions(self, subtitles: List[srt.Subtitle], current_indices: List[int]) -> List[int]:
        """Add transitional moments between key points."""
        new_indices = list(current_indices)

        # Find the gaps between consecutive key points.
        gaps = []
        for i in range(len(current_indices) - 1):
            gap_size = current_indices[i + 1] - current_indices[i]
            if gap_size > 2:
                gaps.append((current_indices[i], current_indices[i + 1], gap_size))

        # Fill the largest gaps first, stopping once min_panels is reached.
        gaps.sort(key=lambda x: x[2], reverse=True)

        for start, end, size in gaps:
            if len(new_indices) >= self.min_panels:
                break
            new_indices.append((start + end) // 2)

        return sorted(new_indices)


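# Usage sketch for the condenser (assumes `subs` is a list parsed with the
# `srt` package):
#
#   condenser = StoryCondenser()
#   key_indices = condenser.identify_key_moments(subs)  # at most 15 indices

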
class EmotionAwareComicGenerator:
    """Generate comics with emotion-aware panel selection."""

    def __init__(self):
        self.face_analyzer = FacialExpressionAnalyzer()
        self.dialogue_analyzer = DialogueEmotionAnalyzer()
        self.story_condenser = StoryCondenser()

    def generate_emotion_comic(self, video_path: str, max_panels: int = 12) -> Optional[Dict]:
        """Generate a comic with emotion-matched panels.

        Subtitles and frames are loaded from fixed paths ('test1.srt' and
        'frames/'); `video_path` is kept for interface compatibility.
        """
        print("🎭 Generating Emotion-Aware Comic...")

        subtitles = self._load_subtitles()
        all_frames = self._get_all_frames()

        if not subtitles or not all_frames:
            print("❌ Missing subtitles or frames")
            return None

        # Condense the story down to at most `max_panels` key moments.
        self.story_condenser.max_panels = max_panels
        print("📖 Identifying key story moments...")
        key_indices = self.story_condenser.identify_key_moments(subtitles)
        print(f"   Found {len(key_indices)} key moments")

        print("🎭 Matching facial expressions with dialogue...")
        matched_panels = []

        for idx in key_indices:
            subtitle = subtitles[idx]

            # Score the dialogue, then find the frame whose facial
            # expression best matches it.
            text_emotions = self.dialogue_analyzer.analyze_dialogue(subtitle.content)
            best_frame = self._find_best_emotion_match(
                subtitle, text_emotions, all_frames, idx, len(subtitles)
            )

            matched_panels.append({
                'subtitle': subtitle,
                'frame': best_frame['path'],
                'text_emotions': text_emotions,
                'face_emotions': best_frame['emotions'],
                'match_score': best_frame['score'],
                'index': idx
            })

        print("📐 Creating emotion-aware layout...")
        comic_data = self._create_emotion_layout(matched_panels)
        self._save_emotion_comic(comic_data)

        print(f"✅ Emotion-aware comic created with {len(matched_panels)} panels!")
        return comic_data

    def _find_best_emotion_match(self, subtitle: srt.Subtitle, text_emotions: Dict,
                                 frames: List[str], sub_index: int, total_subs: int) -> Dict:
        """Find the frame whose facial expression best matches the dialogue."""
        # Estimate where in the frame sequence this subtitle falls.
        frame_ratio = sub_index / total_subs
        center_frame = int(frame_ratio * len(frames))

        # Search a small window around the estimated position.
        search_range = 5
        start = max(0, center_frame - search_range)
        end = min(len(frames), center_frame + search_range + 1)

        best_match = {
            'path': frames[center_frame] if center_frame < len(frames) else frames[-1],
            'emotions': {'neutral': 1.0},
            'score': 0
        }

        for i in range(start, end):
            face_emotions = self.face_analyzer.analyze_expression(frames[i])
            score = self._calculate_emotion_match_score(text_emotions, face_emotions)

            if score > best_match['score']:
                best_match = {
                    'path': frames[i],
                    'emotions': face_emotions,
                    'score': score
                }

        return best_match

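    # For example, subtitle 10 of 40 with 200 extracted frames centres the
    # search on frame int(0.25 * 200) = 50 and scores frames 45 through 55,
    # keeping whichever frame matches the dialogue emotions best.
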
    def _calculate_emotion_match_score(self, text_emotions: Dict, face_emotions: Dict) -> float:
        """Calculate how well the text and face emotions match."""
        score = 0

        # Reward emotions that are strong in both; penalize mismatches.
        emotions = set(text_emotions.keys()) | set(face_emotions.keys())
        for emotion in emotions:
            if emotion == 'intensity':
                continue

            text_score = text_emotions.get(emotion, 0)
            face_score = face_emotions.get(emotion, 0)

            if text_score > 0.3 and face_score > 0.3:
                score += min(text_score, face_score) * 2
            else:
                score -= abs(text_score - face_score) * 0.5

        # Bonus when the intensities roughly agree.
        text_intensity = text_emotions.get('intensity', 0.5)
        face_intensity = face_emotions.get('intensity', 0.5)
        if abs(text_intensity - face_intensity) < 0.3:
            score += 0.5

        return max(0, score)

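    # Worked example: text {'happy': 0.7, 'neutral': 0.3} against face
    # {'happy': 0.6, 'neutral': 0.4}, both with intensity 0.5, scores
    # min(0.7, 0.6) * 2 = 1.2 for happy, -|0.3 - 0.4| * 0.5 = -0.05 for
    # neutral, and +0.5 for matching intensity: 1.65 in total.
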
    def _create_emotion_layout(self, panels: List[Dict]) -> Dict:
        """Create a layout with emotion-aware bubble styling."""
        pages = []
        panels_per_page = 4

        for i in range(0, len(panels), panels_per_page):
            page_panels = panels[i:i + panels_per_page]

            page = {
                'width': 800,
                'height': 600,
                'panels': [],
                'bubbles': []
            }

            # 2x2 grid: four 380x280 panels with 10px gutters on an 800x600 page.
            positions = [
                (10, 10, 380, 280),
                (410, 10, 380, 280),
                (10, 310, 380, 280),
                (410, 310, 380, 280)
            ]

            for j, panel_data in enumerate(page_panels):
                x, y, w, h = positions[j]

                # Pick the dominant emotion across text and face scores,
                # explicitly excluding the 'intensity' entry.
                all_emotions = {**panel_data['text_emotions'], **panel_data['face_emotions']}
                dominant_emotion = max(
                    ((k, v) for k, v in all_emotions.items() if k != 'intensity'),
                    key=lambda item: item[1]
                )[0]

                page['panels'].append({
                    'x': x, 'y': y,
                    'width': w, 'height': h,
                    'image': panel_data['frame'],
                    'emotion': dominant_emotion,
                    'match_score': panel_data['match_score']
                })

                # Style the speech bubble to match the dominant emotion.
                bubble_style = self._get_emotion_bubble_style(dominant_emotion)
                page['bubbles'].append({
                    'id': f'bubble_{panel_data["index"]}',
                    'x': x + 20,
                    'y': y + h - 100,
                    'width': 150,
                    'height': 70,
                    'text': panel_data['subtitle'].content,
                    'style': bubble_style
                })

            pages.append(page)

        return {'pages': pages}

    def _get_emotion_bubble_style(self, emotion: str) -> Dict:
        """Get the bubble style for an emotion."""
        styles = {
            'happy': {
                'shape': 'round',
                'border': '#4CAF50',
                'background': '#E8F5E9',
                'font': 'bold'
            },
            'sad': {
                'shape': 'droopy',
                'border': '#2196F3',
                'background': '#E3F2FD',
                'font': 'italic'
            },
            'angry': {
                'shape': 'jagged',
                'border': '#F44336',
                'background': '#FFEBEE',
                'font': 'bold',
                'size': 'large'
            },
            'surprised': {
                'shape': 'burst',
                'border': '#FF9800',
                'background': '#FFF3E0',
                'font': 'bold'
            },
            'neutral': {
                'shape': 'round',
                'border': '#333',
                'background': '#FFF',
                'font': 'normal'
            }
        }

        # Emotions without a dedicated style (e.g. 'fear') fall back to neutral.
        return styles.get(emotion, styles['neutral'])

    def _load_subtitles(self) -> List[srt.Subtitle]:
        """Load subtitles from the hard-coded SRT file."""
        if os.path.exists('test1.srt'):
            with open('test1.srt', 'r', encoding='utf-8') as f:
                return list(srt.parse(f.read()))
        return []

    def _get_all_frames(self) -> List[str]:
        """Get all available frames from the frames directory."""
        frames_dir = 'frames'
        if os.path.exists(frames_dir):
            return [os.path.join(frames_dir, f)
                    for f in sorted(os.listdir(frames_dir))
                    if f.endswith('.png')]
        return []

    def _save_emotion_comic(self, comic_data: Dict):
        """Save the emotion-aware comic layout as JSON."""
        os.makedirs('output', exist_ok=True)

        with open('output/emotion_comic.json', 'w') as f:
            json.dump(comic_data, f, indent=2)

        print("✅ Saved emotion-aware comic to output/emotion_comic.json")


def create_emotion_comic(video_path='video/sample.mp4'):
    """Create an emotion-aware comic."""
    generator = EmotionAwareComicGenerator()
    return generator.generate_emotion_comic(video_path)


if __name__ == "__main__":
    create_emotion_comic()
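
# Usage sketch: the pipeline reads 'test1.srt' and a 'frames/' directory of
# PNGs (both hard-coded in the loaders above), then writes
# output/emotion_comic.json and returns the layout dict:
#
#   comic = create_emotion_comic()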