|
|
""" |
|
|
Smart Story Extractor - Extracts meaningful story moments for full comic generation |
|
|
""" |
|
|
|
|
|
import json |
|
|
import os |
|
|
import re |
|
|
from typing import List, Dict, Tuple |
|
|
import numpy as np |
|
|
|
|
|
class SmartStoryExtractor:
    """Select story-relevant subtitle entries and plan comic page layouts.

    Subtitles are scored with simple text heuristics (length, position in the
    sequence, keyword hits, punctuation, capitalized words, quotes). The best
    scoring entries are then picked with a spacing constraint so the chosen
    panels are spread over the whole sequence.
    """

    # Capitalized words that must not be mistaken for a name.
    _NON_NAME_WORDS = ('I', 'The', 'A', 'An')

    def __init__(self):
        """Initialize the smart story extractor."""
        self.story_keywords = {
            'introduction': ['hello', 'hi', 'name', 'meet', 'introduce', 'welcome', 'start', 'begin', 'once upon'],
            'conflict': ['but', 'however', 'problem', 'issue', 'challenge', 'difficult', 'trouble', 'wrong', 'bad'],
            'action': ['run', 'fight', 'jump', 'attack', 'defend', 'escape', 'chase', 'battle', 'move', 'quick'],
            'emotion': ['happy', 'sad', 'angry', 'scared', 'love', 'hate', 'fear', 'joy', 'cry', 'laugh', 'smile'],
            'climax': ['finally', 'suddenly', 'then', 'biggest', 'most', 'intense', 'peak', 'critical', 'important'],
            'resolution': ['end', 'finally', 'resolve', 'solve', 'peace', 'happy', 'conclude', 'finish', 'done'],
        }

    def extract_meaningful_story(self, subtitles_file: str, target_panels: int = 48) -> List[Dict]:
        """Extract meaningful story moments for comic panels.

        Args:
            subtitles_file: Path to a JSON file holding the subtitle entries.
            target_panels: Maximum number of panels to select (default 48).

        Returns:
            The selected subtitle entries in their original order. An empty
            list is returned when the file cannot be read or parsed, or when
            it contains no subtitles.
        """
        try:
            # Explicit encoding: the result must not depend on the locale.
            with open(subtitles_file, 'r', encoding='utf-8') as f:
                subtitles = json.load(f)
        except (OSError, ValueError, TypeError):
            # OSError: missing/unreadable file; ValueError: invalid JSON or
            # undecodable bytes; TypeError: invalid path argument (e.g. None).
            # Unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit
            # propagate.
            print(f"❌ Failed to load subtitles from {subtitles_file}")
            return []

        if not subtitles:
            return []

        total = len(subtitles)
        print(f"📖 Analyzing {total} subtitles for meaningful story moments...")

        # (score, original index, entry) for every subtitle.
        scored_subtitles = [
            (self._score_subtitle(sub, i, total), i, sub)
            for i, sub in enumerate(subtitles)
        ]

        # Best score first; the sort is stable, so ties keep file order.
        scored_subtitles.sort(key=lambda item: item[0], reverse=True)

        selected_indices = self._select_story_panels(scored_subtitles, target_panels, total)

        # Restore chronological order for the final panel sequence.
        selected_indices.sort()
        selected_subtitles = [subtitles[i] for i in selected_indices]

        print(f"✅ Selected {len(selected_subtitles)} meaningful story moments")

        return selected_subtitles

    @staticmethod
    def _count_keyword_hits(text: str, keywords: List[str]) -> int:
        """Return how many of ``keywords`` occur in ``text`` as substrings."""
        # NOTE(review): substring matching also fires inside longer words
        # (e.g. 'hi' in "this"); kept as-is to preserve the scoring heuristic.
        return sum(1 for keyword in keywords if keyword in text)

    def _score_subtitle(self, subtitle: Dict, index: int, total: int) -> float:
        """Score a subtitle based on story importance.

        Args:
            subtitle: Subtitle entry; only its ``'text'`` value is read.
            index: Position of the entry in the full subtitle list.
            total: Number of entries in the full subtitle list.

        Returns:
            A non-negative score; higher means more relevant to the story.
        """
        # A missing or null text is treated as an empty string.
        raw_text = str(subtitle.get('text') or '')
        text = raw_text.lower()
        # Keep the original casing: the capitalized-word check needs it.
        words = raw_text.split()
        score = 0.0

        # Longer lines get a bonus.
        if len(words) > 5:
            score += 2.0
        elif len(words) > 3:
            score += 1.0

        # Position bonus: opening, ending and middle of the sequence are
        # boosted, each combined with its own keyword group.
        position = index / total if total > 0 else 0.0
        keywords = self.story_keywords
        if position < 0.1:
            score += 3.0
            score += 2.0 * self._count_keyword_hits(text, keywords['introduction'])
        elif position > 0.85:
            score += 3.0
            score += 2.0 * self._count_keyword_hits(text, keywords['resolution'])
        elif 0.4 < position < 0.6:
            score += 2.0
            score += 3.0 * self._count_keyword_hits(text, keywords['climax'])

        # Conflict/action and emotion keywords count anywhere in the sequence.
        score += 2.5 * self._count_keyword_hits(text, keywords['conflict'] + keywords['action'])
        score += 2.0 * self._count_keyword_hits(text, keywords['emotion'])

        # Questions and exclamations.
        if '?' in text:
            score += 1.5
        if '!' in text:
            score += 2.0

        # One-time bonus for a capitalized word. This must look at the
        # original-case words: on lowercased text it could never match.
        if any(
            len(word) > 2 and word[0].isupper() and word not in self._NON_NAME_WORDS
            for word in words
        ):
            score += 1.0

        # Quote characters (apostrophes included).
        if '"' in text or "'" in text:
            score += 1.0

        return score

    def _select_story_panels(self, scored_subtitles: List[Tuple], target: int, total: int) -> List[int]:
        """Select panels ensuring good story coverage.

        Args:
            scored_subtitles: ``(score, index, entry)`` tuples, best first.
            target: Maximum number of indices to return.
            total: Number of entries in the full subtitle list.

        Returns:
            At most ``target`` subtitle indices in selection order (not
            sorted). Empty when ``target`` is not positive.
        """
        # Nothing to pick; also avoids dividing by zero for the spacing below.
        if target <= 0:
            return []

        selected: List[int] = []
        chosen = set()  # mirrors ``selected`` for O(1) membership tests

        def pick(idx: int) -> None:
            selected.append(idx)
            chosen.add(idx)

        # Best-scoring entry from the first 10% of the sequence.
        intro_idx = next((i for _, i, _ in scored_subtitles if i < total * 0.1), None)
        if intro_idx is not None:
            pick(intro_idx)

        # Best-scoring entry from the last 10% of the sequence.
        conclusion_idx = next((i for _, i, _ in scored_subtitles if i > total * 0.9), None)
        if conclusion_idx is not None:
            pick(conclusion_idx)

        # Fill from the middle section, best first, keeping picks apart.
        min_spacing = max(1, total // (target * 2))
        if len(selected) < target:
            for _, idx, _ in scored_subtitles:
                if idx in chosen or not (total * 0.1 <= idx <= total * 0.9):
                    continue
                if all(abs(idx - other) >= min_spacing for other in selected):
                    pick(idx)
                    if len(selected) >= target:
                        break

        # Spacing left slots unfilled: top up with the best remaining entries.
        missing = target - len(selected)
        if missing > 0:
            leftovers = [i for _, i, _ in scored_subtitles if i not in chosen]
            selected.extend(leftovers[:missing])

        return selected[:target]

    def get_adaptive_layout(self, num_panels: int) -> List[Dict]:
        """Get adaptive page layout based on number of panels.

        Args:
            num_panels: Number of panels to place.

        Returns:
            One dict per page with the keys ``'panels_per_page'``, ``'rows'``
            and ``'cols'``.
        """
        def grid(rows: int, cols: int) -> Dict:
            return {'panels_per_page': rows * cols, 'rows': rows, 'cols': cols}

        if num_panels <= 4:
            return [grid(2, 2)]
        if num_panels <= 6:
            return [grid(2, 3)]
        if num_panels <= 9:
            return [grid(3, 3)]
        if num_panels <= 12:
            return [grid(2, 3), grid(2, 3)]

        # More than 12 panels: fill 2x3 pages, then one 2x2 page if at least
        # four panels remain, then a single row for whatever is left.
        layouts = []
        remaining = num_panels
        while remaining > 0:
            if remaining >= 6:
                layouts.append(grid(2, 3))
                remaining -= 6
            elif remaining >= 4:
                layouts.append(grid(2, 2))
                remaining -= 4
            else:
                layouts.append({'panels_per_page': remaining, 'rows': 1, 'cols': remaining})
                remaining = 0

        return layouts

    def create_story_timeline(self, selected_subtitles: List[Dict]) -> Dict:
        """Create a story timeline with phases.

        The sequence is cut at 20%, 50% and 80% into ``'introduction'``,
        ``'development'``, ``'climax'`` and ``'resolution'``. When there are
        subtitles, a phase that would be empty gets one entry instead: the
        first (introduction), the last (resolution) or the middle one.

        Args:
            selected_subtitles: Subtitle entries in story order.

        Returns:
            Dict mapping each phase name to its list of entries.
        """
        total = len(selected_subtitles)
        intro_end = int(total * 0.2)
        development_end = int(total * 0.5)
        climax_end = int(total * 0.8)

        timeline = {
            'introduction': selected_subtitles[:intro_end],
            'development': selected_subtitles[intro_end:development_end],
            'climax': selected_subtitles[development_end:climax_end],
            'resolution': selected_subtitles[climax_end:],
        }

        if not selected_subtitles:
            return timeline

        # Short sequences leave some slices empty; give those one entry.
        fallbacks = {
            'introduction': selected_subtitles[0],
            'resolution': selected_subtitles[-1],
        }
        middle_entry = selected_subtitles[total // 2]
        for phase in list(timeline):
            if not timeline[phase]:
                timeline[phase] = [fallbacks.get(phase, middle_entry)]

        return timeline