|
|
import io |
|
|
import base64 |
|
|
import os |
|
|
from PIL import Image, ImageDraw, ImageFont |
|
|
import config |
|
|
import warnings |
|
|
import textwrap |
|
|
from pathlib import Path |
|
|
import time |
|
|
from models.image_generation import generate_image_fn |
|
|
from google.generativeai import GenerativeModel |
|
|
import json |
|
|
import re |
|
|
import tempfile |
|
|
import shutil |
|
|
from google.generativeai.types import GenerationConfig |
|
|
from utils.comic_panel_splitter import split_comic_panels |
|
|
import cv2 |
|
|
import numpy as np |
|
|
from datetime import datetime |
|
|
warnings.filterwarnings("ignore", message="IMAGE_SAFETY is not a valid FinishReason") |
|
|
|
|
|
|
|
|
def log_execution(func):
    """Decorator that times each call to ``func`` and reports it at DEBUG level.

    The wrapped callable keeps the name, docstring and other metadata of
    ``func`` (via ``functools.wraps``), so introspection and tracebacks still
    point at the real function.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns its result unchanged. Exceptions raised by
        ``func`` propagate to the caller untouched.
    """
    # Imported locally so this block does not depend on the file's import list.
    import functools
    import logging

    logger = logging.getLogger(__name__)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()

        # Only format the timestamps when somebody is actually listening.
        if logger.isEnabledFor(logging.DEBUG):
            start_str = datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')
            end_str = datetime.fromtimestamp(end_time).strftime('%Y-%m-%d %H:%M:%S')
            logger.debug(
                "%s: started %s, finished %s, took %.3fs",
                getattr(func, "__qualname__", repr(func)),
                start_str,
                end_str,
                end_time - start_time,
            )

        return result

    return wrapper
|
|
|
|
|
class ComicImageGenerator: |
|
|
""" |
|
|
Generates a comic-style image. |
|
|
""" |
|
|
|
|
|
def __init__(self): |
|
|
pass |
|
|
|
|
|
|
|
|
@log_execution |
|
|
def generate_comic(self, story_data, output_path=None, style=None):
    """
    Generate a comic-style image based on the provided story data.

    The prompt is built from the ``title``, ``description``, ``characters``
    and ``settings`` keys of ``story_data`` and passed to
    ``generate_image_fn``. If that call returns ``None`` or anything in the
    generation/saving path raises, a locally drawn placeholder image is
    returned instead, so callers always get an image back.

    Args:
        story_data: Dictionary containing the story information. Missing
            keys fall back to ``"My Story"``, ``""``, ``[]`` and ``[]``.
        output_path: Optional path to save the resulting image. Missing
            parent directories are created.
        style: Optional comic style to use; forwarded to the prompt builder.

    Returns:
        tuple: ``(image, data_url)`` where ``image`` is the comic (or the
        placeholder) and ``data_url`` is a ``data:image/png;base64,...``
        string with the PNG-encoded image.
    """
    title = story_data.get("title", "My Story")
    description = story_data.get("description", "")
    characters = story_data.get("characters", [])
    settings = story_data.get("settings", [])
    # The comic is always requested with nine scenes.
    num_scenes = 9

    prompt = self._create_comic_prompt(title, description, characters, settings, style, num_scenes)

    def finalize(image):
        """Optionally save ``image`` to ``output_path`` and return it with its PNG data URL."""
        if output_path:
            directory = os.path.dirname(output_path)
            if directory:
                # exist_ok avoids the check-then-create race of a manual exists() test.
                os.makedirs(directory, exist_ok=True)
            image.save(output_path)

        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
        return image, f"data:image/png;base64,{img_b64}"

    try:
        print(f"Generating comic with {num_scenes} scenes...")

        comic_image = generate_image_fn(
            selected_prompt=prompt,
            output_path=output_path
        )

        if comic_image is None:
            comic_image = self._create_placeholder_comic(title, description)

        return finalize(comic_image)

    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to the placeholder.
        print(f"Error generating comic: {str(e)}")
        return finalize(self._create_placeholder_comic(title, description))
|
|
|
|
|
@log_execution |
|
|
def _create_comic_prompt(self, title, description, characters=None, settings=None, style=None, num_scenes=1):
    """
    Build the text prompt used for comic generation.

    Sections are collected in priority order (layout, compact-panel rules
    for 20+ scenes, characters, story, environments, technical
    specifications, quality/flow instructions), joined by
    ``_assemble_prompt_with_smart_truncation`` and finally handed to
    ``self.generate_panel_descriptions``. If that call returns a truthy
    value it replaces the assembled prompt; otherwise the assembled prompt
    is used as-is.

    Args:
        title: Title of the story
        description: Visual description of the story
        characters: List of character data
        settings: List of setting data
        style: Optional visual style
        num_scenes: Number of scenes to include (1-24)

    Returns:
        str: The final prompt text.
    """
    priority_sections = []

    layout_specs = self._get_optimal_layout_description(num_scenes)
    priority_sections.append(f"CRITICAL LAYOUT: {layout_specs}")

    if num_scenes >= 20:
        # The header reports the real panel count rather than a fixed "20".
        priority_sections.extend([
            f"\U0001F3AF COMPACT SCENE MASTERY FOR {num_scenes} PANELS:",
            "SMALL EFFICIENT SCENES: Each panel must tell its story moment with maximum visual economy - focus on ONE key action, emotion, or story beat per panel",
            "CLEAR FOCAL POINTS: Every panel needs ONE main subject in sharp focus with minimal background distractions",
            "ESSENTIAL ELEMENTS ONLY: Include only the most crucial visual elements needed to advance the story - remove ALL unnecessary details",
            "READABLE AT SMALL SIZE: Expressions and actions must be clearly visible even when the panel is small - use bold, simple compositions",
        ])

    if characters:
        priority_sections.extend(
            self._create_detailed_character_specifications(characters, num_scenes)
        )

    priority_sections.append(self._create_detailed_story_description(description, title))

    if settings:
        priority_sections.extend(
            self._create_detailed_environment_specifications(settings, num_scenes)
        )

    priority_sections.extend(
        self._create_comprehensive_technical_specifications(style, num_scenes)
    )
    priority_sections.extend(
        self._create_advanced_quality_and_flow_instructions(num_scenes)
    )

    assembled_prompt = self._assemble_prompt_with_smart_truncation(priority_sections)

    # Call the panel-description step exactly once: calling it a second time
    # doubles the work and could return a different value than the one tested.
    panel_prompt = self.generate_panel_descriptions(assembled_prompt)
    final_prompt = panel_prompt if panel_prompt else assembled_prompt

    print(f"\n XXXXXX {final_prompt} XXXXXX \n")

    return final_prompt
|
|
|
|
|
@log_execution |
|
|
def _create_detailed_character_specifications(self, characters, num_scenes): |
|
|
"""Create extremely detailed character specifications prioritizing visual consistency.""" |
|
|
char_specs = [] |
|
|
|
|
|
char_specs.append("π CRITICAL CHARACTER CONSISTENCY PROTOCOL:") |
|
|
char_specs.append("ABSOLUTE REQUIREMENT: Characters MUST look identical in every single panel - same face, hair, clothes, proportions, expressions style") |
|
|
|
|
|
for i, character in enumerate(characters[:3]): |
|
|
if isinstance(character, dict) and "visual_description" in character: |
|
|
char_name = character.get("name", f"Character_{i+1}") |
|
|
char_desc = character["visual_description"] |
|
|
|
|
|
char_spec = f"CHARACTER {i+1} - {char_name}: {char_desc}" |
|
|
|
|
|
if "traits" in character and character["traits"]: |
|
|
traits = character["traits"][:5] |
|
|
char_spec += f" | DISTINCTIVE FEATURES: {', '.join(traits)}" |
|
|
|
|
|
char_spec += f" | CONSISTENCY RULE: This exact appearance must be maintained across all {num_scenes} panels with zero variation in facial features, hair, clothing, or body proportions" |
|
|
|
|
|
char_specs.append(char_spec) |
|
|
|
|
|
if len([c for c in characters[:3] if isinstance(c, dict) and 'visual_description' in c]) > 1: |
|
|
char_specs.append(f"MULTI-CHARACTER RULE: All characters must maintain their exact individual appearances simultaneously across all {num_scenes} panels - no character design drift allowed") |
|
|
|
|
|
return char_specs |
|
|
|
|
|
@log_execution |
|
|
def _create_detailed_story_description(self, description, title): |
|
|
"""Create enhanced story description with preserved important details.""" |
|
|
story_elements = [] |
|
|
|
|
|
enhanced_desc = f"STORY CONTENT: {title} - {description}" |
|
|
|
|
|
enhanced_desc += " | VISUAL NARRATIVE FOCUS: Every detail must be clearly visible and contribute to story comprehension through imagery alone" |
|
|
|
|
|
enhanced_desc += " | ATMOSPHERIC DETAILS: Include specific lighting, weather, time of day, and environmental mood indicators that enhance the narrative" |
|
|
|
|
|
enhanced_desc += " | CHARACTER EXPRESSION CLARITY: All emotions, reactions, and character intentions must be immediately readable through facial expressions, body language, and positioning" |
|
|
|
|
|
return enhanced_desc |
|
|
@log_execution |
|
|
def _create_detailed_environment_specifications(self, settings, num_scenes): |
|
|
"""Create detailed environment specifications with consistency focus.""" |
|
|
env_specs = [] |
|
|
|
|
|
env_specs.append(" ENVIRONMENTAL CONSISTENCY PROTOCOL:") |
|
|
|
|
|
for i, setting in enumerate(settings[:3]): |
|
|
if isinstance(setting, dict) and "description" in setting: |
|
|
setting_name = setting.get("name", f"Location_{i+1}") |
|
|
setting_desc = setting["description"] |
|
|
|
|
|
env_spec = f"LOCATION {i+1} - {setting_name}: {setting_desc}" |
|
|
|
|
|
if "visual_elements" in setting and setting["visual_elements"]: |
|
|
elements = setting["visual_elements"][:5] |
|
|
env_spec += f" | KEY VISUAL MARKERS: {', '.join(elements)}" |
|
|
|
|
|
if "mood" in setting: |
|
|
env_spec += f" | ATMOSPHERE: {setting['mood']}" |
|
|
|
|
|
env_spec += f" | LOCATION CONSISTENCY: When this location appears across multiple panels, all architectural details, lighting, and distinctive features must remain identical" |
|
|
|
|
|
env_specs.append(env_spec) |
|
|
|
|
|
return env_specs |
|
|
@log_execution |
|
|
def _create_comprehensive_technical_specifications(self, style, num_scenes):
    """Create the technical part of the prompt: style, composition and detail rules.

    Args:
        style: Style name passed to ``_get_enhanced_style_specifications``.
        num_scenes: Panel count. It selects the grid quoted in the
            composition section and, from 20 upwards, adds four
            compact-panel rules.

    Returns:
        list[str]: Style lines, then composition lines, then the
        detail-preservation lines.
    """
    tech_specs = []

    tech_specs.extend(self._get_enhanced_style_specifications(style))

    # Render the layout as "RxC"; interpolating the tuple directly would put
    # its Python repr (for example "(3, 3)") into the prompt text.
    rows, cols = self._calculate_optimal_grid_layout(num_scenes)

    composition_specs = [
        " PANEL COMPOSITION MASTERY:",
        f"Grid Layout: Precisely arranged {rows}x{cols} grid with professional comic book spacing and clear panel borders",
        "Visual Hierarchy: Each panel must have a clear focal point with supporting details that enhance rather than distract from the main action",
        "Depth and Perspective: Use foreground, midground, and background elements to create visual depth and spatial relationships",
        "Color Harmony: Maintain consistent color palette across all panels while using color psychology to enhance mood and narrative flow",
    ]

    if num_scenes >= 20:
        composition_specs.extend([
            "COMPACT PANEL OPTIMIZATION: Design each panel for MAXIMUM visual impact in minimal space",
            "SIMPLE BACKGROUNDS: Use minimal, clean backgrounds that don't compete with main subjects",
            "BOLD CHARACTER POSES: Use clear, distinctive poses and gestures that read well at small sizes",
            "HIGH CONTRAST: Ensure strong contrast between characters and backgrounds for clarity",
        ])

    tech_specs.extend(composition_specs)

    tech_specs.extend([
        " DETAIL PRESERVATION PROTOCOL:",
        "Facial Detail Consistency: All character faces must maintain identical features - eye shape, nose structure, mouth proportions, facial hair, scars, or distinctive marks",
        "Clothing and Accessory Continuity: Every piece of clothing, jewelry, weapons, or accessories must appear identical across panels",
        "Environmental Detail Tracking: Background objects, architectural elements, vegetation, and atmospheric effects must remain consistent when locations reappear",
        "Lighting Continuity: Maintain logical light sources and shadow patterns that reflect time of day and weather conditions consistently",
    ])

    return tech_specs
|
|
@log_execution |
|
|
def _get_enhanced_style_specifications(self, style): |
|
|
"""Get enhanced style specifications with technical details.""" |
|
|
enhanced_styles = { |
|
|
"Comic Book Style": [ |
|
|
" MODERN DIGITAL COMIC BOOK STYLE (NO SKETCH LINES, NO DEFORMITIES):", |
|
|
"Line Art: Bold, ultra-clean digital inking with consistent stroke weightβabsolutely no rough sketch lines or unfinished strokes", |
|
|
"Color Treatment: Vibrant, saturated colors with polished cel-shading and sharp highlights for a glossy modern finish", |
|
|
"Shading: Precise digital shadows and highlightsβavoid gradient banding or painterly strokes associated with traditional sketches", |
|
|
"Panel Borders: Clean, geometric panel borders with consistent gutters and professional comic book page layout standards" |
|
|
], |
|
|
"Manga Style": [ |
|
|
" MODERN DIGITAL MANGA STYLE (NO SKETCH LINES, NO DEFORMITIES):", |
|
|
"Line Quality: Razor-sharp digital line work with deliberate varying weightsβcompletely free of rough sketches", |
|
|
"Character Design: Classic manga proportions rendered crisply with expressive eyes and flawless facial detailsβno distortions", |
|
|
"Tone Work: High-resolution screentones and digitally applied hatching for a refined finish", |
|
|
"Panel Layout: Dynamic panel flow with polished angles that enhance narrative pacing" |
|
|
], |
|
|
"Photorealistic": [ |
|
|
" MODERN DIGITAL PHOTOREALISM (NO SKETCH LINES, NO DEFORMITIES):", |
|
|
"Rendering Quality: Cinema-quality realistic rendering with accurate lighting physics and atmospheric effectsβfaces and limbs must appear intact and natural", |
|
|
"Detail Level: Ultra-high detail textures with crisp edgesβno painterly or sketch artefacts", |
|
|
"Color Accuracy: Natural color grading with realistic skin tones, environmental colors, and accurate material reflectance", |
|
|
"Depth of Field: Professional photography-style focus effects with realistic camera perspective and depth relationships" |
|
|
], |
|
|
"Cinematic Realism": [ |
|
|
" MODERN DIGITAL CINEMATIC REALISM (NO SKETCH LINES, NO DEFORMITIES):", |
|
|
"Film Quality: Movie-grade digital rendering with crisp edges and zero sketch artefacts", |
|
|
"Color Grading: Cinematic color treatment with cohesive paletteβmaintain realistic skin and material fidelity", |
|
|
"Camera Work: Dynamic camera angles translated into polished panel compositions", |
|
|
"Lighting Design: Professional film lighting with atmospheric effectsβensure characters remain fully detailed, no distortions" |
|
|
] |
|
|
} |
|
|
|
|
|
return enhanced_styles.get(style, [ |
|
|
" MODERN DIGITAL COMIC ILLUSTRATION (NO SKETCH LINES, NO DEFORMITIES):", |
|
|
"Professional Art: Gallery-quality digital illustration with masterful composition, color theory, and technical execution", |
|
|
"Visual Clarity: Crystal-clear details with optimal contrast and saturation for maximum visual impact and readability", |
|
|
"Artistic Consistency: Unified artistic approach across all panels maintaining consistent quality and style treatmentβabsolutely no sketch artefacts" |
|
|
]) |
|
|
@log_execution |
|
|
def _create_advanced_quality_and_flow_instructions(self, num_scenes): |
|
|
"""Create advanced quality and flow instructions.""" |
|
|
quality_instructions = [ |
|
|
" ADVANCED QUALITY REQUIREMENTS:", |
|
|
"Technical Excellence: Ultra-high resolution output with crisp details, optimal contrast, and professional-grade visual quality", |
|
|
"Narrative Clarity: Every panel must advance the story visibly - clear cause and effect relationships between sequential panels", |
|
|
|
|
|
"Line Art: Sharp digital lines, clean and precise, emphasizing dynamic movement and emotional clarity.", |
|
|
"Emotional Impact: Each panel must convey specific emotions through character expressions, body language, and environmental mood" |
|
|
] |
|
|
|
|
|
if num_scenes > 1: |
|
|
flow_instructions = [ |
|
|
f" {num_scenes}-PANEL FLOW MASTERY:", |
|
|
|
|
|
"Action Sequences: Break complex actions into clear, understandable steps across multiple panels", |
|
|
"Character Tracking: Maintain character positions and movements logically across panel transitions", |
|
|
"Pacing Control: Balance action panels with character moments and environmental establishing shots for optimal narrative rhythm", |
|
|
"EACH PANEL IS A DISTINCT SCENE : Each panel must depict a unique, self-contained moment or tableau from the story. No visual elements or action should flow directly from one panel to another" |
|
|
] |
|
|
|
|
|
if num_scenes >= 20: |
|
|
flow_instructions.extend([ |
|
|
"STORY ARC FOR 24 PANELS: Create a complete story with beginning (panels 1-6), rising action (panels 7-12), climax (panels 13-18), and resolution (panels 19-24)", |
|
|
"MICRO-MOMENTS: Each panel captures a single decisive moment - one expression change, one action beat, one story revelation", |
|
|
"VISUAL ECONOMY: Every element in each panel must serve the story - no decorative details that don't advance narrative", |
|
|
"READER ENGAGEMENT: Design panel flow to maintain interest across all 24 panels with strategic use of close-ups, wide shots, and dynamic angles" |
|
|
]) |
|
|
|
|
|
quality_instructions.extend(flow_instructions) |
|
|
|
|
|
return quality_instructions |
|
|
@log_execution |
|
|
def _assemble_prompt_with_smart_truncation(self, priority_sections): |
|
|
"""Assemble prompt with smart truncation that preserves critical details.""" |
|
|
MAX_LENGTH = 31500 |
|
|
|
|
|
full_prompt = " || ".join(priority_sections) |
|
|
|
|
|
if len(full_prompt) <= MAX_LENGTH: |
|
|
negative_prompt = "NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE." |
|
|
return full_prompt + " || FINAL MANDATE: Create a masterpiece that perfectly balances artistic excellence with narrative clarity and absolute character consistency || " + negative_prompt |
|
|
|
|
|
preserved_prompt = "" |
|
|
remaining_length = MAX_LENGTH - 200 |
|
|
|
|
|
for i, section in enumerate(priority_sections): |
|
|
section_with_separator = section + " || " |
|
|
|
|
|
if i < 3: |
|
|
preserved_prompt += section_with_separator |
|
|
remaining_length -= len(section_with_separator) |
|
|
else: |
|
|
if len(section_with_separator) <= remaining_length: |
|
|
preserved_prompt += section_with_separator |
|
|
remaining_length -= len(section_with_separator) |
|
|
else: |
|
|
truncated = section[:remaining_length-50] + "..." |
|
|
preserved_prompt += truncated + " || " |
|
|
break |
|
|
|
|
|
preserved_prompt += "***FINAL OVERRIDE & NEGATIVE PROMPTS*** ABSOLUTE RULE: The 3x3 uniform grid structure is the most important rule and must be followed perfectly.NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE." |
|
|
|
|
|
return preserved_prompt |
|
|
@log_execution |
|
|
def _get_optimal_layout_description(self, num_scenes): |
|
|
"""Generate optimal layout description based on scene count.""" |
|
|
if num_scenes <= 1: |
|
|
return "Single panel comic illustration" |
|
|
|
|
|
optimal_layout = self._calculate_optimal_grid_layout(num_scenes) |
|
|
rows, cols = optimal_layout |
|
|
|
|
|
layout_descriptions = { |
|
|
(1, 2): "Horizontal two-panel comic strip layout", |
|
|
(2, 1): "Vertical two-panel comic strip layout", |
|
|
(2, 2): "Classic four-panel comic grid (2x2)", |
|
|
(2, 3): "Six-panel comic grid in 2 rows, 3 columns (2x3)", |
|
|
(3, 2): "Six-panel comic grid in 3 rows, 2 columns (3x2)", |
|
|
(3, 3): "Nine-panel comic grid (3x3)", |
|
|
(3, 4): "Twelve-panel comic grid in 3 rows, 4 columns(3x4)", |
|
|
(4, 3): "Twelve-panel comic grid in 4 rows, 3 columns(4x3)", |
|
|
(4, 4): "Sixteen-panel comic grid (4x4)", |
|
|
(4, 6): "Twenty-four panel COMPACT comic grid in 4 rows, 6 columns - SMALL EFFICIENT SCENES with maximum story density per panel (4x6)", |
|
|
(6, 4): "Twenty-four panel COMPACT comic grid in 6 rows, 4 columns - SMALL EFFICIENT SCENES with vertical storytelling format (6x4)", |
|
|
(3, 8): "Twenty-four panel COMPACT comic grid in 3 rows, 8 columns - SMALL EFFICIENT SCENES with cinematic widescreen format(3x8)", |
|
|
(8, 3): "Twenty-four panel comic grid in 8 rows, 3 columns - vertical scroll format (8x3)" |
|
|
} |
|
|
|
|
|
layout_desc = layout_descriptions.get((rows, cols), f"{rows}x{cols} comic panel grid layout") |
|
|
|
|
|
return f"COMIC LAYOUT: {layout_desc} with clear panel borders, consistent gutters, and professional comic book formatting" |
|
|
@log_execution |
|
|
def _enhance_description_for_visual_consistency(self, description): |
|
|
"""Enhance the core description with visual consistency keywords.""" |
|
|
consistency_enhancers = [ |
|
|
"maintaining perfect visual consistency throughout all panels", |
|
|
"identical character appearances across every scene", |
|
|
"unified lighting and color palette", |
|
|
"consistent artistic style and perspective" |
|
|
] |
|
|
|
|
|
enhanced = f"STORY CONTENT: {description}. " |
|
|
enhanced += "VISUAL CONSISTENCY REQUIREMENTS: " + ", ".join(consistency_enhancers) |
|
|
|
|
|
return enhanced |
|
|
@log_execution |
|
|
def _create_character_consistency_anchors(self, characters, num_scenes): |
|
|
"""Create sophisticated character consistency instructions.""" |
|
|
anchors = [] |
|
|
|
|
|
if characters: |
|
|
anchors.append("CHARACTER CONSISTENCY ANCHORS:") |
|
|
|
|
|
for i, character in enumerate(characters[:2]): |
|
|
if isinstance(character, dict) and "visual_description" in character: |
|
|
char_desc = character["visual_description"] |
|
|
|
|
|
anchor = f"Character {i+1}: {char_desc} - MUST appear IDENTICAL in every single panel with exact same: facial features, hair style, clothing, proportions, and distinctive visual elements" |
|
|
anchors.append(anchor) |
|
|
|
|
|
if num_scenes > 1: |
|
|
anchors.append(f"CRITICAL: All {len([c for c in characters[:2] if isinstance(c, dict) and 'visual_description' in c])} characters must look exactly the same across all {num_scenes} panels - same faces, same outfits, same proportions, same artistic rendering") |
|
|
|
|
|
return anchors |
|
|
@log_execution |
|
|
def _create_environment_consistency_anchors(self, settings, num_scenes): |
|
|
"""Create environmental consistency instructions.""" |
|
|
anchors = [] |
|
|
|
|
|
if settings: |
|
|
anchors.append("ENVIRONMENTAL CONSISTENCY:") |
|
|
|
|
|
for setting in settings: |
|
|
if isinstance(setting, dict) and "description" in setting: |
|
|
setting_desc = setting["description"] |
|
|
anchors.append(f"Setting: {setting_desc} - maintain consistent architectural details, lighting, and atmospheric elements when this location appears") |
|
|
|
|
|
if num_scenes > 1: |
|
|
anchors.append(f"Ensure environmental continuity across all {num_scenes} panels with logical spatial relationships and consistent time-of-day lighting") |
|
|
|
|
|
return anchors |
|
|
@log_execution |
|
|
def _create_advanced_style_instructions(self, style, num_scenes): |
|
|
"""Create advanced style instructions with technical specifications.""" |
|
|
instructions = [] |
|
|
|
|
|
advanced_style_map = { |
|
|
"Comic Book Style": [ |
|
|
"modern digital comic book illustration style (no sketch-like strokes, no deformities)", |
|
|
"bold ultra-clean line art with consistent stroke weight", |
|
|
"vibrant saturated colors with polished highlights and shadows", |
|
|
"dynamic panel compositions with varied camera angles", |
|
|
"classic comic book rendering techniques executed with a contemporary digital finish" |
|
|
], |
|
|
"Manga Style": [ |
|
|
"modern digital manga illustration style (no sketch artefacts, no deformities)", |
|
|
"razor-sharp line work with deliberate varying weights", |
|
|
"subtle color palette with high-resolution screentone effects", |
|
|
"expressive character designs with flawless facial details", |
|
|
"dynamic manga panel composition and flow" |
|
|
], |
|
|
"Cartoon Style": [ |
|
|
"polished digital cartoon style (clean vectors, no sketch lines, no deformities)", |
|
|
"smooth rounded character designs with appealing proportions", |
|
|
"bright harmonious color schemes with soft lighting", |
|
|
"clear readable expressions and body language", |
|
|
"family-friendly visual appeal with consistent character models" |
|
|
], |
|
|
"Photorealistic": [ |
|
|
"high-quality digital photorealism (no sketch artefacts, no deformities)", |
|
|
"detailed realistic lighting and shadows", |
|
|
"natural color grading with realistic materials and textures", |
|
|
"cinematic composition with depth of field effects", |
|
|
"professional photography-inspired visual quality" |
|
|
], |
|
|
"Cinematic Realism": [ |
|
|
"digital cinematic realism (crisp, no sketch lines, no deformities)", |
|
|
"dramatic lighting with atmospheric effects", |
|
|
"rich color grading with cinematic color palette", |
|
|
"dynamic camera angles and professional composition", |
|
|
"film-quality character rendering and environmental detail" |
|
|
], |
|
|
"Digital Painting": [ |
|
|
"masterful digital painting technique with a polished finish (no sketch lines, no deformities)", |
|
|
"controlled painterly brushwork with intentional texture and depth", |
|
|
"rich color harmony with sophisticated lighting", |
|
|
"artistic composition with traditional painting principles", |
|
|
"high-end digital art gallery quality" |
|
|
] |
|
|
} |
|
|
|
|
|
if style and style in advanced_style_map: |
|
|
instructions.append("ARTISTIC STYLE SPECIFICATIONS:") |
|
|
instructions.extend(advanced_style_map[style]) |
|
|
else: |
|
|
instructions.extend([ |
|
|
"ARTISTIC STYLE: High-quality illustration with professional comic book aesthetics", |
|
|
"clean precise line work with consistent artistic rendering", |
|
|
"harmonious color palette with strategic lighting effects", |
|
|
"polished visual presentation with attention to detail" |
|
|
]) |
|
|
|
|
|
if num_scenes > 1: |
|
|
instructions.append(f"STYLE CONSISTENCY: Maintain identical artistic style, line weight, color saturation, and rendering quality across all {num_scenes} panels") |
|
|
|
|
|
return instructions |
|
|
@log_execution |
|
|
def _create_panel_flow_instructions(self, num_scenes): |
|
|
"""Create instructions for optimal panel flow and transitions.""" |
|
|
flow_instructions = [] |
|
|
|
|
|
if num_scenes > 1: |
|
|
flow_instructions.extend([ |
|
|
"PANEL FLOW AND TRANSITIONS:", |
|
|
"create smooth visual flow from panel to panel following standard left-to-right, top-to-bottom reading order", |
|
|
"design panel compositions that guide the eye naturally through the sequence", |
|
|
"establish clear visual relationships between consecutive panels", |
|
|
"use consistent perspective and scale to maintain spatial continuity", |
|
|
"create visual rhythm through varied but harmonious panel compositions" |
|
|
]) |
|
|
|
|
|
if num_scenes >= 10: |
|
|
flow_instructions.extend([ |
|
|
"COMPREHENSIVE STORYTELLING FLOW: Design a compelling visual narrative that maintains engagement across all 12 panels", |
|
|
"balance action panels with character moments and environmental establishing shots", |
|
|
"create visual crescendos and quiet beats for optimal pacing", |
|
|
"ensure each panel contributes meaningfully to the overall story progression" |
|
|
]) |
|
|
|
|
|
return flow_instructions |
|
|
@log_execution |
|
|
def _create_quality_specifications(self, num_scenes): |
|
|
"""Create technical quality specifications.""" |
|
|
quality_specs = [ |
|
|
"TECHNICAL QUALITY REQUIREMENTS:", |
|
|
"ultra-high resolution with crisp clean details", |
|
|
"professional comic book production quality", |
|
|
"optimal contrast and saturation for visual clarity", |
|
|
"balanced composition with clear focal points in each panel", |
|
|
"masterful use of negative space and visual hierarchy" |
|
|
] |
|
|
|
|
|
if num_scenes > 1: |
|
|
quality_specs.extend([ |
|
|
f"perfect grid alignment with consistent panel spacing across all {num_scenes} panels", |
|
|
"clear panel borders with professional gutters and margins", |
|
|
"unified visual presentation suitable for professional comic publication" |
|
|
]) |
|
|
|
|
|
return quality_specs |
|
|
@log_execution |
|
|
def _optimize_prompt_structure(self, prompt_parts): |
|
|
"""Optimize the prompt structure for maximum AI comprehension.""" |
|
|
structured_prompt = [] |
|
|
|
|
|
for i, part in enumerate(prompt_parts): |
|
|
if isinstance(part, list): |
|
|
structured_prompt.append(" | ".join(part)) |
|
|
else: |
|
|
structured_prompt.append(part) |
|
|
|
|
|
final_prompt = " || ".join(structured_prompt) |
|
|
|
|
|
final_prompt += " || FINAL REQUIREMENT: Create a masterpiece-quality comic that perfectly balances artistic excellence with clear storytelling" |
|
|
|
|
|
return final_prompt |
|
|
@log_execution |
|
|
def _calculate_optimal_grid_layout(self, num_scenes): |
|
|
"""Calculate the most visually appealing grid layout for the given number of scenes.""" |
|
|
optimal_layouts = { |
|
|
1: (1, 1), |
|
|
2: (1, 2), |
|
|
3: (1, 3), |
|
|
4: (2, 2), |
|
|
5: (1, 5), |
|
|
6: (2, 3), |
|
|
7: (1, 7), |
|
|
8: (2, 4), |
|
|
9: (3, 3), |
|
|
10: (2, 5), |
|
|
11: (1, 11), |
|
|
12: (3, 4), |
|
|
13: (1, 13), |
|
|
14: (2, 7), |
|
|
15: (3, 5), |
|
|
16: (4, 4), |
|
|
17: (1, 17), |
|
|
18: (3, 6), |
|
|
19: (1, 19), |
|
|
20: (4, 5), |
|
|
21: (3, 7), |
|
|
22: (2, 11), |
|
|
23: (1, 23), |
|
|
24: (4, 6), |
|
|
} |
|
|
|
|
|
return optimal_layouts.get(num_scenes, self._calculate_optimal_layout(num_scenes, 1024, 768)) |
|
|
|
|
|
def _create_placeholder_comic(self, title, description):
    """
    Draw a simple stand-in image for when comic generation fails.

    The image is an 800x600 white canvas with the title at the top, a grey
    bordered box below it and, when a description is given, that text
    (cut to 300 characters plus "..." and wrapped at 70 columns) inside
    the box.

    Args:
        title: Title of the comic
        description: Visual description of the comic

    Returns:
        PIL.Image.Image: Placeholder comic image
    """
    canvas_size = (800, 600)
    width, height = canvas_size
    black = (0, 0, 0)

    comic = Image.new("RGB", canvas_size, (255, 255, 255))
    draw = ImageDraw.Draw(comic)

    # Fall back to the built-in bitmap font when Arial cannot be loaded.
    try:
        title_font = ImageFont.truetype("Arial.ttf", 36)
        desc_font = ImageFont.truetype("Arial.ttf", 18)
    except IOError:
        fallback_font = ImageFont.load_default()
        title_font = fallback_font
        desc_font = fallback_font

    draw.text((20, 20), title, fill=black, font=title_font)
    draw.rectangle([50, 80, width - 50, height - 50], outline=black, fill=(220, 220, 220))

    if description:
        max_chars = 300
        if len(description) > max_chars:
            short_desc = description[:max_chars] + "..."
        else:
            short_desc = description
        draw.text((60, 100), textwrap.fill(short_desc, width=70), fill=black, font=desc_font)

    return comic
|
|
@log_execution |
|
|
def split_comic_into_scenes(self, comic_image, num_scenes, preferred_layout=None, use_gemini_analysis=True):
    """
    Split a comic image into individual scene images.

    Two strategies are available:

    * Gemini (default): take a ``(rows, cols)`` grid from ``preferred_layout``
      or from ``analyze_comic_layout_with_enhanced_gemini``, pass it through
      ``_validate_and_optimize_layout`` and crop the grid cells with
      ``_extract_scenes_with_quality_check``.
    * OpenCV: write the image to a temporary directory, run
      ``split_comic_panels`` on it and load the ``panel_*.png`` files found in
      its output directory.

    Args:
        comic_image: PIL.Image.Image object of the comic
        num_scenes: Expected number of scenes. Caps the number of panels
            cropped in Gemini mode; in OpenCV mode it is only used for the
            single-panel shortcut.
        preferred_layout: Optional tuple (rows, cols) used instead of the
            Gemini analysis (not used by the OpenCV path)
        use_gemini_analysis: True for Gemini (default), False for OpenCV.

    Returns:
        list: List of PIL.Image.Image objects, one for each scene. The OpenCV
        path returns ``[comic_image]`` when splitting fails or yields nothing.

    Raises:
        ValueError: If ``comic_image`` is not a PIL image.
    """
    if not isinstance(comic_image, Image.Image):
        raise ValueError("comic_image must be a PIL.Image.Image object")

    # A single-panel comic needs no OpenCV detection pass.
    if num_scenes <= 1 and not use_gemini_analysis:
        return [comic_image]

    width, height = comic_image.size
    print(f"π― Splitting {width}x{height} comic into scenes (Target: {num_scenes} scenes if using grid, auto-detect if OpenCV)...")

    if use_gemini_analysis:
        print("π Analyzing comic layout with enhanced Gemini Vision...")
        if preferred_layout:
            rows, cols = preferred_layout
            print(f"π― Using manual override for Gemini: {rows}Γ{cols} layout")
        else:
            rows, cols = self.analyze_comic_layout_with_enhanced_gemini(comic_image, num_scenes)

        rows, cols = self._validate_and_optimize_layout(rows, cols, num_scenes, width, height)

        actual_panels = rows * cols
        print(f"✅ Using Gemini-derived {rows}Γ{cols} grid layout - will extract {min(actual_panels, num_scenes)} panels")

        return self._extract_scenes_with_quality_check(comic_image, rows, cols, num_scenes)

    print("π© Using OpenCV for panel splitting...")
    temp_dir = tempfile.mkdtemp()
    temp_image_path = os.path.join(temp_dir, "source_comic.png")
    panels_output_dir = os.path.join(temp_dir, "output_panels")

    try:
        comic_image.save(temp_image_path, "PNG")
        split_comic_panels(temp_image_path, panels_output_dir)

        extracted_scenes = []
        if os.path.exists(panels_output_dir):
            # Order by the numbers embedded in the file name so that
            # "panel_10" follows "panel_9" instead of "panel_1".
            # NOTE(review): assumes the splitter numbers panels in reading
            # order - confirm against split_comic_panels.
            panel_files = sorted(
                (
                    f for f in os.listdir(panels_output_dir)
                    if f.startswith("panel_") and f.endswith(".png")
                ),
                key=lambda name: ([int(digits) for digits in re.findall(r"\d+", name)], name),
            )
            for panel_file in panel_files:
                try:
                    panel_image_path = os.path.join(panels_output_dir, panel_file)
                    # Image.open is lazy; copy the pixels into memory and close
                    # the file before the temporary directory is removed.
                    with Image.open(panel_image_path) as img:
                        extracted_scenes.append(img.copy())
                except Exception as e:
                    print(f"Error loading panel image {panel_file}: {e}")

        if not extracted_scenes:
            print("β οΈ OpenCV panel splitter did not return any panels. Returning original image.")
            return [comic_image]

        print(f"✅ OpenCV successfully extracted {len(extracted_scenes)} panels.")
        return extracted_scenes

    except Exception as e:
        print(f"β Error during OpenCV panel splitting: {e}")
        return [comic_image]
    finally:
        # Best-effort cleanup: a failure here must not replace the result.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
|
@log_execution |
|
|
def _validate_and_optimize_layout(self, rows, cols, num_scenes, image_width, image_height): |
|
|
"""Validate and optimize the layout based on image properties and panel count.""" |
|
|
panel_width = image_width / cols |
|
|
panel_height = image_height / rows |
|
|
panel_aspect_ratio = panel_width / panel_height |
|
|
|
|
|
if panel_width < 50 or panel_height < 50: |
|
|
print(f"β οΈ Panels too small ({panel_width:.0f}x{panel_height:.0f}). Recalculating layout...") |
|
|
return self._calculate_optimal_grid_layout(num_scenes) |
|
|
|
|
|
if panel_aspect_ratio < 0.2 or panel_aspect_ratio > 5.0: |
|
|
print(f"β οΈ Panel aspect ratio {panel_aspect_ratio:.2f} is extreme. Optimizing layout...") |
|
|
return self._calculate_optimal_grid_layout(num_scenes) |
|
|
|
|
|
if num_scenes == 12: |
|
|
optimal_12_layouts = [(3, 4), (4, 3), (2, 6), (6, 2)] |
|
|
current_layout = (rows, cols) |
|
|
|
|
|
if current_layout not in optimal_12_layouts: |
|
|
image_aspect = image_width / image_height |
|
|
best_layout = (3, 4) |
|
|
best_score = float('inf') |
|
|
|
|
|
for opt_rows, opt_cols in optimal_12_layouts: |
|
|
layout_aspect = opt_cols / opt_rows |
|
|
score = abs(layout_aspect - image_aspect) |
|
|
if score < best_score: |
|
|
best_score = score |
|
|
best_layout = (opt_rows, opt_cols) |
|
|
|
|
|
print(f"π Optimizing 12-panel layout from {rows}Γ{cols} to {best_layout[0]}Γ{best_layout[1]}") |
|
|
return best_layout |
|
|
|
|
|
if num_scenes == 24: |
|
|
optimal_24_layouts = [(4, 6), (6, 4), (3, 8), (8, 3)] |
|
|
current_layout = (rows, cols) |
|
|
|
|
|
if current_layout not in optimal_24_layouts: |
|
|
image_aspect = image_width / image_height |
|
|
best_layout = (4, 6) |
|
|
best_score = float('inf') |
|
|
|
|
|
for opt_rows, opt_cols in optimal_24_layouts: |
|
|
layout_aspect = opt_cols / opt_rows |
|
|
score = abs(layout_aspect - image_aspect) |
|
|
if score < best_score: |
|
|
best_score = score |
|
|
best_layout = (opt_rows, opt_cols) |
|
|
|
|
|
print(f"π Optimizing 24-panel layout from {rows}Γ{cols} to {best_layout[0]}Γ{best_layout[1]} for compact scenes") |
|
|
return best_layout |
|
|
|
|
|
return (rows, cols) |
|
|
@log_execution |
|
|
def _extract_scenes_with_quality_check(self, comic_image, rows, cols, num_scenes):
    """Crop grid cells out of the comic in row-major order.

    Cells are visited left-to-right, top-to-bottom until ``num_scenes`` crops
    have been collected or the grid is exhausted. Each crop is passed to
    ``self._validate_scene_quality``; a failing crop is reported but still
    kept, so the result always follows the grid order.

    Args:
        comic_image: PIL.Image.Image object of the comic.
        rows: Number of grid rows.
        cols: Number of grid columns.
        num_scenes: Maximum number of panels to extract.

    Returns:
        list: Cropped PIL.Image.Image objects.
    """
    width, height = comic_image.size

    scene_width = width // cols
    scene_height = height // rows

    # Each crop is widened by this many pixels per side, clamped to the image.
    margin = 2

    scenes = []
    grid_cells = ((row, col) for row in range(rows) for col in range(cols))

    for row, col in grid_cells:
        if len(scenes) >= num_scenes:
            break

        x1 = max(0, col * scene_width - margin)
        y1 = max(0, row * scene_height - margin)
        x2 = min(width, (col + 1) * scene_width + margin)
        y2 = min(height, (row + 1) * scene_height + margin)

        scene = comic_image.crop((x1, y1, x2, y2))

        if not self._validate_scene_quality(scene):
            print(f"β οΈ Scene {len(scenes) + 1} failed quality check, keeping anyway")
        scenes.append(scene)

    print(f"✅ Successfully extracted {len(scenes)} scenes")
    return scenes
|
|
@log_execution |
|
|
def _validate_scene_quality(self, scene): |
|
|
"""Validate that a scene contains meaningful content.""" |
|
|
try: |
|
|
import numpy as np |
|
|
|
|
|
scene_array = np.array(scene) |
|
|
|
|
|
if len(scene_array.shape) == 3: |
|
|
variance = np.var(scene_array) |
|
|
if variance < 10: |
|
|
return False |
|
|
|
|
|
if scene.width < 20 or scene.height < 20: |
|
|
return False |
|
|
|
|
|
return True |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Scene quality check failed: {e}") |
|
|
return True |
|
|
@log_execution |
|
|
def analyze_comic_layout_with_enhanced_gemini(self, comic_image, num_scenes):
    """
    Ask Gemini Vision for the panel grid of a comic image.

    The model is prompted to answer with a JSON object holding the detected
    row and column counts. The request is tried up to two times. An answer is
    accepted when both counts are positive and their product matches the
    reported panel total. For 12-scene comics the detected total must also lie
    between 10 and 18, otherwise the table-based layout is used. If no attempt
    produces a usable answer, ``self._calculate_optimal_grid_layout`` supplies
    the result.

    Args:
        comic_image: PIL.Image.Image object of the comic
        num_scenes: Expected number of scenes (used for context and validation)

    Returns:
        tuple: (rows, cols) representing the detected grid layout
    """
    try:
        model = GenerativeModel('gemini-2.5-flash')

        analysis_prompt = f"""
You are a professional comic book layout analyst. Examine this comic image carefully to determine its precise panel grid structure.

ANALYSIS TASK:
- Count the exact number of ROWS (horizontal divisions)
- Count the exact number of COLUMNS (vertical divisions)
- Expected panels: {num_scenes} (use as context, but trust what you see)

DETECTION GUIDELINES:
1. Look for panel borders, gutters, or visual separations
2. Identify consistent grid patterns
3. Count horizontal lines that divide rows
4. Count vertical lines that divide columns
5. For 12 panels, common layouts are: 3Γ4, 4Γ3, 2Γ6, or 6Γ2
6. Trust visual evidence over expected numbers

VISUAL INDICATORS TO LOOK FOR:
- Black border lines between panels
- White gutters or spacing between sections
- Consistent rectangular divisions
- Grid-like organization of content
- Clear separation of distinct visual areas

IMPORTANT: Be precise about what you actually observe. If you see a clear grid pattern, report it exactly.

Respond with ONLY this JSON format:
{{
"detected_rows": [number of rows you count],
"detected_cols": [number of columns you count],
"total_panels_detected": [rows Γ cols],
"confidence": "high/medium/low",
"layout_description": "detailed description of the grid structure you observe",
"visual_evidence": "description of the visual cues that led to this conclusion"
}}

Be extremely precise in your counting.
"""

        max_retries = 2
        for attempt in range(max_retries):
            try:
                response = model.generate_content([analysis_prompt, comic_image])
                response_text = response.text.strip()

                print(f"Gemini Vision analysis (attempt {attempt + 1}): {response_text[:200]}...")

                # The model may wrap the JSON in prose or code fences, so take
                # everything from the first "{" to the last "}".
                json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                if not json_match:
                    continue

                analysis_result = json.loads(json_match.group())

                # Counts can come back as floats or numeric strings; normalise
                # them so the layout is safe to use with range() downstream.
                # A value that cannot be converted raises and triggers a retry.
                rows = int(analysis_result.get("detected_rows", 0))
                cols = int(analysis_result.get("detected_cols", 0))
                total_detected = int(analysis_result.get("total_panels_detected", 0))
                confidence = analysis_result.get("confidence", "unknown")
                evidence = analysis_result.get("visual_evidence", "")

                if rows <= 0 or cols <= 0:
                    print(f"β Invalid dimensions: {rows}Γ{cols}")
                    continue

                if total_detected != rows * cols:
                    print(f"β Math inconsistency: {rows}Γ{cols} β  {total_detected}")
                    continue

                print(f"✅ Gemini detected {rows}Γ{cols} layout ({total_detected} panels) with {confidence} confidence")
                print(f"Evidence: {evidence}")

                if num_scenes != 12:
                    return (rows, cols)

                if 10 <= total_detected <= 18:
                    print(f"π Layout reasonable for 12-panel comic")
                    return (rows, cols)

                print(f"β οΈ Detected {total_detected} panels for 12-panel comic. Using optimized layout.")
                return self._calculate_optimal_grid_layout(num_scenes)

            except json.JSONDecodeError as e:
                print(f"β JSON parsing error on attempt {attempt + 1}: {e}")

            except Exception as e:
                print(f"β Analysis error on attempt {attempt + 1}: {e}")

    except Exception as e:
        print(f"β Gemini Vision analysis completely failed: {e}")

    print("β οΈ Using optimized grid calculation as fallback")
    return self._calculate_optimal_grid_layout(num_scenes)
|
|
@log_execution |
|
|
def _find_all_factorizations(self, n): |
|
|
""" |
|
|
Find all possible factorizations of a number into rows Γ columns. |
|
|
Enhanced with better algorithm for large numbers like 24. |
|
|
|
|
|
Args: |
|
|
n: Number to factorize |
|
|
|
|
|
Returns: |
|
|
list: List of tuples (rows, cols) where rows * cols = n, sorted by preference |
|
|
""" |
|
|
factorizations = [] |
|
|
for i in range(1, int(n**0.5) + 1): |
|
|
if n % i == 0: |
|
|
rows, cols = i, n // i |
|
|
factorizations.append((rows, cols)) |
|
|
if rows != cols: |
|
|
factorizations.append((cols, rows)) |
|
|
|
|
|
factorizations.sort(key=lambda x: (abs(x[0] - x[1]), max(x[0], x[1]))) |
|
|
return factorizations |
|
|
@log_execution |
|
|
def _calculate_optimal_layout(self, num_scenes, image_width, image_height):
    """
    Pick the grid whose shape best matches the image.

    Every factor pair from ``self._find_all_factorizations`` is scored by the
    distance between its cols/rows ratio and the image aspect ratio, plus a
    penalty of 2.0 when the resulting panels would be narrower than 0.3 or
    wider than 3.0 (width/height). The lowest score wins; ties go to the
    earlier pair in the factorization list.

    Args:
        num_scenes: Number of scenes to arrange
        image_width: Width of the comic image
        image_height: Height of the comic image

    Returns:
        tuple: (rows, cols) representing the optimal grid layout. When no
        factor pair exists, a near-square grid is returned, with ``(1, 1)``
        for counts below 1.
    """
    image_aspect_ratio = image_width / image_height

    factorizations = self._find_all_factorizations(num_scenes)

    if not factorizations:
        if num_scenes < 1:
            return (1, 1)

        import math

        # Clamp rows to 1 so that counts whose square root truncates to zero
        # do not divide by zero.
        rows = max(1, int(math.sqrt(num_scenes)))
        cols = math.ceil(num_scenes / rows)
        return (rows, cols)

    def layout_score(layout):
        """Lower is better: aspect mismatch plus a penalty for extreme panels."""
        rows, cols = layout
        aspect_diff = abs(cols / rows - image_aspect_ratio)
        panel_aspect = (image_width / cols) / (image_height / rows)
        extremeness_penalty = 2.0 if panel_aspect < 0.3 or panel_aspect > 3.0 else 0
        return aspect_diff + extremeness_penalty

    # min() keeps the first of equally scored layouts, i.e. the more
    # preferred factorization.
    return min(factorizations, key=layout_score)
|
|
@log_execution |
|
|
def get_possible_layouts(self, num_scenes):
    """
    List candidate grid layouts for a scene count, best first.

    For a fixed set of scene counts the result starts with the layout from
    ``_calculate_optimal_grid_layout`` and continues with the remaining
    factorizations. For every other count the factorizations are returned as
    they come from ``_find_all_factorizations``.

    Args:
        num_scenes: Number of scenes

    Returns:
        list: List of tuples (rows, cols) representing possible layouts, sorted by preference
    """
    curated_counts = (1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 21, 24)

    if num_scenes not in curated_counts:
        return self._find_all_factorizations(num_scenes)

    preferred = self._calculate_optimal_grid_layout(num_scenes)
    remaining = [
        candidate
        for candidate in self._find_all_factorizations(num_scenes)
        if candidate != preferred
    ]
    return [preferred] + remaining
|
|
@log_execution |
|
|
def generate_comic_with_quality_metrics(self, story_data, output_path=None, style=None):
    """
    Generate a comic and report metrics about the run.

    Builds the prompt with ``_create_comic_prompt``, calls
    ``generate_image_fn`` and substitutes the enhanced placeholder when no
    image comes back. Any exception raised during generation is caught: the
    placeholder is returned and the error is recorded in the metrics.

    Args:
        story_data: Dictionary containing the story information
        output_path: Optional path to save the resulting image
        style: Optional comic style to use

    Returns:
        tuple: (comic_image, data_url, quality_metrics). ``quality_metrics``
        always holds ``character_count``, ``setting_count``,
        ``description_length``, ``optimal_layout``, ``generation_complexity``
        and ``generation_status`` ("success", "placeholder" or "error"). A run
        that finishes adds ``generation_time``, ``image_size`` and
        ``prompt_complexity``; a failed run adds ``error_message``.
    """
    start_time = time.time()

    title = story_data.get("title", "Enhanced Comic")
    # ``or`` also covers keys that are present but set to None, which would
    # otherwise fail in the metric calculations below, outside the try block.
    description = story_data.get("description") or ""
    characters = story_data.get("characters") or []
    settings = story_data.get("settings") or []
    num_scenes = 9

    quality_metrics = {
        "character_count": len([c for c in characters if isinstance(c, dict) and "visual_description" in c]),
        "setting_count": len([s for s in settings if isinstance(s, dict) and "description" in s]),
        "description_length": len(description),
        "optimal_layout": self._calculate_optimal_grid_layout(num_scenes),
        "generation_complexity": "high" if num_scenes >= 20 else "medium" if num_scenes >= 10 else "low"
    }

    def to_data_url(image):
        """Encode ``image`` as a base64 PNG data URL."""
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
        return f"data:image/png;base64,{img_b64}"

    try:
        prompt = self._create_comic_prompt(title, description, characters, settings, style, num_scenes)

        print(f"π¨ Generating {num_scenes}-panel comic with enhanced prompt ({len(prompt)} characters)")

        comic_image = generate_image_fn(
            selected_prompt=prompt,
            output_path=output_path
        )

        if comic_image is None:
            comic_image = self._create_enhanced_placeholder_comic(title, description, num_scenes)
            quality_metrics["generation_status"] = "placeholder"
        else:
            quality_metrics["generation_status"] = "success"

        if output_path:
            directory = os.path.dirname(output_path)
            if directory:
                # exist_ok avoids failing when the directory appears between
                # a separate existence check and the creation call.
                os.makedirs(directory, exist_ok=True)
            comic_image.save(output_path)

        data_url = to_data_url(comic_image)

        quality_metrics["generation_time"] = time.time() - start_time
        quality_metrics["image_size"] = (comic_image.width, comic_image.height)
        quality_metrics["prompt_complexity"] = len(prompt.split())

        return comic_image, data_url, quality_metrics

    except Exception as e:
        print(f"Error in enhanced generation: {str(e)}")
        placeholder = self._create_enhanced_placeholder_comic(title, description, num_scenes)
        data_url = to_data_url(placeholder)

        quality_metrics["generation_status"] = "error"
        quality_metrics["error_message"] = str(e)

        return placeholder, data_url, quality_metrics
|
|
@log_execution |
|
|
def _create_enhanced_placeholder_comic(self, title, description, num_scenes):
    """
    Draw a placeholder image that previews the intended panel grid.

    The canvas grows with the panel count (800x600 up to 4 panels, 1200x900 up
    to 12, 1600x1200 beyond that). It shows a title line, a line describing
    the grid from ``_calculate_optimal_grid_layout``, one numbered box per
    panel and, when given, up to 200 characters of the description near the
    bottom edge.

    Args:
        title: Title of the comic
        description: Description of the comic
        num_scenes: Number of scenes the comic should have

    Returns:
        PIL.Image.Image: Enhanced placeholder comic image
    """
    size_tiers = ((4, (800, 600)), (12, (1200, 900)))
    width, height = next(
        (size for limit, size in size_tiers if num_scenes <= limit),
        (1600, 1200),
    )

    canvas = Image.new("RGB", (width, height), (248, 248, 248))
    pen = ImageDraw.Draw(canvas)

    # Font sizes scale with the canvas width; if Arial cannot be loaded, all
    # three fall back to the built-in PIL font.
    try:
        heading_font = ImageFont.truetype("Arial.ttf", max(24, width // 40))
        label_font = ImageFont.truetype("Arial.ttf", max(12, width // 80))
        caption_font = ImageFont.truetype("Arial.ttf", max(10, width // 100))
    except IOError:
        heading_font = label_font = caption_font = ImageFont.load_default()

    pen.text(
        (20, 20),
        f"{title} - {num_scenes} Panel Layout Preview",
        fill=(50, 50, 50),
        font=heading_font,
    )

    rows, cols = self._calculate_optimal_grid_layout(num_scenes)
    pen.text(
        (20, 60),
        f"Layout: {rows}Γ{cols} grid ({rows * cols} panels)",
        fill=(100, 100, 100),
        font=label_font,
    )

    grid_top = 100
    grid_height = height - grid_top - 60
    cell_w = (width - 60) // cols
    cell_h = grid_height // rows

    drawn = 0
    for row, col in ((r, c) for r in range(rows) for c in range(cols)):
        if drawn >= num_scenes:
            break

        left = 30 + col * cell_w
        top = grid_top + row * cell_h

        pen.rectangle(
            [left, top, left + cell_w - 10, top + cell_h - 10],
            outline=(150, 150, 150),
            fill=(255, 255, 255),
        )
        pen.text(
            (left + 10, top + 10),
            f"Panel {drawn + 1}",
            fill=(100, 100, 100),
            font=label_font,
        )
        drawn += 1

    if description:
        if len(description) > 200:
            caption = description[:200] + "..."
        else:
            caption = description
        pen.text(
            (30, height - 50),
            textwrap.fill(caption, width=80),
            fill=(80, 80, 80),
            font=caption_font,
        )

    return canvas
|
|
|
|
|
|
|
|
@log_execution |
|
|
|
|
|
def generate_panel_descriptions(self, final_prompt, num_scenes=9):
    """
    Turn a story into a panel-by-panel storyboard and wrap it in the full
    image-generation prompt.

    A Gemini text model is asked to break ``final_prompt`` into
    ``num_scenes`` distinct panels. Its answer is embedded in a fixed template
    that states the grid, silent-comic, style and negative-prompt rules.

    Args:
        final_prompt: The complete story/prompt text
        num_scenes: Number of panels (default: 9)

    Returns:
        str: Complete formatted prompt ready for image generation, or None
        when the model call fails (the error is printed).
    """
    try:
        model = GenerativeModel('gemini-2.0-flash-exp')

        analysis_prompt = f"""You are a master comic book storyteller. Break down this story into {num_scenes} COMPLETELY DIFFERENT panels.

STORY:
{final_prompt}

ABSOLUTE REQUIREMENTS FOR UNIQUENESS:

1. STORY STRUCTURE - Divide the story into {num_scenes} distinct narrative beats:
- Each panel = ONE specific story moment that happens at a DIFFERENT time
- Panel 1 happens BEFORE Panel 2, Panel 2 BEFORE Panel 3, etc.
- NO panel should show the same moment or similar action
- Think of it like a movie: each panel is a different scene

2. VISUAL VARIETY - Each panel MUST have:
- DIFFERENT location or setting (if story allows)
- DIFFERENT character positions and poses
- DIFFERENT camera angle/shot type
- DIFFERENT action or emotional beat
- DIFFERENT time of day or lighting (if applicable)

3. SHOT TYPES - Use variety:
- Extreme Wide Shot, Wide Shot, Medium Shot, Close-Up, Extreme Close-Up, Over-the-Shoulder, Low Angle, High Angle, Bird's Eye View

FORMAT EXACTLY LIKE THIS:
Panel 1: [Title]
Shot Type: [Type]
Content: [Detailed description]

Panel 2: [Different title]
Shot Type: [Different type]
Content: [Completely different scene]

Generate all {num_scenes} panels now:"""

        generation_config = GenerationConfig(
            temperature=0.9,
            top_p=0.95,
        )

        response = model.generate_content(analysis_prompt, generation_config=generation_config)
        panel_descriptions = response.text.strip()

        # The label is inserted after "in a ..." and before "uniform grid
        # structure", so it must read as a singular noun phrase for every
        # panel count, not only for the 3x3 case.
        if num_scenes == 9:
            grid_layout = "3x3 grid (3 rows, 3 columns)"
        else:
            grid_layout = f"{num_scenes}-panel grid"

        complete_prompt = f'''"""CRITICAL COMMAND: UNIFORM {grid_layout.upper()} (NON-NEGOTIABLE)

Layout: Generate exactly {num_scenes} panels in a {grid_layout}.
Panel Integrity: Every panel MUST be identical in size and shape. Do not change panel dimensions for any reason.
Formatting: Use clean, equal-width white gutters between all panels and a uniform thin black border around each panel.
CRITICAL RULE: SILENT COMIC - NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS EVER.
CRITICAL RULE: EACH PANEL IS A DISTINCT SCENE.
Each panel must depict a unique, self-contained moment or tableau from the story.
PANEL-BY-PANEL STORYBOARD (READ LEFT-TO-RIGHT, TOP-TO-BOTTOM)
{panel_descriptions}

GLOBAL STYLE & CONSISTENCY MANDATES
Art Style: Modern Digital Manga

Line Art: Sharp digital lines, clean and precise, emphasizing dynamic movement and emotional clarity.
Tones & Shading: Cel shading with clear, distinct shadows and highlights, giving a vibrant yet defined look.
Composition: Every panel must have a clear focal point and excellent use of foreground, midground, and background elements.
Character Consistency: Characters must maintain consistent facial features, hair, and design throughout all panels while showing progression in age, clothing, or emotional state as the story requires.
Environmental & Lighting Continuity: Lighting and atmosphere should support the narrative progression and emotional tone of each scene.
Color Palette: A vibrant and saturated palette that enhances the story's emotional journey.

FINAL OVERRIDE & NEGATIVE PROMPTS
ABSOLUTE RULE: The {grid_layout} uniform grid structure is the most important rule and must be followed perfectly.
NEGATIVE PROMPTS: NO deformed anatomy, NO missing limbs or facial features, NO inconsistent character designs, NO blurry or out-of-focus elements, NO sketch-like aesthetics (unless intentional style choice), NO TEXT, NO SPEECH BUBBLES, NO SOUND EFFECTS, NO CAPTIONS, NO watermarks, NO VARIATION IN PANEL SIZE OR SHAPE.
"""'''

        print(f"Generated complete prompt with {num_scenes} panels")

        return complete_prompt

    except Exception as e:
        print(f"Error generating complete prompt: {e}")
        return None