#!/usr/bin/env python3
"""
Gradio application for near real-time audio translation.
Simplified version compatible with Hugging Face Spaces (Zero GPU).
"""
import os
import tempfile
import time
import gradio as gr
import numpy as np
import torch
import librosa
import soundfile as sf
from typing import Optional, Tuple

# Import spaces for Zero GPU
try:
    import spaces
    SPACES_ZERO_GPU = True
    print("✅ Spaces Zero GPU available")
except ImportError:
    SPACES_ZERO_GPU = False
    print("⚠️ Spaces Zero GPU not available (local mode)")

    # Create a dummy decorator for local mode (supports bare @spaces.GPU usage)
    class spaces:
        @staticmethod
        def GPU(func):
            return func

# Environment detection
IS_HUGGINGFACE_SPACES = os.getenv("SPACE_ID") is not None
IS_LOCAL = not IS_HUGGINGFACE_SPACES

# Environment-specific configuration
if IS_HUGGINGFACE_SPACES:
    print("🌐 Detected environment: Hugging Face Spaces")
else:
    print("🏠 Detected environment: Local")

# Import for Whisper STT
from transformers import pipeline

# Simplified engine classes, defined inline below
class WhisperSTTEngine:
    """STT engine based on Whisper (simple and effective)."""

    def __init__(self, device="cpu"):
        self.device = device
        self.model = None
        print("🎙️ Initializing the Whisper STT engine...")

    def load_model(self):
        """Loads the Whisper model."""
        if self.model is not None:
            return True
        try:
            print("📥 Loading the Whisper model...")
            # Use a lightweight Whisper model with FR/EN support
            self.model = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-small",  # "small" for better multilingual support
                device=0 if self.device == "cuda" else -1
            )
            print("✅ Whisper model loaded successfully!")
            return True
        except Exception as e:
            print(f"❌ Error loading Whisper: {e}")
            return False

    def transcribe(self, audio_path: str, language: str = "fr") -> str:
        """Transcribes an audio file in the specified language."""
        if self.model is None:
            if not self.load_model():
                return ""
        try:
            print(f"🎙️ Transcribing: {audio_path} (language: {language})")
            # Load the audio with librosa (avoids the need for ffmpeg)
            audio_array, sample_rate = librosa.load(audio_path, sr=16000)  # Whisper expects 16 kHz
            # Limit to 30 seconds maximum (3000 mel features ≈ 30 s)
            max_duration = 30  # seconds
            max_samples = max_duration * sample_rate
            if len(audio_array) > max_samples:
                audio_array = audio_array[:max_samples]
                print(f"✂️ Audio truncated to {max_duration} seconds")
            # Map the language code (fr/en) to the full language name expected by Whisper
            lang_map = {
                "fr": "french",
                "en": "english"
            }
            whisper_lang = lang_map.get(language, "french")
            # Pass the audio array directly to Whisper with the language forced
            result = self.model(
                audio_array,
                generate_kwargs={
                    "language": whisper_lang,
                    "task": "transcribe"  # Transcription (not translation)
                }
            )
            transcription = result["text"].strip()
            print(f"✅ Transcription ({language}): {transcription}")
            return transcription
        except Exception as e:
            print(f"❌ Transcription error: {e}")
            import traceback
            traceback.print_exc()
            return ""
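# A minimal, hypothetical smoke test for the STT engine (not called by the app).
# It assumes a local audio file named "sample_fr.wav"; the file name and this
# helper are illustrative only.
def _demo_stt_engine(audio_path: str = "sample_fr.wav") -> None:
    """Transcribe one sample file on CPU and print the result."""
    engine = WhisperSTTEngine(device="cpu")
    text = engine.transcribe(audio_path, language="fr")
    print(f"Demo transcription: {text!r}")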
class SimpleTTSEngine:
    """TTS engine based on gTTS (simple and reliable)."""

    def __init__(self):
        print("🔊 gTTS TTS engine initialized")

    def generate_speech(self, text: str, voice: str = None) -> Optional[str]:
        """Generates speech with gTTS."""
        if not text.strip():
            return None
        try:
            from gtts import gTTS
            # Determine the language from the requested voice
            if voice and "fr" in voice.lower():
                lang = "fr"
            elif voice and "en" in voice.lower():
                lang = "en"
            else:
                # Fall back to a rough heuristic: French accented characters imply French
                if any(char in text for char in "àâäéèêëïîôöùûüÿç"):
                    lang = "fr"
                else:
                    lang = "en"
            print(f"🎤 Generating audio with gTTS (language: {lang})")
            # Create a temporary file
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
            temp_path = temp_file.name
            temp_file.close()
            # Generate the audio with gTTS
            tts = gTTS(text=text, lang=lang, slow=False)
            tts.save(temp_path)
            print(f"✅ Audio generated: {temp_path}")
            return temp_path
        except Exception as e:
            print(f"❌ TTS error: {e}")
            import traceback
            traceback.print_exc()
            return None

    def get_voice_for_language(self, language: str) -> str:
        """Returns the appropriate gTTS language code."""
        voices = {
            "fr": "fr",  # French
            "en": "en"   # English
        }
        return voices.get(language, "fr")
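# A minimal, hypothetical usage sketch for the TTS engine (not called by the app).
# It synthesizes one short French sentence to a temporary MP3; the sentence is
# illustrative only and the call requires network access for gTTS.
def _demo_tts_engine() -> None:
    """Synthesize a short French sentence and report the generated file path."""
    tts = SimpleTTSEngine()
    voice = tts.get_voice_for_language("fr")
    path = tts.generate_speech("Bonjour, ceci est un test.", voice=voice)
    print(f"Demo TTS file: {path}")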
class SimpleTranslationEngine:
    """Translation engine based on Helsinki-NLP models."""

    def __init__(self, device="cpu"):
        self.device = device
        self.models_ready = False
        self.translator_fr_en = None
        self.translator_en_fr = None
        print("🌍 Translation engine initialized")

    def load_models(self):
        """Loads the translation models."""
        if self.models_ready:
            return True
        try:
            print("📥 Loading the translation models...")
            # FR -> EN model
            self.translator_fr_en = pipeline(
                "translation",
                model="Helsinki-NLP/opus-mt-fr-en",
                device=0 if self.device == "cuda" else -1
            )
            # EN -> FR model
            self.translator_en_fr = pipeline(
                "translation",
                model="Helsinki-NLP/opus-mt-en-fr",
                device=0 if self.device == "cuda" else -1
            )
            self.models_ready = True
            print("✅ Translation models loaded successfully!")
            return True
        except Exception as e:
            print(f"❌ Error loading translation models: {e}")
            return False

    def translate(self, text: str, source_lang: str = "fr", target_lang: str = "en") -> str:
        """Translates text with Helsinki-NLP."""
        if not text.strip():
            return ""
        try:
            # Load the models if needed
            if not self.load_models():
                return f"[Error: models not loaded] {text}"
            # Pick the translator for the requested direction
            if source_lang == "fr" and target_lang == "en":
                translator = self.translator_fr_en
            elif source_lang == "en" and target_lang == "fr":
                translator = self.translator_en_fr
            else:
                return f"[Direction {source_lang} → {target_lang} not supported] {text}"
            # Translate
            result = translator(text)
            translation = result[0]["translation_text"].strip()
            # Keep only the first line of the output
            if '\n' in translation:
                translation = translation.split('\n')[0]
            # Strip common translation prefixes
            prefixes_to_remove = [
                "Translation:", "Traduction:", "English:", "Français:",
                "French:", "Anglais:", "Here's the translation:",
                "Voici la traduction:", "The translation is:",
                "La traduction est:"
            ]
            for prefix in prefixes_to_remove:
                if translation.lower().startswith(prefix.lower()):
                    translation = translation[len(prefix):].strip()
                    break
            # Strip surrounding quotes
            if translation.startswith('"') and translation.endswith('"'):
                translation = translation[1:-1]
            elif translation.startswith("'") and translation.endswith("'"):
                translation = translation[1:-1]
            return translation if translation else text
        except Exception as e:
            print(f"❌ Translation error: {e}")
            import traceback
            traceback.print_exc()
            return f"[Error: {str(e)[:50]}]"
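# A minimal, hypothetical sketch of the translation engine used on its own (not
# called by the app). The sample sentence and CPU device are illustrative.
def _demo_translation_engine() -> None:
    """Translate one French sentence to English and print both texts."""
    engine = SimpleTranslationEngine(device="cpu")
    source = "Bonjour tout le monde"
    print(f"FR: {source}")
    print(f"EN: {engine.translate(source, source_lang='fr', target_lang='en')}")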
class AudioTranslationApp:
    """Gradio application for audio translation."""

    def __init__(self):
        """Initializes the application."""
        # Device configuration depending on the environment
        if IS_HUGGINGFACE_SPACES and SPACES_ZERO_GPU:
            # Use the GPU through Hugging Face Spaces Zero GPU
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            print("🚀 Zero GPU enabled - GPU available on demand")
        elif IS_HUGGINGFACE_SPACES:
            self.device = "cpu"  # Force CPU on Hugging Face Spaces without Zero GPU
        else:
            # Locally, use the GPU if available
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Simplified engines
        self.stt_engine = WhisperSTTEngine(device=self.device)
        self.tts_engine = SimpleTTSEngine()
        self.translation_engine = SimpleTranslationEngine(device=self.device)
        # Application state
        self.is_processing = False
        self.current_audio_path = None
        print("🚀 Audio translation application initialized")
        print(f"🔧 Device in use: {self.device}")
        if self.device == "cuda":
            print(f"🚀 GPU detected: {torch.cuda.get_device_name(0)}")
            print(f"💾 GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    def load_stt_model(self):
        """Loads the STT model if it is not already loaded.

        Note: this is a leftover code path for a Moshi/Kyutai STT backend. Since
        self.stt_engine is always a WhisperSTTEngine, the method returns early and
        the branch below is never executed (InferenceState is not defined here).
        """
        if self.stt_engine is not None:
            return True
        try:
            print("📥 Loading the STT model...")
            from moshi.models import loaders
            ckpt = loaders.CheckpointInfo.from_hf_repo("kyutai/stt-1b-en_fr")
            mimi = ckpt.get_mimi(device=self.device)
            text_tok = ckpt.get_text_tokenizer()
            lm = ckpt.get_moshi(device=self.device)
            self.stt_engine = InferenceState(mimi, text_tok, lm, batch_size=1, device=self.device)
            print("✅ STT model loaded successfully!")
            return True
        except Exception as e:
            print(f"❌ Error loading STT: {e}")
            return False
    def process_audio(self, audio_path: str, source_lang: str = "fr", target_lang: str = "en") -> Tuple[str, str, Optional[str]]:
        """
        Processes an audio file: transcription -> translation -> speech synthesis.
        """
        if self.is_processing:
            return "⚠️ Processing already in progress...", "", None
        self.is_processing = True
        try:
            print(f"🎵 Processing audio: {audio_path}")
            # 1. Transcription with Whisper (in the source language)
            print(f"🎙️ Transcription in progress (language: {source_lang})...")
            transcription = self.stt_engine.transcribe(audio_path, language=source_lang)
            if not transcription.strip():
                return "⚠️ No transcription detected", "", None
            print(f"📝 Transcribed text: {transcription}")
            # 2. Translation
            print("🌍 Translation in progress...")
            translation = self.translation_engine.translate(transcription, source_lang, target_lang)
            if not translation.strip():
                return transcription, "⚠️ Translation error", None
            print(f"🌍 Translated text: {translation}")
            # 3. Speech synthesis (TTS)
            print("🔊 Audio generation in progress...")
            output_audio_path = self._generate_speech(translation, target_lang)
            if output_audio_path:
                print("✅ Processing complete!")
            # Return the text results even if TTS failed (output_audio_path may be None)
            return transcription, translation, output_audio_path
        except Exception as e:
            print(f"❌ Processing error: {e}")
            import traceback
            traceback.print_exc()
            return f"❌ Error: {str(e)}", "", None
        finally:
            self.is_processing = False
    def _generate_speech(self, text: str, target_lang: str) -> Optional[str]:
        """Generates the speech audio."""
        try:
            # Pick the voice for the target language
            voice = self.tts_engine.get_voice_for_language(target_lang)
            print(f"🔊 Generating TTS: '{text}' -> {target_lang} (voice: {voice})")
            # Generate the audio
            output_path = self.tts_engine.generate_speech(text, voice)
            return output_path
        except Exception as e:
            print(f"❌ TTS error: {e}")
            import traceback
            traceback.print_exc()
            return None
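# A minimal, hypothetical end-to-end smoke test of the full pipeline (not called
# by the app). It assumes a local file "sample_fr.wav"; the file name and this
# helper are illustrative only.
def _demo_full_pipeline(audio_path: str = "sample_fr.wav") -> None:
    """Run STT -> translation -> TTS on one file and print the intermediate results."""
    demo_app = AudioTranslationApp()
    transcription, translation, output_audio = demo_app.process_audio(
        audio_path, source_lang="fr", target_lang="en"
    )
    print(f"Transcription: {transcription}")
    print(f"Translation:   {translation}")
    print(f"Audio file:    {output_audio}")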
# Global application instance
app = AudioTranslationApp()


@spaces.GPU
def process_audio_file(audio, source_lang, target_lang):
    """
    Gradio handler that processes one audio clip.
    Decorated with spaces.GPU so Hugging Face Spaces Zero GPU can allocate a GPU
    for the duration of the call (a no-op locally thanks to the dummy decorator).
    Args:
        audio: Tuple (sample_rate, audio_data) from Gradio
        source_lang: Source language
        target_lang: Target language
    Returns:
        Tuple (transcription, translation, generated_audio)
    """
    if audio is None:
        return "⚠️ No audio provided", "", None
    try:
        # Extract the audio data
        sample_rate, audio_data = audio
        # Save it to a temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
            sf.write(temp_path, audio_data, sample_rate)
        # Process the audio
        transcription, translation, output_audio = app.process_audio(
            temp_path, source_lang, target_lang
        )
        # Clean up the temporary file
        try:
            os.unlink(temp_path)
        except OSError:
            pass
        return transcription, translation, output_audio
    except Exception as e:
        return f"❌ Error: {str(e)}", "", None
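# A minimal, hypothetical way to call the Gradio handler directly from a WAV file,
# mirroring the (sample_rate, ndarray) tuple that gr.Audio(type="numpy") produces.
# The file name is illustrative only.
def _demo_process_audio_file(path: str = "sample_fr.wav") -> None:
    """Feed a WAV file to process_audio_file as Gradio would."""
    data, sr = sf.read(path)  # float samples, native sample rate
    result = process_audio_file((sr, data), "fr", "en")
    print(result)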
# Gradio interface
def create_interface():
    """Builds the Gradio interface."""
    with gr.Blocks(
        title="🎙️ Speech-to-Speech Translation",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1000px !important;
            margin: 0 auto;
        }
        .gradio-container > div {
            display: flex !important;
            flex-direction: column !important;
            align-items: center !important;
        }
        .main-header {
            text-align: center;
            margin-bottom: 2rem;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 2rem;
            border-radius: 15px;
            box-shadow: 0 10px 30px rgba(0,0,0,0.1);
        }
        .main-header h1 {
            margin: 0;
            font-size: 2.5rem;
            font-weight: 700;
        }
        .main-header p {
            margin: 0.5rem 0 0 0;
            font-size: 1.1rem;
            opacity: 0.9;
        }
        .status-box {
            background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
            border: 2px solid #dee2e6;
            border-radius: 12px;
            padding: 1.5rem;
            margin: 1rem 0;
            box-shadow: 0 4px 15px rgba(0,0,0,0.05);
        }
        .status-box h4 {
            margin: 0 0 0.5rem 0;
            color: #495057;
            font-weight: 600;
        }
        .audio-input {
            border: 2px dashed #6c757d;
            border-radius: 12px;
            padding: 1rem;
            background: #f8f9fa;
        }
        .process-button {
            background: linear-gradient(135deg, #28a745 0%, #20c997 100%) !important;
            border: none !important;
            border-radius: 12px !important;
            padding: 1rem 2rem !important;
            font-size: 1.2rem !important;
            font-weight: 600 !important;
            box-shadow: 0 6px 20px rgba(40, 167, 69, 0.3) !important;
            transition: all 0.3s ease !important;
        }
        .process-button:hover {
            transform: translateY(-2px) !important;
            box-shadow: 0 8px 25px rgba(40, 167, 69, 0.4) !important;
        }
        .result-textbox {
            background: linear-gradient(135deg, #ffffff 0%, #f8f9fa 100%) !important;
            border: 3px solid #e9ecef !important;
            border-radius: 15px !important;
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
            font-size: 1.1rem !important;
            line-height: 1.8 !important;
            padding: 1.5rem !important;
            box-shadow: 0 8px 25px rgba(0,0,0,0.1) !important;
            transition: all 0.3s ease !important;
        }
        .result-textbox:hover {
            transform: translateY(-2px) !important;
            box-shadow: 0 12px 35px rgba(0,0,0,0.15) !important;
        }
        .result-audio {
            border: 3px solid #e9ecef !important;
            border-radius: 15px !important;
            background: linear-gradient(135deg, #ffffff 0%, #f8f9fa 100%) !important;
            padding: 1.5rem !important;
            box-shadow: 0 8px 25px rgba(0,0,0,0.1) !important;
            transition: all 0.3s ease !important;
        }
        .result-audio:hover {
            transform: translateY(-2px) !important;
            box-shadow: 0 12px 35px rgba(0,0,0,0.15) !important;
        }
        .gr-group {
            background: white;
            border: 1px solid #e9ecef;
            border-radius: 15px;
            padding: 1.5rem;
            margin: 1rem auto;
            box-shadow: 0 4px 15px rgba(0,0,0,0.05);
            width: 100%;
            max-width: 800px;
            text-align: center;
        }
        .gr-tabs {
            border-radius: 15px;
            overflow: hidden;
            box-shadow: 0 8px 25px rgba(0,0,0,0.1);
            background: white;
        }
        .gr-tab {
            background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
            border: 2px solid #dee2e6;
            border-radius: 12px;
            margin: 0.5rem;
            padding: 1rem 1.5rem;
            font-weight: 600;
            font-size: 1.1rem;
            transition: all 0.3s ease;
            box-shadow: 0 4px 15px rgba(0,0,0,0.05);
        }
        .gr-tab:hover {
            transform: translateY(-2px);
            box-shadow: 0 8px 25px rgba(0,0,0,0.15);
        }
        .gr-tab.selected {
            background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
            color: white;
            border-color: #007bff;
            box-shadow: 0 8px 25px rgba(0,123,255,0.3);
        }
        .gr-tabs .gr-tab:first-child {
            background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
            color: white;
            font-weight: 700;
            border-color: #28a745;
            box-shadow: 0 8px 25px rgba(40,167,69,0.3);
        }
        .gr-row {
            justify-content: center !important;
        }
        .gr-column {
            display: flex !important;
            flex-direction: column !important;
            align-items: center !important;
        }
        .gr-button {
            margin: 0 auto !important;
        }
        .gr-dropdown {
            margin: 0 auto !important;
        }
        .gr-audio {
            margin: 0 auto !important;
        }
        .gr-textbox {
            margin: 0 auto !important;
        }
        .centered-layout {
            display: flex !important;
            flex-direction: column !important;
            align-items: center !important;
            width: 100% !important;
        }
        .results-section {
            background: linear-gradient(135deg, #ffffff 0%, #f8f9fa 100%) !important;
            border: 3px solid #e9ecef !important;
            border-radius: 20px !important;
            padding: 2rem !important;
            box-shadow: 0 15px 40px rgba(0,0,0,0.1) !important;
            margin: 2rem auto !important;
            max-width: 900px !important;
        }
        .results-section h3 {
            color: #495057 !important;
            font-weight: 700 !important;
            font-size: 1.5rem !important;
            margin-bottom: 1.5rem !important;
            text-align: center !important;
        }
        """
    ) as interface:
        # Header
        gr.HTML("""
            <div class="main-header">
                <h1>🎙️ Speech-to-Speech Translator</h1>
                <p>Record audio, get transcription, translation and generated audio</p>
            </div>
        """)
        # Centered vertical layout
        with gr.Column(elem_classes=["centered-layout"]):
            # Section 1: Audio Recording
            with gr.Group():
                gr.Markdown("### 🎵 Audio Recording")
                gr.Markdown("⚠️ **Maximum 30 seconds** - Longer audio will be automatically truncated")
                audio_input = gr.Audio(
                    label="Record your audio (max 30 seconds)",
                    type="numpy",
                    format="wav",
                    elem_classes=["audio-input"]
                )
            # Section 2: Language Configuration
            with gr.Group():
                gr.Markdown("### ⚙️ Language Configuration")
                with gr.Row():
                    with gr.Column(scale=1):
                        source_lang = gr.Dropdown(
                            choices=["fr", "en"],
                            value="fr",
                            label="Source language",
                            info="Language of the recorded audio"
                        )
                    with gr.Column(scale=1):
                        target_lang = gr.Dropdown(
                            choices=["en", "fr"],
                            value="en",
                            label="Target language",
                            info="Translation language"
                        )
            # Section 3: Process Button
            with gr.Group():
                process_btn = gr.Button(
                    "🚀 Process Audio",
                    variant="primary",
                    size="lg",
                    elem_classes=["process-button"]
                )
            # Section 4: Status
            status = gr.HTML("""
                <div class="status-box">
                    <h4>📊 Status</h4>
                    <p>✅ Ready to process audio</p>
                </div>
            """)
            # Section 5: Results
            with gr.Group(elem_classes=["results-section"]):
                gr.Markdown("### 📝 Results")
                with gr.Tabs():
                    with gr.Tab("🔊 Generated Audio"):
                        audio_output = gr.Audio(
                            label="Translation audio",
                            type="filepath",
                            elem_classes=["result-audio"]
                        )
                    with gr.Tab("🎙️ Transcription"):
                        transcription_output = gr.Textbox(
                            label="Transcribed text",
                            lines=4,
                            interactive=False,
                            elem_classes=["result-textbox"]
                        )
                    with gr.Tab("🌍 Translation"):
                        translation_output = gr.Textbox(
                            label="Translated text",
                            lines=4,
                            interactive=False,
                            elem_classes=["result-textbox"]
                        )
        # Events
        def update_status(message, color="green"):
            return gr.HTML(f"""
                <div class="status-box">
                    <h4>📊 Status</h4>
                    <p style="color: {color};">{message}</p>
                </div>
            """)

        def process_audio_wrapper(audio, source_lang, target_lang):
            if audio is None:
                return (
                    "⚠️ No audio provided",
                    "",
                    None,
                    update_status("⚠️ No audio provided", "orange")
                )
            # Process audio
            transcription, translation, output_audio = process_audio_file(
                audio, source_lang, target_lang
            )
            # Update status based on result
            if transcription and not transcription.startswith("❌"):
                final_status = update_status("✅ Processing completed successfully!", "green")
            else:
                final_status = update_status("❌ Error during processing", "red")
            return transcription, translation, output_audio, final_status

        # Connect events
        process_btn.click(
            fn=process_audio_wrapper,
            inputs=[audio_input, source_lang, target_lang],
            outputs=[transcription_output, translation_output, audio_output, status],
            show_progress=True
        )

    return interface
# Main entry point
if __name__ == "__main__":
    # Create interface
    demo = create_interface()

    # Launch configuration based on environment
    if IS_HUGGINGFACE_SPACES:
        # Configuration for Hugging Face Spaces
        print("🌐 Hugging Face Spaces configuration")
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True,
            quiet=False,
            debug=False
        )
    else:
        # Configuration for local runs
        import socket

        def find_free_port(start_port=7860, max_port=7870):
            """Find a free port."""
            for port in range(start_port, max_port):
                try:
                    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                        s.bind(('127.0.0.1', port))
                        return port
                except OSError:
                    continue
            return start_port  # Fallback

        free_port = find_free_port()
        print(f"🌐 Local server starting on http://127.0.0.1:{free_port}")
        demo.launch(
            server_name="127.0.0.1",  # Localhost for local runs
            server_port=free_port,
            share=False,
            show_error=True,
            quiet=False,
            inbrowser=True  # Automatically open the browser
        )