Update app.py
Browse files
app.py
CHANGED
|
@@ -191,15 +191,40 @@ def custom_cleanup(temp_dir, exclude_dir):
|
|
| 191 |
print(f"Failed to delete {file_path}. Reason: {e}")
|
| 192 |
|
| 193 |
|
| 194 |
-
def generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path):
|
| 195 |
print("generate_audio")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
if voice_cloning == 'no':
|
| 197 |
if voice_gender == 'male':
|
| 198 |
voice = 'echo'
|
| 199 |
print('Entering Audio creation using elevenlabs')
|
| 200 |
set_api_key('sk_e823e586aa0c238fdfae02466faad9472bb668fd04431fca')
|
| 201 |
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
| 204 |
for chunk in audio:
|
| 205 |
temp_file.write(chunk)
|
|
@@ -234,7 +259,16 @@ def generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path):
|
|
| 234 |
# voice = Voice(voice_id="DeZH4ash9IU9gUcNjVXh",name="Marc",settings=VoiceSettings(
|
| 235 |
# stability=0.71, similarity_boost=0.9, style=0.0, use_speaker_boost=True),)
|
| 236 |
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
| 239 |
for chunk in audio:
|
| 240 |
temp_file.write(chunk)
|
|
@@ -275,8 +309,10 @@ def generate_video():
|
|
| 275 |
# image_path = '/home/user/app/images/marc_smile_enhanced.jpg' old code
|
| 276 |
# source_image = Image.open(image_path) old code
|
| 277 |
text_prompt = request.form['text_prompt']
|
| 278 |
-
|
| 279 |
print('Input text prompt: ',text_prompt)
|
|
|
|
|
|
|
|
|
|
| 280 |
text_prompt = text_prompt.strip()
|
| 281 |
if not text_prompt:
|
| 282 |
return jsonify({'error': 'Input text prompt cannot be blank'}), 400
|
|
@@ -313,7 +349,7 @@ def generate_video():
|
|
| 313 |
source_image = request.files['source_image']
|
| 314 |
source_image_path = save_uploaded_file(source_image, source_image.filename, TEMP_DIR)
|
| 315 |
print(f"Source image saved at: {source_image_path}")
|
| 316 |
-
driven_audio_path = generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path)
|
| 317 |
#driven_audio_path_not_use = user_voice_path
|
| 318 |
print(f"driven audio path: {driven_audio_path}")
|
| 319 |
save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)
|
|
|
|
| 191 |
print(f"Failed to delete {file_path}. Reason: {e}")
|
| 192 |
|
| 193 |
|
| 194 |
+
def generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path,language):
|
| 195 |
print("generate_audio")
|
| 196 |
+
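# Map the short language code from the request (e.g. "hi") to an "-IN" locale tag (e.g. "hi-IN").
# NOTE(review): confirm that generate() accepts a `language` argument and this tag format.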
|
| 197 |
+
language_mapping = {
|
| 198 |
+
"en": "en-IN", # Indian English
|
| 199 |
+
"hi": "hi-IN", # Hindi
|
| 200 |
+
"ta": "ta-IN", # Tamil
|
| 201 |
+
"te": "te-IN", # Telugu
|
| 202 |
+
"ml": "ml-IN", # Malayalam
|
| 203 |
+
"bn": "bn-IN", # Bengali
|
| 204 |
+
"gu": "gu-IN", # Gujarati
|
| 205 |
+
"mr": "mr-IN", # Marathi
|
| 206 |
+
"kn": "kn-IN", # Kannada
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
selected_language = language_mapping.get(language, "en-IN")
|
| 210 |
+
print("TTS Language Selected:", selected_language)
|
| 211 |
+
|
| 212 |
if voice_cloning == 'no':
|
| 213 |
if voice_gender == 'male':
|
| 214 |
voice = 'echo'
|
| 215 |
print('Entering Audio creation using elevenlabs')
|
| 216 |
set_api_key('sk_e823e586aa0c238fdfae02466faad9472bb668fd04431fca')
|
| 217 |
|
| 218 |
+
#audio_old = generate(text = text_prompt, voice = "Daniel", model = "eleven_multilingual_v2",stream=True, latency=4)
|
| 219 |
+
audio = generate(
|
| 220 |
+
text=text_prompt,
|
| 221 |
+
voice="Daniel",
|
| 222 |
+
model="eleven_multilingual_v2",
|
| 223 |
+
stream=True,
|
| 224 |
+
latency=4,
|
| 225 |
+
language=selected_language
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
| 229 |
for chunk in audio:
|
| 230 |
temp_file.write(chunk)
|
|
|
|
| 259 |
# voice = Voice(voice_id="DeZH4ash9IU9gUcNjVXh",name="Marc",settings=VoiceSettings(
|
| 260 |
# stability=0.71, similarity_boost=0.9, style=0.0, use_speaker_boost=True),)
|
| 261 |
|
| 262 |
+
#audio_old = generate(text = text_prompt, voice = voice, model = "eleven_multilingual_v2",stream=True, latency=4)
|
| 263 |
+
audio = generate(
|
| 264 |
+
text=text_prompt,
|
| 265 |
+
voice=voice,
|
| 266 |
+
model="eleven_multilingual_v2",
|
| 267 |
+
stream=True,
|
| 268 |
+
latency=4,
|
| 269 |
+
language=selected_language
|
| 270 |
+
)
|
| 271 |
+
|
| 272 |
with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
| 273 |
for chunk in audio:
|
| 274 |
temp_file.write(chunk)
|
|
|
|
| 309 |
# image_path = '/home/user/app/images/marc_smile_enhanced.jpg' old code
|
| 310 |
# source_image = Image.open(image_path) old code
|
| 311 |
text_prompt = request.form['text_prompt']
|
|
|
|
| 312 |
print('Input text prompt: ',text_prompt)
|
| 313 |
+
language = request.form.get('language', 'en') # default English
|
| 314 |
+
print("Selected language:", language)
|
| 315 |
+
|
| 316 |
text_prompt = text_prompt.strip()
|
| 317 |
if not text_prompt:
|
| 318 |
return jsonify({'error': 'Input text prompt cannot be blank'}), 400
|
|
|
|
| 349 |
source_image = request.files['source_image']
|
| 350 |
source_image_path = save_uploaded_file(source_image, source_image.filename, TEMP_DIR)
|
| 351 |
print(f"Source image saved at: {source_image_path}")
|
| 352 |
+
driven_audio_path = generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path,language)
|
| 353 |
#driven_audio_path_not_use = user_voice_path
|
| 354 |
print(f"driven audio path: {driven_audio_path}")
|
| 355 |
save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)
|