Spanicin committed on
Commit
e1d177a
·
verified ·
1 Parent(s): 6215614

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -5
app.py CHANGED
@@ -191,15 +191,40 @@ def custom_cleanup(temp_dir, exclude_dir):
191
  print(f"Failed to delete {file_path}. Reason: {e}")
192
 
193
 
194
- def generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path):
195
  print("generate_audio")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  if voice_cloning == 'no':
197
  if voice_gender == 'male':
198
  voice = 'echo'
199
  print('Entering Audio creation using elevenlabs')
200
  set_api_key('sk_e823e586aa0c238fdfae02466faad9472bb668fd04431fca')
201
 
202
- audio = generate(text = text_prompt, voice = "Daniel", model = "eleven_multilingual_v2",stream=True, latency=4)
 
 
 
 
 
 
 
 
 
203
  with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
204
  for chunk in audio:
205
  temp_file.write(chunk)
@@ -234,7 +259,16 @@ def generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path):
234
  # voice = Voice(voice_id="DeZH4ash9IU9gUcNjVXh",name="Marc",settings=VoiceSettings(
235
  # stability=0.71, similarity_boost=0.9, style=0.0, use_speaker_boost=True),)
236
 
237
- audio = generate(text = text_prompt, voice = voice, model = "eleven_multilingual_v2",stream=True, latency=4)
 
 
 
 
 
 
 
 
 
238
  with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_",dir=TEMP_DIR.name, delete=False) as temp_file:
239
  for chunk in audio:
240
  temp_file.write(chunk)
@@ -275,8 +309,10 @@ def generate_video():
275
  # image_path = '/home/user/app/images/marc_smile_enhanced.jpg' old code
276
  # source_image = Image.open(image_path) old code
277
  text_prompt = request.form['text_prompt']
278
-
279
  print('Input text prompt: ',text_prompt)
 
 
 
280
  text_prompt = text_prompt.strip()
281
  if not text_prompt:
282
  return jsonify({'error': 'Input text prompt cannot be blank'}), 400
@@ -313,7 +349,7 @@ def generate_video():
313
  source_image = request.files['source_image']
314
  source_image_path = save_uploaded_file(source_image, source_image.filename, TEMP_DIR)
315
  print(f"Source image saved at: {source_image_path}")
316
- driven_audio_path = generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path)
317
  #driven_audio_path_not_use = user_voice_path
318
  print(f"driven audio path: {driven_audio_path}")
319
  save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)
 
191
  print(f"Failed to delete {file_path}. Reason: {e}")
192
 
193
 
194
+ def generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path,language):
195
  print("generate_audio")
196
+ # Map language → ElevenLabs voice/language codes
197
+ language_mapping = {
198
+ "en": "en-IN", # Indian English
199
+ "hi": "hi-IN", # Hindi
200
+ "ta": "ta-IN", # Tamil
201
+ "te": "te-IN", # Telugu
202
+ "ml": "ml-IN", # Malayalam
203
+ "bn": "bn-IN", # Bengali
204
+ "gu": "gu-IN", # Gujarati
205
+ "mr": "mr-IN", # Marathi
206
+ "kn": "kn-IN", # Kannada
207
+ }
208
+
209
+ selected_language = language_mapping.get(language, "en-IN")
210
+ print("TTS Language Selected:", selected_language)
211
+
212
  if voice_cloning == 'no':
213
  if voice_gender == 'male':
214
  voice = 'echo'
215
  print('Entering Audio creation using elevenlabs')
216
  set_api_key('sk_e823e586aa0c238fdfae02466faad9472bb668fd04431fca')
217
 
218
+ #audio_old = generate(text = text_prompt, voice = "Daniel", model = "eleven_multilingual_v2",stream=True, latency=4)
219
+ audio = generate(
220
+ text=text_prompt,
221
+ voice="Daniel",
222
+ model="eleven_multilingual_v2",
223
+ stream=True,
224
+ latency=4,
225
+ language=selected_language
226
+ )
227
+
228
  with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
229
  for chunk in audio:
230
  temp_file.write(chunk)
 
259
  # voice = Voice(voice_id="DeZH4ash9IU9gUcNjVXh",name="Marc",settings=VoiceSettings(
260
  # stability=0.71, similarity_boost=0.9, style=0.0, use_speaker_boost=True),)
261
 
262
+ #audio_old = generate(text = text_prompt, voice = voice, model = "eleven_multilingual_v2",stream=True, latency=4)
263
+ audio = generate(
264
+ text=text_prompt,
265
+ voice=voice,
266
+ model="eleven_multilingual_v2",
267
+ stream=True,
268
+ latency=4,
269
+ language=selected_language
270
+ )
271
+
272
  with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_",dir=TEMP_DIR.name, delete=False) as temp_file:
273
  for chunk in audio:
274
  temp_file.write(chunk)
 
309
  # image_path = '/home/user/app/images/marc_smile_enhanced.jpg' old code
310
  # source_image = Image.open(image_path) old code
311
  text_prompt = request.form['text_prompt']
 
312
  print('Input text prompt: ',text_prompt)
313
+ language = request.form.get('language', 'en') # default English
314
+ print("Selected language:", language)
315
+
316
  text_prompt = text_prompt.strip()
317
  if not text_prompt:
318
  return jsonify({'error': 'Input text prompt cannot be blank'}), 400
 
349
  source_image = request.files['source_image']
350
  source_image_path = save_uploaded_file(source_image, source_image.filename, TEMP_DIR)
351
  print(f"Source image saved at: {source_image_path}")
352
+ driven_audio_path = generate_audio(voice_cloning, voice_gender, text_prompt,user_voice_path,language)
353
  #driven_audio_path_not_use = user_voice_path
354
  print(f"driven audio path: {driven_audio_path}")
355
  save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)