SeamlessOnDevice

Sleeping

Tonic committed on Nov 20, 2023

Commit

3cb13e6

1 Parent(s): f6806df

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -39,6 +39,10 @@ def speech_to_text(audio_data, tgt_lang):
     s2t_model = torch.jit.load("unity_on_device_s2t.ptl")
     with torch.no_grad():
         text = s2t_model(audio_input, tgt_lang=languages[tgt_lang])
     return text
 def speech_to_speech_translation(audio_data, tgt_lang):
@@ -46,11 +50,25 @@ def speech_to_speech_translation(audio_data, tgt_lang):
     audio_input, _ = torchaudio.load(file_path)
     s2st_model = torch.jit.load("unity_on_device_s2t.ptl")
     with torch.no_grad():
-        text, units, waveform = s2st_model(audio_input, tgt_lang=languages[tgt_lang])
     output_file = "/tmp/result.wav"
     torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)
     return text, output_file
 def create_interface():
     with gr.Blocks(theme='ParityError/Anime') as interface:
         gr.Markdown(welcome_message)

     s2t_model = torch.jit.load("unity_on_device_s2t.ptl")
     with torch.no_grad():
         text = s2t_model(audio_input, tgt_lang=languages[tgt_lang])
+    # Print the model's output for debugging
+    print("Speech to Text Model Output:", text)
     return text
 def speech_to_speech_translation(audio_data, tgt_lang):
     audio_input, _ = torchaudio.load(file_path)
     s2st_model = torch.jit.load("unity_on_device_s2t.ptl")
     with torch.no_grad():
+        model_output = s2st_model(audio_input, tgt_lang=languages[tgt_lang])
+    # Print the model's output for debugging
+    print("Speech to Speech Translation Model Output:", model_output)
+    # Check the structure of model_output and unpack accordingly
+    if len(model_output) == 3:
+        text, units, waveform = model_output
+    elif len(model_output) == 2:
+        text, waveform = model_output
+        units = None  # or some default value
+    else:
+        raise ValueError("Unexpected model output format")
     output_file = "/tmp/result.wav"
     torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)
     return text, output_file
 def create_interface():
     with gr.Blocks(theme='ParityError/Anime') as interface:
         gr.Markdown(welcome_message)