Spaces:
Running
on
Zero
Running
on
Zero
add task_fake
Browse files- app.py +146 -175
- app/canary_speech_engine.py +2 -0
- app/ui_utils.py +4 -4
- app/utils.py +218 -1
- assets/custom_style.css +51 -0
app.py
CHANGED
|
@@ -14,10 +14,12 @@ from gradio.utils import get_space
|
|
| 14 |
|
| 15 |
from app.utils import (
|
| 16 |
raise_function,
|
|
|
|
| 17 |
generate_coturn_config,
|
| 18 |
read_and_stream_audio,
|
| 19 |
stop_streaming,
|
| 20 |
-
|
|
|
|
| 21 |
)
|
| 22 |
from app.session_utils import (
|
| 23 |
on_load,
|
|
@@ -53,106 +55,15 @@ reset_all_active_session_hash_code()
|
|
| 53 |
theme,css_style = get_custom_theme()
|
| 54 |
|
| 55 |
# logger.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
|
| 56 |
-
|
| 57 |
-
from app.
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
-
asr_model = nemo_asr.models.ASRModel.from_pretrained("nvidia/canary-1b-v2")
|
| 62 |
-
streaming_audio_processor_config = StreamingAudioProcessorConfig(
|
| 63 |
-
read_size=4000,
|
| 64 |
-
silence_threshold_chunks=1
|
| 65 |
-
)
|
| 66 |
|
| 67 |
-
@spaces.GPU
|
| 68 |
-
def task(session_id: str,
|
| 69 |
-
task_type, lang_source, lang_target,
|
| 70 |
-
chunk_secs, left_context_secs, right_context_secs,
|
| 71 |
-
streaming_policy, alignatt_thr, waitk_lagging,
|
| 72 |
-
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 73 |
-
):
|
| 74 |
-
"""Continuously read and delete .npz chunks while task is active."""
|
| 75 |
-
yield f"initializing the CanarySpeechEngine and Silero_Vad_Engine\n\n"
|
| 76 |
-
# initialize the CanarySpeechEngine and Silero_Vad_Engine
|
| 77 |
-
conf = CanaryConfig.from_params(
|
| 78 |
-
task_type, lang_source, lang_target,
|
| 79 |
-
chunk_secs, left_context_secs, right_context_secs,
|
| 80 |
-
streaming_policy, alignatt_thr, waitk_lagging,
|
| 81 |
-
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 82 |
-
)
|
| 83 |
-
canary_speech_engine = CanarySpeechEngine(asr_model,conf)
|
| 84 |
-
silero_vad_engine = Silero_Vad_Engine()
|
| 85 |
-
streamer = StreamingAudioProcessor(speech_engine=canary_speech_engine,vad_engine=silero_vad_engine,cfg=streaming_audio_processor_config)
|
| 86 |
-
yield f"initialized the CanarySpeechEngine and Silero_Vad_Engine\n\n"
|
| 87 |
-
yield f"Task started for session {session_id}\n\n"
|
| 88 |
-
active_flag = get_active_task_flag_file(session_id)
|
| 89 |
-
with open(active_flag, "w") as f:
|
| 90 |
-
f.write("1")
|
| 91 |
-
chunk_dir = get_folder_chunks(session_id)
|
| 92 |
-
logging.info(f"[{session_id}] task started. {chunk_dir}")
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
try:
|
| 96 |
-
logging.info(f"[{session_id}] task loop started.")
|
| 97 |
-
yield f"Task started for session {session_id}\n\n"
|
| 98 |
-
while os.path.exists(active_flag):
|
| 99 |
-
if not os.path.exists(chunk_dir):
|
| 100 |
-
logging.warning(f"[{session_id}] No chunk directory found for task.")
|
| 101 |
-
yield "No audio chunks yet... waiting for stream.\n"
|
| 102 |
-
time.sleep(0.1)
|
| 103 |
-
continue
|
| 104 |
-
files = sorted(f for f in os.listdir(chunk_dir) if f.endswith(".npz"))
|
| 105 |
-
if not files:
|
| 106 |
-
time.sleep(0.1)
|
| 107 |
-
continue
|
| 108 |
-
|
| 109 |
-
for fname in files:
|
| 110 |
-
fpath = os.path.join(chunk_dir, fname)
|
| 111 |
-
try:
|
| 112 |
-
npz = np.load(fpath)
|
| 113 |
-
samples = npz["data"]
|
| 114 |
-
rate = int(npz["rate"])
|
| 115 |
-
|
| 116 |
-
text = f"Transcribed {fname}: {len(samples)} samples @ {rate}Hz"
|
| 117 |
-
new_texts = streamer.process_chunk(samples)
|
| 118 |
-
for text in new_texts:
|
| 119 |
-
print(text, end='', flush=True)
|
| 120 |
-
yield f"{text}"
|
| 121 |
-
logging.debug(f"[{session_id}] {new_texts}")
|
| 122 |
-
# yield f"{text}\n"
|
| 123 |
-
os.remove(fpath)
|
| 124 |
-
logging.debug(f"[{session_id}] Deleted processed chunk: {fname}")
|
| 125 |
-
except Exception as e:
|
| 126 |
-
logging.error(f"[{session_id}] Error processing {fname}: {e}")
|
| 127 |
-
yield f"Error processing {fname}: {e}\n"
|
| 128 |
-
continue
|
| 129 |
-
|
| 130 |
-
time.sleep(0.1)
|
| 131 |
-
# raise_function()
|
| 132 |
-
final_text = streamer.finalize_stream()
|
| 133 |
-
if final_text:
|
| 134 |
-
print(final_text, end='', flush=True)
|
| 135 |
-
yield f"\n{final_text}"
|
| 136 |
-
# yield f"\n"
|
| 137 |
-
logging.info(f"[{session_id}] task loop ended (flag removed).")
|
| 138 |
-
|
| 139 |
-
except Exception as e:
|
| 140 |
-
logging.error(f"[{session_id}] task error: {e}", exc_info=True)
|
| 141 |
-
yield f"Unexpected error: {e}\n"
|
| 142 |
-
finally:
|
| 143 |
-
# active_flag = os.path.join(TMP_DIR, f"transcribe_active_{session_id}.txt")
|
| 144 |
-
if os.path.exists(active_flag):
|
| 145 |
-
os.remove(active_flag)
|
| 146 |
-
logging.info(f"[{session_id}] task stopped.")
|
| 147 |
-
try:
|
| 148 |
-
if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
|
| 149 |
-
os.rmdir(chunk_dir)
|
| 150 |
-
logging.debug(f"[{session_id}] Cleaned up empty chunk dir.")
|
| 151 |
-
except Exception as e:
|
| 152 |
-
logging.error(f"[{session_id}] Cleanup error: {e}")
|
| 153 |
-
yield "\nCleanup error: {e}"
|
| 154 |
-
logging.info(f"[{session_id}] Exiting task loop.")
|
| 155 |
-
yield "\nTask finished and cleaned up.\n"
|
| 156 |
|
| 157 |
|
| 158 |
with gr.Blocks(theme=theme, css=css_style) as demo:
|
|
@@ -227,46 +138,60 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
|
|
| 227 |
modality="audio",
|
| 228 |
rtc_configuration=generate_coturn_config(),
|
| 229 |
visible=True,
|
| 230 |
-
inputs=main_audio
|
| 231 |
)
|
| 232 |
-
start_stream_button = gr.Button("Start Streaming")
|
| 233 |
|
| 234 |
webrtc_stream.stream(
|
| 235 |
fn=read_and_stream_audio,
|
| 236 |
-
inputs=[active_filepath, session_hash_code, stop_streaming_flags,gr.State(
|
| 237 |
outputs=[webrtc_stream],
|
| 238 |
trigger=start_stream_button.click,
|
| 239 |
concurrency_id="audio_stream",
|
| 240 |
concurrency_limit=10,
|
| 241 |
)
|
| 242 |
status_message_stream = gr.Markdown("", elem_id="status-message-stream", visible=False)
|
| 243 |
-
go_to_config = gr.Button("Go to Configuration", visible=False)
|
| 244 |
go_to_config.click(lambda: gr.Walkthrough(selected=2), outputs=walkthrough)
|
| 245 |
|
| 246 |
# === STEP 3 ===
|
| 247 |
with gr.Step("Configuration", id=2):
|
| 248 |
-
gr.Markdown("
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
| 254 |
with gr.Accordion("Advanced Configuration", open=False):
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
with gr.Row():
|
| 266 |
auto_apply_presets = gr.Checkbox(value=True, label="Auto-apply presets for sample audios")
|
| 267 |
reset_btn = gr.Button("Reset to defaults")
|
| 268 |
-
|
| 269 |
-
|
| 270 |
|
| 271 |
# --- Events ---
|
| 272 |
task_type.change(
|
|
@@ -323,37 +248,47 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
|
|
| 323 |
|
| 324 |
# === STEP 4 ===
|
| 325 |
with gr.Step("Task", id=3) as task_step:
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
with gr.
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
label="Transcription / Translation Result",
|
| 341 |
-
|
| 342 |
lines=10,
|
| 343 |
max_lines= 10,
|
| 344 |
interactive=False,
|
| 345 |
visible=True,
|
| 346 |
-
autoscroll=True
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
|
|
|
|
|
|
|
|
|
| 351 |
|
| 352 |
stop_stream_button.click(
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
|
| 358 |
def stop_task_fn(session_hash_code):
|
| 359 |
transcribe_active = get_active_task_flag_file(session_hash_code)
|
|
@@ -365,47 +300,83 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
|
|
| 365 |
stop_task_button.click(
|
| 366 |
fn=stop_task_fn,
|
| 367 |
inputs=session_hash_code,
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
|
|
|
|
|
|
|
|
|
| 372 |
def start_transcription(
|
| 373 |
session_hash_code, stop_streaming_flags,
|
| 374 |
task_type, lang_source, lang_target,
|
| 375 |
chunk_secs, left_context_secs, right_context_secs,
|
| 376 |
streaming_policy, alignatt_thr, waitk_lagging,
|
| 377 |
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 378 |
-
|
| 379 |
"""Stream transcription or translation results in real time."""
|
| 380 |
|
| 381 |
accumulated = ""
|
| 382 |
yield f"Starting {task_type.lower()}...\n\n",gr.update(visible=False),gr.update(visible=True)
|
| 383 |
|
|
|
|
| 384 |
# Boucle sur le générateur de `task()`
|
| 385 |
-
for msg in task(
|
| 386 |
-
session_hash_code,
|
| 387 |
-
task_type, lang_source, lang_target,
|
| 388 |
-
chunk_secs, left_context_secs, right_context_secs,
|
| 389 |
-
streaming_policy, alignatt_thr, waitk_lagging,
|
| 390 |
-
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 391 |
-
):
|
| 392 |
accumulated += msg
|
| 393 |
yield accumulated,gr.update(visible=False),gr.update(visible=True)
|
| 394 |
|
| 395 |
yield accumulated + "\nDone.",gr.update(visible=True),gr.update(visible=False)
|
| 396 |
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
session_hash_code, stop_streaming_flags,
|
| 401 |
task_type, lang_source, lang_target,
|
| 402 |
chunk_secs, left_context_secs, right_context_secs,
|
| 403 |
streaming_policy, alignatt_thr, waitk_lagging,
|
| 404 |
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 405 |
-
|
| 406 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
)
|
| 408 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
ui_components = [
|
| 410 |
start_stream_button, stop_stream_button,
|
| 411 |
go_to_config, audio_source_step, status_slider,walkthrough,status_message_stream
|
|
@@ -419,19 +390,19 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
|
|
| 419 |
concurrency_limit=10,
|
| 420 |
)
|
| 421 |
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
|
| 436 |
|
| 437 |
if __name__ == "__main__":
|
|
|
|
| 14 |
|
| 15 |
from app.utils import (
|
| 16 |
raise_function,
|
| 17 |
+
READ_SIZE,
|
| 18 |
generate_coturn_config,
|
| 19 |
read_and_stream_audio,
|
| 20 |
stop_streaming,
|
| 21 |
+
task,
|
| 22 |
+
task_fake
|
| 23 |
)
|
| 24 |
from app.session_utils import (
|
| 25 |
on_load,
|
|
|
|
| 55 |
theme,css_style = get_custom_theme()
|
| 56 |
|
| 57 |
# logger.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
|
| 58 |
+
|
| 59 |
+
from app.streaming_audio_processor import StreamingAudioProcessorConfig
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# asr_model = None
|
| 63 |
+
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
with gr.Blocks(theme=theme, css=css_style) as demo:
|
|
|
|
| 138 |
modality="audio",
|
| 139 |
rtc_configuration=generate_coturn_config(),
|
| 140 |
visible=True,
|
| 141 |
+
inputs=main_audio,
|
| 142 |
)
|
| 143 |
+
start_stream_button = gr.Button("▶️ Start Streaming", variant="primary")
|
| 144 |
|
| 145 |
webrtc_stream.stream(
|
| 146 |
fn=read_and_stream_audio,
|
| 147 |
+
inputs=[active_filepath, session_hash_code, stop_streaming_flags,gr.State(READ_SIZE)],
|
| 148 |
outputs=[webrtc_stream],
|
| 149 |
trigger=start_stream_button.click,
|
| 150 |
concurrency_id="audio_stream",
|
| 151 |
concurrency_limit=10,
|
| 152 |
)
|
| 153 |
status_message_stream = gr.Markdown("", elem_id="status-message-stream", visible=False)
|
| 154 |
+
go_to_config = gr.Button("Go to Configuration", visible=False, variant="secondary")
|
| 155 |
go_to_config.click(lambda: gr.Walkthrough(selected=2), outputs=walkthrough)
|
| 156 |
|
| 157 |
# === STEP 3 ===
|
| 158 |
with gr.Step("Configuration", id=2):
|
| 159 |
+
gr.Markdown("### Step 3: Configure the Task")
|
| 160 |
+
with gr.Group():
|
| 161 |
+
with gr.Row():
|
| 162 |
+
task_type = gr.Radio(["Transcription", "Translation"], value="Transcription", label="Task Type")
|
| 163 |
+
with gr.Row():
|
| 164 |
+
lang_source = gr.Dropdown(list(SUPPORTED_LANGS_MAP.keys()), value="French", label="Source Language")
|
| 165 |
+
lang_target = gr.Dropdown(list(SUPPORTED_LANGS_MAP.keys()), value="English", label="Target Language", visible=False)
|
| 166 |
with gr.Accordion("Advanced Configuration", open=False):
|
| 167 |
+
with gr.Group():
|
| 168 |
+
with gr.Row():
|
| 169 |
+
gr.Markdown("##### Chunks ")
|
| 170 |
+
with gr.Row():
|
| 171 |
+
left_context_secs = gr.Slider(value=20.0, label="left_context_secs",info="Streaming chunk duration in seconds (left context)", minimum=1.0, maximum=60.0, step=1.0, show_reset_button=False)
|
| 172 |
+
chunk_secs = gr.Slider(value=1.0, label="chunk_secs", info="Streaming chunk duration in seconds (chunk)", minimum=0.1, maximum=5.0, step=0.1, show_reset_button=False)
|
| 173 |
+
right_context_secs = gr.Slider(value=0.5, label="right_context_secs", info="Streaming chunk duration in seconds (right context)", minimum=0.1, maximum=10.0, step=0.1, show_reset_button=False)
|
| 174 |
+
gr.Markdown("---")
|
| 175 |
+
with gr.Group():
|
| 176 |
+
with gr.Row():
|
| 177 |
+
gr.Markdown("##### Decoding ")
|
| 178 |
+
with gr.Row():
|
| 179 |
+
streaming_policy = gr.Dropdown(["waitk", "alignatt"], value="waitk", label="streaming_policy", elem_classes="full-width",
|
| 180 |
+
info="“Wait-k: Higher accuracy, requires larger left context, higher latency” \n”AlignAtt: Lower latency, suitable for production, predicts multiple tokens per chunk”")
|
| 181 |
+
|
| 182 |
+
with gr.Row():
|
| 183 |
+
alignatt_thr = gr.Number(value=8, label="alignatt_thr", info="Cross-attention threshold for AlignAtt policy (default: 8), alignatt only", precision=0)
|
| 184 |
+
waitk_lagging = gr.Number(value=2, label="waitk_lagging", info="Number of chunks to wait in the beginning (default: 2), works for both policies", precision=0)
|
| 185 |
+
with gr.Row():
|
| 186 |
+
exclude_sink_frames = gr.Number(value=8, label="exclude_sink_frames", info="Number of frames to exclude from the xatt scores calculation (default: 8), alignatt only", precision=0)
|
| 187 |
+
xatt_scores_layer = gr.Number(value=-2, label="xatt_scores_layer", info="Layer to get cross-attention (xatt) scores from (default: -2), alignatt only", precision=0)
|
| 188 |
+
with gr.Row():
|
| 189 |
+
hallucinations_detector = gr.Checkbox(value=True, label="hallucinations_detector" , info="Detect hallucinations in the predicted tokens (default: True), works for both policies" )
|
| 190 |
with gr.Row():
|
| 191 |
auto_apply_presets = gr.Checkbox(value=True, label="Auto-apply presets for sample audios")
|
| 192 |
reset_btn = gr.Button("Reset to defaults")
|
| 193 |
+
with gr.Accordion("Configuration Summary", open=False):
|
| 194 |
+
summary_box = gr.Textbox(lines=15, interactive=False,show_label=False)
|
| 195 |
|
| 196 |
# --- Events ---
|
| 197 |
task_type.change(
|
|
|
|
| 248 |
|
| 249 |
# === STEP 4 ===
|
| 250 |
with gr.Step("Task", id=3) as task_step:
|
| 251 |
+
with gr.Row():
|
| 252 |
+
gr.Markdown("## Step 4: Start the Task")
|
| 253 |
+
with gr.Row():
|
| 254 |
+
with gr.Column():
|
| 255 |
+
status_slider = gr.Slider(
|
| 256 |
+
0, 100,
|
| 257 |
+
value=0,
|
| 258 |
+
label="Streaming Progress",
|
| 259 |
+
show_label=True,
|
| 260 |
+
interactive=False,
|
| 261 |
+
visible=False,
|
| 262 |
+
show_reset_button=False
|
| 263 |
+
)
|
| 264 |
+
stop_stream_button = gr.Button("⏹️ Stop Streaming", visible=False,variant="stop")
|
| 265 |
+
with gr.Row():
|
| 266 |
+
gr.Markdown("---")
|
| 267 |
+
with gr.Row():
|
| 268 |
+
gr.Markdown("##### Transcription / Translation Result")
|
| 269 |
+
with gr.Row():
|
| 270 |
+
|
| 271 |
+
task_output = gr.Textbox(
|
| 272 |
label="Transcription / Translation Result",
|
| 273 |
+
show_label=False,
|
| 274 |
lines=10,
|
| 275 |
max_lines= 10,
|
| 276 |
interactive=False,
|
| 277 |
visible=True,
|
| 278 |
+
autoscroll=True,
|
| 279 |
+
elem_id="task-output-box"
|
| 280 |
+
)
|
| 281 |
+
with gr.Row():
|
| 282 |
+
status_message_task = gr.Markdown("", elem_id="status-message-task",elem_classes=["info"], visible=False)
|
| 283 |
+
with gr.Row():
|
| 284 |
+
start_task_button = gr.Button("▶️ Start Task", visible=True, variant="primary")
|
| 285 |
+
stop_task_button = gr.Button("⏹️ Stop Task", visible=False,variant="stop")
|
| 286 |
|
| 287 |
stop_stream_button.click(
|
| 288 |
+
fn=stop_streaming,
|
| 289 |
+
inputs=[session_hash_code, stop_streaming_flags],
|
| 290 |
+
outputs=[stop_streaming_flags],
|
| 291 |
+
)
|
| 292 |
|
| 293 |
def stop_task_fn(session_hash_code):
|
| 294 |
transcribe_active = get_active_task_flag_file(session_hash_code)
|
|
|
|
| 300 |
stop_task_button.click(
|
| 301 |
fn=stop_task_fn,
|
| 302 |
inputs=session_hash_code,
|
| 303 |
+
outputs=task_output
|
| 304 |
+
)
|
| 305 |
+
# task(session_hash_code)
|
| 306 |
+
config_task_ui = [session_hash_code,task_type, lang_source, lang_target,
|
| 307 |
+
chunk_secs, left_context_secs, right_context_secs,
|
| 308 |
+
streaming_policy, alignatt_thr, waitk_lagging,
|
| 309 |
+
exclude_sink_frames, xatt_scores_layer, hallucinations_detector]
|
| 310 |
def start_transcription(
|
| 311 |
session_hash_code, stop_streaming_flags,
|
| 312 |
task_type, lang_source, lang_target,
|
| 313 |
chunk_secs, left_context_secs, right_context_secs,
|
| 314 |
streaming_policy, alignatt_thr, waitk_lagging,
|
| 315 |
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 316 |
+
):
|
| 317 |
"""Stream transcription or translation results in real time."""
|
| 318 |
|
| 319 |
accumulated = ""
|
| 320 |
yield f"Starting {task_type.lower()}...\n\n",gr.update(visible=False),gr.update(visible=True)
|
| 321 |
|
| 322 |
+
|
| 323 |
# Boucle sur le générateur de `task()`
|
| 324 |
+
for msg in task(session_hash_code,config_task_ui):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
accumulated += msg
|
| 326 |
yield accumulated,gr.update(visible=False),gr.update(visible=True)
|
| 327 |
|
| 328 |
yield accumulated + "\nDone.",gr.update(visible=True),gr.update(visible=False)
|
| 329 |
|
| 330 |
+
|
| 331 |
+
def start_task(
|
| 332 |
+
session_hash_code,
|
|
|
|
| 333 |
task_type, lang_source, lang_target,
|
| 334 |
chunk_secs, left_context_secs, right_context_secs,
|
| 335 |
streaming_policy, alignatt_thr, waitk_lagging,
|
| 336 |
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 337 |
+
):
|
| 338 |
+
"""Stream transcription or translation results in real time."""
|
| 339 |
+
accumulated = ""
|
| 340 |
+
# Boucle sur le générateur de `task2()`
|
| 341 |
+
for result, status, current_chunk in task_fake(
|
| 342 |
+
session_hash_code,
|
| 343 |
+
task_type, lang_source, lang_target,
|
| 344 |
+
chunk_secs, left_context_secs, right_context_secs,
|
| 345 |
+
streaming_policy, alignatt_thr, waitk_lagging,
|
| 346 |
+
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 347 |
+
):
|
| 348 |
+
if status == "success":
|
| 349 |
+
yield accumulated + result, gr.update(visible=True,value=current_chunk , elem_classes=["info"]), gr.update(visible=False), gr.update(visible=True)
|
| 350 |
+
accumulated += result
|
| 351 |
+
elif status in ["error", "warning", "info", "done"]:
|
| 352 |
+
yield accumulated, gr.update(visible=True,value=result , elem_classes=[status]), gr.update(visible=True), gr.update(visible=False)
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
start_task_button.click(
|
| 356 |
+
fn=start_task,
|
| 357 |
+
inputs=[
|
| 358 |
+
session_hash_code,
|
| 359 |
+
task_type, lang_source, lang_target,
|
| 360 |
+
chunk_secs, left_context_secs, right_context_secs,
|
| 361 |
+
streaming_policy, alignatt_thr, waitk_lagging,
|
| 362 |
+
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 363 |
+
|
| 364 |
+
],
|
| 365 |
+
outputs=[task_output,status_message_task,start_task_button,stop_task_button]
|
| 366 |
)
|
| 367 |
|
| 368 |
+
# start_task_button.click(
|
| 369 |
+
# fn=start_task,
|
| 370 |
+
# inputs=[
|
| 371 |
+
# session_hash_code, stop_streaming_flags,
|
| 372 |
+
# task_type, lang_source, lang_target,
|
| 373 |
+
# chunk_secs, left_context_secs, right_context_secs,
|
| 374 |
+
# streaming_policy, alignatt_thr, waitk_lagging,
|
| 375 |
+
# exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 376 |
+
# ],
|
| 377 |
+
# outputs=[task_output,status_message_task,start_task_button,stop_task_button]
|
| 378 |
+
# )
|
| 379 |
+
|
| 380 |
ui_components = [
|
| 381 |
start_stream_button, stop_stream_button,
|
| 382 |
go_to_config, audio_source_step, status_slider,walkthrough,status_message_stream
|
|
|
|
| 390 |
concurrency_limit=10,
|
| 391 |
)
|
| 392 |
|
| 393 |
+
# def toggle_task_buttons():
|
| 394 |
+
# return (
|
| 395 |
+
# gr.update(visible=False),
|
| 396 |
+
# gr.update(visible=True),
|
| 397 |
+
# gr.update(visible=True)
|
| 398 |
+
# )
|
| 399 |
+
|
| 400 |
+
# start_task_button.click(
|
| 401 |
+
# fn=toggle_task_buttons,
|
| 402 |
+
# inputs=None,
|
| 403 |
+
# outputs=[start_task_button, stop_task_button, stop_stream_button],
|
| 404 |
+
# queue=False
|
| 405 |
+
# )
|
| 406 |
|
| 407 |
|
| 408 |
if __name__ == "__main__":
|
app/canary_speech_engine.py
CHANGED
|
@@ -125,6 +125,7 @@ class CanaryConfig:
|
|
| 125 |
"""Create a CanaryConfig instance from parameters"""
|
| 126 |
# Convert task type to model task
|
| 127 |
task = "asr" if task_type == "Transcription" else "ast"
|
|
|
|
| 128 |
|
| 129 |
return cls(
|
| 130 |
chunk_secs=chunk_secs,
|
|
@@ -158,6 +159,7 @@ class CanarySpeechEngine(IStreamingSpeechEngine):
|
|
| 158 |
Args:
|
| 159 |
cfg: An OmegaConf object containing 'model' and 'streaming' configs.
|
| 160 |
"""
|
|
|
|
| 161 |
self.cfg = cfg.toOmegaConf() # Store the full config
|
| 162 |
|
| 163 |
# Setup device and dtype from config
|
|
|
|
| 125 |
"""Create a CanaryConfig instance from parameters"""
|
| 126 |
# Convert task type to model task
|
| 127 |
task = "asr" if task_type == "Transcription" else "ast"
|
| 128 |
+
target_lang = source_lang if task_type == "Transcription" else target_lang
|
| 129 |
|
| 130 |
return cls(
|
| 131 |
chunk_secs=chunk_secs,
|
|
|
|
| 159 |
Args:
|
| 160 |
cfg: An OmegaConf object containing 'model' and 'streaming' configs.
|
| 161 |
"""
|
| 162 |
+
logging.debug(f"Initializing CanarySpeechEngine with config: {cfg}")
|
| 163 |
self.cfg = cfg.toOmegaConf() # Store the full config
|
| 164 |
|
| 165 |
# Setup device and dtype from config
|
app/ui_utils.py
CHANGED
|
@@ -26,10 +26,10 @@ EXAMPLE_CONFIGS = {
|
|
| 26 |
"exclude_sink_frames": 8, "xatt_scores_layer": -2, "hallucinations_detector": True
|
| 27 |
},
|
| 28 |
"data/french_news.wav": {
|
| 29 |
-
"task_type": "Transcription", "lang_source": "French", "lang_target": "
|
| 30 |
-
"chunk_secs": 1.0, "left_context_secs": 15.0, "right_context_secs": 0.
|
| 31 |
-
"streaming_policy": "alignatt", "alignatt_thr":
|
| 32 |
-
"exclude_sink_frames":
|
| 33 |
},
|
| 34 |
"data/spanish_podcast.wav": {
|
| 35 |
"task_type": "Translation", "lang_source": "Spanish", "lang_target": "English",
|
|
|
|
| 26 |
"exclude_sink_frames": 8, "xatt_scores_layer": -2, "hallucinations_detector": True
|
| 27 |
},
|
| 28 |
"data/french_news.wav": {
|
| 29 |
+
"task_type": "Transcription", "lang_source": "French", "lang_target": "French",
|
| 30 |
+
"chunk_secs": 1.0, "left_context_secs": 15.0, "right_context_secs": 0.5,
|
| 31 |
+
"streaming_policy": "alignatt", "alignatt_thr": 8.0, "waitk_lagging": 3,
|
| 32 |
+
"exclude_sink_frames": 8, "xatt_scores_layer": -2, "hallucinations_detector": True
|
| 33 |
},
|
| 34 |
"data/spanish_podcast.wav": {
|
| 35 |
"task_type": "Translation", "lang_source": "Spanish", "lang_target": "English",
|
app/utils.py
CHANGED
|
@@ -18,7 +18,14 @@ from app.session_utils import (
|
|
| 18 |
get_active_task_flag_file,
|
| 19 |
get_folder_chunks
|
| 20 |
)
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# --------------------------------------------------------
|
| 24 |
# Utility functions
|
|
@@ -120,6 +127,216 @@ def read_and_stream_audio(filepath_to_stream: str, session_id: str, stop_streami
|
|
| 120 |
|
| 121 |
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
def handle_stream_error(session_id: str, error: Exception | str, stop_streaming_flags: dict | None = None):
|
| 124 |
"""
|
| 125 |
Handle streaming errors:
|
|
|
|
| 18 |
get_active_task_flag_file,
|
| 19 |
get_folder_chunks
|
| 20 |
)
|
| 21 |
+
from app.ui_utils import (
|
| 22 |
+
SUPPORTED_LANGS_MAP
|
| 23 |
+
)
|
| 24 |
+
from app.canary_speech_engine import CanarySpeechEngine,CanaryConfig
|
| 25 |
+
from app.silero_vad_engine import Silero_Vad_Engine
|
| 26 |
+
from app.streaming_audio_processor import StreamingAudioProcessor,StreamingAudioProcessorConfig
|
| 27 |
+
import nemo.collections.asr as nemo_asr
|
| 28 |
+
READ_SIZE=4000
|
| 29 |
|
| 30 |
# --------------------------------------------------------
|
| 31 |
# Utility functions
|
|
|
|
| 127 |
|
| 128 |
|
| 129 |
|
| 130 |
+
# asr_model = nemo_asr.models.ASRModel.from_pretrained("nvidia/canary-1b-v2")
|
| 131 |
+
asr_model = None
|
| 132 |
+
|
| 133 |
+
@spaces.GPU
|
| 134 |
+
def task_fake(session_id: str,
|
| 135 |
+
task_type, lang_source, lang_target,
|
| 136 |
+
chunk_secs, left_context_secs, right_context_secs,
|
| 137 |
+
streaming_policy, alignatt_thr, waitk_lagging,
|
| 138 |
+
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 139 |
+
):
|
| 140 |
+
"""Continuously read and delete .npz chunks while task is active."""
|
| 141 |
+
global asr_model
|
| 142 |
+
yield ("initializing the CanarySpeechEngine and Silero_Vad_Engine", "info", None)
|
| 143 |
+
### TODO
|
| 144 |
+
##-----------
|
| 145 |
+
# conf = CanaryConfig.from_params(
|
| 146 |
+
# task_type, SUPPORTED_LANGS_MAP.get(lang_source),SUPPORTED_LANGS_MAP.get(lang_target) ,
|
| 147 |
+
# chunk_secs, left_context_secs, right_context_secs,
|
| 148 |
+
# streaming_policy, alignatt_thr, waitk_lagging,
|
| 149 |
+
# exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 150 |
+
# )
|
| 151 |
+
|
| 152 |
+
# canary_speech_engine = CanarySpeechEngine(asr_model,conf)
|
| 153 |
+
# silero_vad_engine = Silero_Vad_Engine()
|
| 154 |
+
# streaming_audio_processor_config = StreamingAudioProcessorConfig(
|
| 155 |
+
# read_size=READ_SIZE,
|
| 156 |
+
# silence_threshold_chunks=1
|
| 157 |
+
# )
|
| 158 |
+
# streamer = StreamingAudioProcessor(speech_engine=canary_speech_engine,vad_engine=silero_vad_engine,cfg=streaming_audio_processor_config)
|
| 159 |
+
##-----------
|
| 160 |
+
yield ("initialized the CanarySpeechEngine and Silero_Vad_Engine", "info", None)
|
| 161 |
+
yield (f"Task started for session {session_id}", "info", None)
|
| 162 |
+
|
| 163 |
+
active_flag = get_active_task_flag_file(session_id)
|
| 164 |
+
with open(active_flag, "w") as f:
|
| 165 |
+
f.write("1")
|
| 166 |
+
chunk_dir = get_folder_chunks(session_id)
|
| 167 |
+
logging.info(f"[{session_id}] task started. {chunk_dir}")
|
| 168 |
+
|
| 169 |
+
try:
|
| 170 |
+
logging.info(f"[{session_id}] task loop started.")
|
| 171 |
+
yield (f"Task started for session {session_id}", "info", None)
|
| 172 |
+
|
| 173 |
+
while os.path.exists(active_flag):
|
| 174 |
+
if not os.path.exists(chunk_dir):
|
| 175 |
+
logging.warning(f"[{session_id}] No chunk directory found for task.")
|
| 176 |
+
yield ("No audio chunks yet... waiting for stream.", "warning", None)
|
| 177 |
+
time.sleep(0.1)
|
| 178 |
+
continue
|
| 179 |
+
|
| 180 |
+
files = sorted(f for f in os.listdir(chunk_dir) if f.endswith(".npz"))
|
| 181 |
+
if not files:
|
| 182 |
+
time.sleep(0.1)
|
| 183 |
+
continue
|
| 184 |
+
|
| 185 |
+
for fname in files:
|
| 186 |
+
fpath = os.path.join(chunk_dir, fname)
|
| 187 |
+
try:
|
| 188 |
+
npz = np.load(fpath)
|
| 189 |
+
samples = npz["data"]
|
| 190 |
+
rate = int(npz["rate"])
|
| 191 |
+
##-----------
|
| 192 |
+
# new_texts = streamer.process_chunk(samples)
|
| 193 |
+
# for text in new_texts:
|
| 194 |
+
# print(text, end='', flush=True)
|
| 195 |
+
# yield (text, "success", text)
|
| 196 |
+
# logging.debug(f"[{session_id}] {new_texts}")
|
| 197 |
+
##-----------
|
| 198 |
+
### TODO
|
| 199 |
+
text = f"Transcribed {fname}: {len(samples)} samples @ {rate}Hz\n"
|
| 200 |
+
yield (text, "success", fname)
|
| 201 |
+
os.remove(fpath)
|
| 202 |
+
logging.debug(f"[{session_id}] Deleted processed chunk: {fname}")
|
| 203 |
+
except Exception as e:
|
| 204 |
+
logging.warning(f"[{session_id}] Error processing {fname}: {e}")
|
| 205 |
+
yield (f"Error processing {fname}: {e}", "warning", fname)
|
| 206 |
+
continue
|
| 207 |
+
time.sleep(0.1)
|
| 208 |
+
|
| 209 |
+
# TODO
|
| 210 |
+
##-----------
|
| 211 |
+
# final_text = streamer.finalize_stream()
|
| 212 |
+
# yield (text, "success", final_text)
|
| 213 |
+
##-----------
|
| 214 |
+
yield ("DONE", "done", None)
|
| 215 |
+
logging.info(f"[{session_id}] task loop ended (flag removed).")
|
| 216 |
+
|
| 217 |
+
except Exception as e:
|
| 218 |
+
logging.error(f"[{session_id}] task error: {e}", exc_info=True)
|
| 219 |
+
yield (f"Unexpected error: {e}", "error", None)
|
| 220 |
+
|
| 221 |
+
finally:
|
| 222 |
+
if os.path.exists(active_flag):
|
| 223 |
+
os.remove(active_flag)
|
| 224 |
+
logging.info(f"[{session_id}] task stopped.")
|
| 225 |
+
|
| 226 |
+
try:
|
| 227 |
+
if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
|
| 228 |
+
os.rmdir(chunk_dir)
|
| 229 |
+
logging.debug(f"[{session_id}] Cleaned up empty chunk dir.")
|
| 230 |
+
except Exception as e:
|
| 231 |
+
logging.error(f"[{session_id}] Cleanup error: {e}")
|
| 232 |
+
yield (f"Cleanup error: {e}", "error", None)
|
| 233 |
+
|
| 234 |
+
logging.info(f"[{session_id}] Exiting task loop.")
|
| 235 |
+
yield ("Task finished and cleaned up.", "done", None)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def task(session_id: str,
|
| 240 |
+
task_type, lang_source, lang_target,
|
| 241 |
+
chunk_secs, left_context_secs, right_context_secs,
|
| 242 |
+
streaming_policy, alignatt_thr, waitk_lagging,
|
| 243 |
+
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 244 |
+
):
|
| 245 |
+
"""Continuously read and delete .npz chunks while task is active."""
|
| 246 |
+
global asr_model
|
| 247 |
+
yield ("initializing the CanarySpeechEngine and Silero_Vad_Engine", "info", None)
|
| 248 |
+
conf = CanaryConfig.from_params(
|
| 249 |
+
task_type, SUPPORTED_LANGS_MAP.get(lang_source),SUPPORTED_LANGS_MAP.get(lang_target) ,
|
| 250 |
+
chunk_secs, left_context_secs, right_context_secs,
|
| 251 |
+
streaming_policy, alignatt_thr, waitk_lagging,
|
| 252 |
+
exclude_sink_frames, xatt_scores_layer, hallucinations_detector
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
canary_speech_engine = CanarySpeechEngine(asr_model,conf)
|
| 256 |
+
silero_vad_engine = Silero_Vad_Engine()
|
| 257 |
+
streaming_audio_processor_config = StreamingAudioProcessorConfig(
|
| 258 |
+
read_size=READ_SIZE,
|
| 259 |
+
silence_threshold_chunks=1
|
| 260 |
+
)
|
| 261 |
+
streamer = StreamingAudioProcessor(speech_engine=canary_speech_engine,vad_engine=silero_vad_engine,cfg=streaming_audio_processor_config)
|
| 262 |
+
yield ("initialized the CanarySpeechEngine and Silero_Vad_Engine", "info", None)
|
| 263 |
+
yield (f"Task started for session {session_id}", "info", None)
|
| 264 |
+
|
| 265 |
+
active_flag = get_active_task_flag_file(session_id)
|
| 266 |
+
with open(active_flag, "w") as f:
|
| 267 |
+
f.write("1")
|
| 268 |
+
chunk_dir = get_folder_chunks(session_id)
|
| 269 |
+
logging.info(f"[{session_id}] task started. {chunk_dir}")
|
| 270 |
+
|
| 271 |
+
try:
|
| 272 |
+
logging.info(f"[{session_id}] task loop started.")
|
| 273 |
+
yield (f"Task started for session {session_id}", "info", None)
|
| 274 |
+
|
| 275 |
+
while os.path.exists(active_flag):
|
| 276 |
+
if not os.path.exists(chunk_dir):
|
| 277 |
+
logging.warning(f"[{session_id}] No chunk directory found for task.")
|
| 278 |
+
yield ("No audio chunks yet... waiting for stream.", "warning", None)
|
| 279 |
+
time.sleep(0.1)
|
| 280 |
+
continue
|
| 281 |
+
|
| 282 |
+
files = sorted(f for f in os.listdir(chunk_dir) if f.endswith(".npz"))
|
| 283 |
+
if not files:
|
| 284 |
+
time.sleep(0.1)
|
| 285 |
+
continue
|
| 286 |
+
|
| 287 |
+
for fname in files:
|
| 288 |
+
fpath = os.path.join(chunk_dir, fname)
|
| 289 |
+
try:
|
| 290 |
+
npz = np.load(fpath)
|
| 291 |
+
samples = npz["data"]
|
| 292 |
+
rate = int(npz["rate"])
|
| 293 |
+
new_texts = streamer.process_chunk(samples)
|
| 294 |
+
for text in new_texts:
|
| 295 |
+
print(text, end='', flush=True)
|
| 296 |
+
yield (text, "success", text)
|
| 297 |
+
logging.debug(f"[{session_id}] {new_texts}")
|
| 298 |
+
### TODO
|
| 299 |
+
# text = f"Transcribed {fname}: {len(samples)} samples @ {rate}Hz\n"
|
| 300 |
+
# yield (text, "success", fname)
|
| 301 |
+
os.remove(fpath)
|
| 302 |
+
logging.debug(f"[{session_id}] Deleted processed chunk: {fname}")
|
| 303 |
+
except Exception as e:
|
| 304 |
+
logging.warning(f"[{session_id}] Error processing {fname}: {e}")
|
| 305 |
+
yield (f"Error processing {fname}: {e}", "warning", fname)
|
| 306 |
+
continue
|
| 307 |
+
time.sleep(0.1)
|
| 308 |
+
|
| 309 |
+
# TODO
|
| 310 |
+
final_text = streamer.finalize_stream()
|
| 311 |
+
yield (text, "success", final_text)
|
| 312 |
+
# if final_text:
|
| 313 |
+
# print(final_text, end='', flush=True)
|
| 314 |
+
# yield f"\n{final_text}"
|
| 315 |
+
##
|
| 316 |
+
yield ("DONE", "done", None)
|
| 317 |
+
logging.info(f"[{session_id}] task loop ended (flag removed).")
|
| 318 |
+
|
| 319 |
+
except Exception as e:
|
| 320 |
+
logging.error(f"[{session_id}] task error: {e}", exc_info=True)
|
| 321 |
+
yield (f"Unexpected error: {e}", "error", None)
|
| 322 |
+
|
| 323 |
+
finally:
|
| 324 |
+
if os.path.exists(active_flag):
|
| 325 |
+
os.remove(active_flag)
|
| 326 |
+
logging.info(f"[{session_id}] task stopped.")
|
| 327 |
+
|
| 328 |
+
try:
|
| 329 |
+
if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
|
| 330 |
+
os.rmdir(chunk_dir)
|
| 331 |
+
logging.debug(f"[{session_id}] Cleaned up empty chunk dir.")
|
| 332 |
+
except Exception as e:
|
| 333 |
+
logging.error(f"[{session_id}] Cleanup error: {e}")
|
| 334 |
+
yield (f"Cleanup error: {e}", "error", None)
|
| 335 |
+
|
| 336 |
+
logging.info(f"[{session_id}] Exiting task loop.")
|
| 337 |
+
yield ("Task finished and cleaned up.", "done", None)
|
| 338 |
+
|
| 339 |
+
|
| 340 |
def handle_stream_error(session_id: str, error: Exception | str, stop_streaming_flags: dict | None = None):
|
| 341 |
"""
|
| 342 |
Handle streaming errors:
|
assets/custom_style.css
CHANGED
|
@@ -144,4 +144,55 @@ body {
|
|
| 144 |
padding: 0.75rem;
|
| 145 |
color: #991B1B;
|
| 146 |
font-weight: 500;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
}
|
|
|
|
| 144 |
padding: 0.75rem;
|
| 145 |
color: #991B1B;
|
| 146 |
font-weight: 500;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
#status-message-task {
|
| 150 |
+
padding: 0.75rem;
|
| 151 |
+
border-radius: 8px; /* Coins arrondis */
|
| 152 |
+
margin-top: 10px;
|
| 153 |
+
font-weight: 500; /* Un peu plus gras que la normale */
|
| 154 |
+
border: 1px solid transparent;
|
| 155 |
+
display: none; /* Caché par défaut */
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
/* Le style .info (bleu) */
|
| 159 |
+
#status-message-task.info{
|
| 160 |
+
color: #0c5464; /* Texte bleu foncé */
|
| 161 |
+
background-color: #d1ecf1; /* Fond bleu clair */
|
| 162 |
+
border-color: #bee5eb; /* Bordure bleue */
|
| 163 |
+
display: block; /* Le rend visible */
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
/* Le style .warning (jaune/orange) */
|
| 167 |
+
#status-message-task.warning {
|
| 168 |
+
color: #856404; /* Texte ocre */
|
| 169 |
+
background-color: #fff3cd; /* Fond jaune clair */
|
| 170 |
+
border-color: #ffeeba; /* Bordure jaune */
|
| 171 |
+
display: block; /* Le rend visible */
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
/* Le style .error (rouge) */
|
| 175 |
+
#status-message-task.error {
|
| 176 |
+
color: #721c24; /* Texte rouge foncé */
|
| 177 |
+
background-color: #f8d7da; /* Fond rouge clair */
|
| 178 |
+
border-color: #f5c6cb; /* Bordure rouge */
|
| 179 |
+
display: block; /* Le rend visible */
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
/* Styles personnalisés pour le WebRTC */
|
| 183 |
+
#webcam-stream {
|
| 184 |
+
border: 2px solid #007bff;
|
| 185 |
+
border-radius: 10px;
|
| 186 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
| 187 |
+
background-color: #f8f9fa;
|
| 188 |
+
margin: 10px 0;
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
#webcam-stream .gr-webRTC {
|
| 192 |
+
background-color: #e9ecef;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
#task-output-box textarea {
|
| 196 |
+
font-size: 1.15em; /* 'Moyenne taille' - ajustez au besoin */
|
| 197 |
+
font-weight: bold; /* 'En gras' */
|
| 198 |
}
|