# Multi-stage Docker build following our proven HuggingFace pattern
# This approach uses official moshi-server instead of custom implementation

# Build argument for CUDA compute capability (T4 = 75)
ARG CUDA_COMPUTE_CAP=75

FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder

# Install system dependencies for moshi-server
RUN apt-get update && apt-get install -y \
    curl \
    build-essential \
    pkg-config \
    libssl-dev \
    libsndfile1-dev \
    libasound2-dev \
    wget \
    ca-certificates \
    git \
    cmake \
    libprotobuf-dev \
    protobuf-compiler \
    python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Rust
ENV RUSTUP_HOME=/usr/local/rustup
ENV CARGO_HOME=/usr/local/cargo
ENV PATH=/usr/local/cargo/bin:$PATH
ENV RUST_VERSION=stable
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain $RUST_VERSION
RUN chmod -R a+w $RUSTUP_HOME $CARGO_HOME

WORKDIR /app

# Official moshi-server installation stage
FROM base-builder AS moshi-server-builder

ARG CUDA_COMPUTE_CAP
ENV CUDA_COMPUTE_CAP=${CUDA_COMPUTE_CAP}
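# CUDA_COMPUTE_CAP is exported before the build because the CUDA kernel
# compilation triggered by cargo (candle-based) is assumed to read this
# variable so the kernels target the T4's sm_75 architecture.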
# Set build optimizations to prevent hangs
ENV CARGO_NET_RETRY=10
ENV CARGO_HTTP_TIMEOUT=300
ENV CARGO_HTTP_LOW_SPEED_LIMIT=10
ENV CARGO_BUILD_JOBS=4

# Install official moshi-server with CUDA support
# Note: Official moshi-server only has 'cuda' feature, no architecture-specific variants
RUN cargo install --features cuda moshi-server
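# cargo install drops the compiled binary into $CARGO_HOME/bin
# (/usr/local/cargo/bin/moshi-server); the runtime stage copies it from there.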
# Model download stage - pre-load models into image
FROM base-builder AS model-downloader

# Install Python and pip for model downloading
RUN apt-get update && apt-get install -y python3 python3-pip && rm -rf /var/lib/apt/lists/*

# Install huggingface-hub for model downloading
RUN pip3 install --no-cache-dir huggingface-hub

# Set working directory for models first
WORKDIR /app/models

# Create models directory for 1B multilingual model (T4 GPU compatible)
RUN mkdir -p kyutai/stt-1b-en_fr-candle

# Create download script for 1B multilingual model
RUN echo 'from huggingface_hub import hf_hub_download\n\
import os\n\
import subprocess\n\
\n\
os.makedirs("kyutai/stt-1b-en_fr-candle", exist_ok=True)\n\
print("Downloading 1B multilingual STT model for official moshi-server...")\n\
\n\
print("Downloading model.safetensors...")\n\
hf_hub_download(\n\
    repo_id="kyutai/stt-1b-en_fr-candle",\n\
    filename="model.safetensors",\n\
    local_dir="kyutai/stt-1b-en_fr-candle",\n\
    local_dir_use_symlinks=False\n\
)\n\
\n\
print("Downloading tokenizer (8000 vocab)...")\n\
hf_hub_download(\n\
    repo_id="kyutai/stt-1b-en_fr-candle",\n\
    filename="tokenizer_en_fr_audio_8000.model",\n\
    local_dir="kyutai/stt-1b-en_fr-candle",\n\
    local_dir_use_symlinks=False\n\
)\n\
\n\
print("Downloading Mimi audio tokenizer...")\n\
hf_hub_download(\n\
    repo_id="kyutai/stt-1b-en_fr-candle",\n\
    filename="mimi-pytorch-e351c8d8@125.safetensors",\n\
    local_dir="kyutai/stt-1b-en_fr-candle",\n\
    local_dir_use_symlinks=False\n\
)\n\
\n\
print("All models downloaded successfully!")\n\
result = subprocess.run(["du", "-sh", "kyutai/"], capture_output=True, text=True)\n\
print("Model files:", result.stdout)\n\
' > download_models.py

# Download models during build time
RUN python3 download_models.py
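# At this point the model files live under /app/models/kyutai/stt-1b-en_fr-candle;
# the runtime stage copies them in below, so the Space never has to reach the Hub at startup.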
# Runtime stage
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS runtime

# Install Python and runtime dependencies including audio libraries for moshi-server
# (lsof is required by the port check in start.sh below)
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    wget \
    curl \
    git \
    sudo \
    lsof \
    libopus0 \
    libsndfile1 \
    libasound2 \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies for web interface
RUN pip3 install --no-cache-dir gradio fastapi uvicorn websockets python-multipart msgpack
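# These packages back app.py, the web frontend that listens on port 7860 and
# proxies to moshi-server (see the startup script below).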
# Set working directory
WORKDIR /app

# Copy official moshi-server binary from builder
COPY --from=moshi-server-builder /usr/local/cargo/bin/moshi-server /usr/local/bin/

# Copy configuration files
COPY configs/ ./configs/

# Copy pre-downloaded models from model-downloader stage
COPY --from=model-downloader /app/models/ ./models/

# Copy Python app and requirements
COPY app.py .
COPY requirements.txt .

# Create user for HuggingFace Spaces
RUN useradd -m -u 1000 user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/.cache/huggingface

# Create and set permissions for HuggingFace cache directory
RUN mkdir -p /home/user/.cache/huggingface && \
    chown -R user:user /home/user/.cache

# Set proper permissions and ownership
RUN chown -R user:user /app

USER user

# Expose port (HuggingFace Spaces requirement)
EXPOSE 7860

# Create startup script that handles port constraints
RUN echo '#!/bin/bash\n\
echo "===== Application Startup at $(date) ====="\n\
echo "Starting Kyutai STT Server Moshi v4 with official moshi-server..."\n\
echo "Pre-loaded models:"\n\
ls -lah models/kyutai/stt-1b-en_fr-candle/ 2>/dev/null || echo "Model directory not found"\n\
echo "GPU Info:"\n\
nvidia-smi 2>/dev/null || echo "No GPU detected at runtime"\n\
echo "Official moshi-server endpoints:"\n\
echo "  - STT Streaming: /api/asr-streaming (MessagePack protocol)"\n\
echo "  - Health Check: /health"\n\
echo "Starting Python frontend and official moshi-server..."\n\
\n\
# Create required directories for moshi-server\n\
mkdir -p /app/static /app/logs\n\
\n\
# Check if port 7860 is available (HuggingFace requirement)\n\
if lsof -Pi :7860 -sTCP:LISTEN -t >/dev/null 2>&1; then\n\
    echo "Port 7860 already in use, exiting gracefully (PID: $$)"\n\
    exit 0\n\
fi\n\
\n\
# Start Python web interface in background\n\
python3 app.py &\n\
\n\
# Start official moshi-server as main process\n\
# Note: moshi-server will bind to its default port, Python app proxies on 7860\n\
exec moshi-server worker --config configs/config-stt-en_fr-hf.toml\n\
' > /app/start.sh && chmod +x /app/start.sh
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1
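# Note: the health check goes through port 7860, the Spaces-facing port served by
# the Python frontend, rather than hitting moshi-server's own port directly.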
# Run the combined server
CMD ["/app/start.sh"]