# Multi-stage Docker build following our proven HuggingFace pattern
# This approach uses official moshi-server instead of custom implementation
# Build argument for CUDA compute capability (T4 = 75)
ARG CUDA_COMPUTE_CAP=75
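# For reference, other common compute capabilities (match to your GPU): 80 = A100, 86 = A10G, 89 = L4 / RTX 40xx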
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder
# Install system dependencies for moshi-server
RUN apt-get update && apt-get install -y \
curl \
build-essential \
pkg-config \
libssl-dev \
libsndfile1-dev \
libasound2-dev \
wget \
ca-certificates \
git \
cmake \
libprotobuf-dev \
protobuf-compiler \
python3-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Rust
ENV RUSTUP_HOME=/usr/local/rustup
ENV CARGO_HOME=/usr/local/cargo
ENV PATH=/usr/local/cargo/bin:$PATH
ENV RUST_VERSION=stable
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain $RUST_VERSION
RUN chmod -R a+w $RUSTUP_HOME $CARGO_HOME
WORKDIR /app
# Official moshi-server installation stage
FROM base-builder AS moshi-server-builder
ARG CUDA_COMPUTE_CAP
ENV CUDA_COMPUTE_CAP=${CUDA_COMPUTE_CAP}
# Set build optimizations to prevent hangs
ENV CARGO_NET_RETRY=10
ENV CARGO_HTTP_TIMEOUT=300
ENV CARGO_HTTP_LOW_SPEED_LIMIT=10
ENV CARGO_BUILD_JOBS=4
# Install official moshi-server with CUDA support
# Note: Official moshi-server only has 'cuda' feature, no architecture-specific variants
RUN cargo install --features cuda moshi-server
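# For reproducible builds, the crate version could be pinned (sketch; the
# version placeholder below is illustrative, not a tested release):
#   RUN cargo install --locked --features cuda moshi-server --version <x.y.z>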
# Model download stage - pre-load models into image
FROM base-builder AS model-downloader
# Install Python and pip for model downloading
RUN apt-get update && apt-get install -y python3 python3-pip && rm -rf /var/lib/apt/lists/*
# Install huggingface-hub for model downloading
RUN pip3 install --no-cache-dir huggingface-hub
# Set working directory for models first
WORKDIR /app/models
# Create models directory for 1B multilingual model (T4 GPU compatible)
RUN mkdir -p kyutai/stt-1b-en_fr-candle
# Create download script for 1B multilingual model
RUN echo 'from huggingface_hub import hf_hub_download\n\
import os\n\
import subprocess\n\
\n\
REPO_ID = "kyutai/stt-1b-en_fr-candle"\n\
LOCAL_DIR = "kyutai/stt-1b-en_fr-candle"\n\
\n\
os.makedirs(LOCAL_DIR, exist_ok=True)\n\
print("πŸ“₯ Downloading 1B multilingual STT model for official moshi-server...")\n\
\n\
print("⬇️ Downloading model.safetensors...")\n\
hf_hub_download(repo_id=REPO_ID, filename="model.safetensors", local_dir=LOCAL_DIR)\n\
\n\
print("⬇️ Downloading tokenizer (8000 vocab)...")\n\
hf_hub_download(repo_id=REPO_ID, filename="tokenizer_en_fr_audio_8000.model", local_dir=LOCAL_DIR)\n\
\n\
print("⬇️ Downloading Mimi audio tokenizer...")\n\
hf_hub_download(repo_id=REPO_ID, filename="mimi-pytorch-e351c8d8@125.safetensors", local_dir=LOCAL_DIR)\n\
\n\
print("βœ… All models downloaded successfully!")\n\
result = subprocess.run(["du", "-sh", "kyutai/"], capture_output=True, text=True)\n\
print("πŸ“Š Model files:", result.stdout)\n\
' > download_models.py
# Download models during build time
RUN python3 download_models.py
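# Alternative sketch: the same files could be fetched with the huggingface_hub
# CLI instead of the script above (assumes a recent huggingface_hub release):
#   RUN huggingface-cli download kyutai/stt-1b-en_fr-candle \
#       model.safetensors tokenizer_en_fr_audio_8000.model "mimi-pytorch-e351c8d8@125.safetensors" \
#       --local-dir kyutai/stt-1b-en_fr-candle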
# Runtime stage
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS runtime
# Install Python and runtime dependencies: audio libraries for moshi-server,
# plus lsof for the port check in start.sh
RUN apt-get update && apt-get install -y \
python3 \
python3-pip \
wget \
curl \
git \
sudo \
lsof \
libopus0 \
libsndfile1 \
libasound2 \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies for web interface
RUN pip3 install --no-cache-dir gradio fastapi uvicorn websockets python-multipart msgpack
# Set working directory
WORKDIR /app
# Copy official moshi-server binary from builder
COPY --from=moshi-server-builder /usr/local/cargo/bin/moshi-server /usr/local/bin/
# Copy configuration files
COPY configs/ ./configs/
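# moshi-server requires a log_dir entry in its TOML config. A minimal sketch of
# the expected shape (field names follow Kyutai's published STT configs; verify
# against the actual configs/config-stt-en_fr-hf.toml):
#   static_dir = "/app/static"
#   log_dir = "/app/logs"
#   authorized_ids = ["public_token"]
#   [modules.asr]
#   path = "/api/asr-streaming"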
# Copy pre-downloaded models from model-downloader stage
COPY --from=model-downloader /app/models/ ./models/
# Copy Python app and requirements
COPY app.py .
COPY requirements.txt .
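# Note: requirements.txt is copied but not installed here; the web-interface
# dependencies are installed explicitly via pip3 above.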
# Create user for HuggingFace Spaces
RUN useradd -m -u 1000 user
ENV HOME=/home/user \
PATH=/home/user/.local/bin:$PATH \
HF_HOME=/home/user/.cache/huggingface
# Create and set permissions for HuggingFace cache directory
RUN mkdir -p /home/user/.cache/huggingface && \
chown -R user:user /home/user/.cache
# Set proper permissions and ownership
RUN chown -R user:user /app
USER user
# Expose port (HuggingFace Spaces requirement)
EXPOSE 7860
# Create startup script that handles port constraints
RUN echo '#!/bin/bash\n\
echo "===== Application Startup at $(date) ====="\n\
echo "πŸš€ Starting Kyutai STT Server Moshi v4 with official moshi-server..."\n\
echo "πŸ“ Pre-loaded models:"\n\
ls -lah models/kyutai/stt-1b-en_fr-candle/ 2>/dev/null || echo "Model directory not found"\n\
echo "GPU Info:"\n\
nvidia-smi 2>/dev/null || echo "No GPU detected at runtime"\n\
echo "🌐 Official moshi-server endpoints:"\n\
echo " - STT Streaming: /api/asr-streaming (MessagePack protocol)"\n\
echo " - Health Check: /health"\n\
echo "Starting Python frontend and official moshi-server..."\n\
\n\
# Create required directories for moshi-server\n\
mkdir -p /app/static /app/logs\n\
\n\
# Check if port 7860 is available (HuggingFace requirement)\n\
if lsof -Pi :7860 -sTCP:LISTEN -t >/dev/null 2>&1; then\n\
echo "⚠️ Port 7860 already in use, exiting gracefully (PID: $$)"\n\
exit 0\n\
fi\n\
\n\
# Start Python web interface in background\n\
python3 app.py &\n\
\n\
# Start official moshi-server as main process\n\
# Note: moshi-server will bind to its default port, Python app proxies on 7860\n\
exec moshi-server worker --config configs/config-stt-en_fr-hf.toml\n\
' > /app/start.sh && chmod +x /app/start.sh
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
CMD curl -f http://localhost:7860/health || exit 1
# Run the combined server
CMD ["/app/start.sh"]
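# Local smoke test (sketch; image tag is illustrative and an NVIDIA container
# runtime is assumed):
#   docker build --build-arg CUDA_COMPUTE_CAP=75 -t kyutai-stt-moshi .
#   docker run --gpus all -p 7860:7860 kyutai-stt-moshi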