# Multi-stage Docker build following our proven HuggingFace pattern
# This approach uses official moshi-server instead of custom implementation
# Build argument for CUDA compute capability (T4 = 75)
ARG CUDA_COMPUTE_CAP=75
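# For reference, other common compute capabilities (match to your GPU): 80 = A100, 86 = A10G, 89 = L4 / RTX 40xx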
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder
# Install system dependencies for moshi-server
RUN apt-get update && apt-get install -y \
curl \
build-essential \
pkg-config \
libssl-dev \
libsndfile1-dev \
libasound2-dev \
wget \
ca-certificates \
git \
cmake \
libprotobuf-dev \
protobuf-compiler \
python3-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Rust
ENV RUSTUP_HOME=/usr/local/rustup
ENV CARGO_HOME=/usr/local/cargo
ENV PATH=/usr/local/cargo/bin:$PATH
ENV RUST_VERSION=stable
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain $RUST_VERSION
RUN chmod -R a+w $RUSTUP_HOME $CARGO_HOME
WORKDIR /app
# Official moshi-server installation stage
FROM base-builder AS moshi-server-builder
ARG CUDA_COMPUTE_CAP
ENV CUDA_COMPUTE_CAP=${CUDA_COMPUTE_CAP}
# Set build optimizations to prevent hangs
ENV CARGO_NET_RETRY=10
ENV CARGO_HTTP_TIMEOUT=300
ENV CARGO_HTTP_LOW_SPEED_LIMIT=10
ENV CARGO_BUILD_JOBS=4
# Install official moshi-server with CUDA support
# Note: Official moshi-server only has 'cuda' feature, no architecture-specific variants
RUN cargo install --features cuda moshi-server
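# For reproducible builds, the crate version could be pinned (sketch; the
# version placeholder below is illustrative, not a tested release):
#   RUN cargo install --locked --features cuda moshi-server --version <x.y.z>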
# Model download stage - pre-load models into image
FROM base-builder AS model-downloader
# Install Python and pip for model downloading
RUN apt-get update && apt-get install -y python3 python3-pip && rm -rf /var/lib/apt/lists/*
# Install huggingface-hub for model downloading
RUN pip3 install --no-cache-dir huggingface-hub
# Set working directory for models first
WORKDIR /app/models
# Create models directory for 1B multilingual model (T4 GPU compatible)
RUN mkdir -p kyutai/stt-1b-en_fr-candle
# Create download script for 1B multilingual model
RUN echo 'from huggingface_hub import hf_hub_download\n\
import os\n\
import subprocess\n\
\n\
REPO_ID = "kyutai/stt-1b-en_fr-candle"\n\
LOCAL_DIR = "kyutai/stt-1b-en_fr-candle"\n\
\n\
os.makedirs(LOCAL_DIR, exist_ok=True)\n\
print("πŸ“₯ Downloading 1B multilingual STT model for official moshi-server...")\n\
\n\
print("⬇️ Downloading model.safetensors...")\n\
hf_hub_download(repo_id=REPO_ID, filename="model.safetensors", local_dir=LOCAL_DIR)\n\
\n\
print("⬇️ Downloading tokenizer (8000 vocab)...")\n\
hf_hub_download(repo_id=REPO_ID, filename="tokenizer_en_fr_audio_8000.model", local_dir=LOCAL_DIR)\n\
\n\
print("⬇️ Downloading Mimi audio tokenizer...")\n\
hf_hub_download(repo_id=REPO_ID, filename="mimi-pytorch-e351c8d8@125.safetensors", local_dir=LOCAL_DIR)\n\
\n\
print("βœ… All models downloaded successfully!")\n\
result = subprocess.run(["du", "-sh", "kyutai/"], capture_output=True, text=True)\n\
print("πŸ“Š Model files:", result.stdout)\n\
' > download_models.py
# Download models during build time
RUN python3 download_models.py
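# Alternative sketch: the same files could be fetched with the huggingface_hub
# CLI instead of the script above (assumes a recent huggingface_hub release):
#   RUN huggingface-cli download kyutai/stt-1b-en_fr-candle \
#       model.safetensors tokenizer_en_fr_audio_8000.model "mimi-pytorch-e351c8d8@125.safetensors" \
#       --local-dir kyutai/stt-1b-en_fr-candle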
# Runtime stage
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS runtime
# Install Python and runtime dependencies: audio libraries for moshi-server,
# plus lsof for the port check in start.sh
RUN apt-get update && apt-get install -y \
python3 \
python3-pip \
wget \
curl \
git \
sudo \
lsof \
libopus0 \
libsndfile1 \
libasound2 \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies for web interface
RUN pip3 install --no-cache-dir gradio fastapi uvicorn websockets python-multipart msgpack
# Set working directory
WORKDIR /app
# Copy official moshi-server binary from builder
COPY --from=moshi-server-builder /usr/local/cargo/bin/moshi-server /usr/local/bin/
# Copy configuration files
COPY configs/ ./configs/
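# moshi-server requires a log_dir entry in its TOML config. A minimal sketch of
# the expected shape (field names follow Kyutai's published STT configs; verify
# against the actual configs/config-stt-en_fr-hf.toml):
#   static_dir = "/app/static"
#   log_dir = "/app/logs"
#   authorized_ids = ["public_token"]
#   [modules.asr]
#   path = "/api/asr-streaming"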
# Copy pre-downloaded models from model-downloader stage
COPY --from=model-downloader /app/models/ ./models/
# Copy Python app and requirements
COPY app.py .
COPY requirements.txt .
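# Note: requirements.txt is copied but not installed here; the web-interface
# dependencies are installed explicitly via pip3 above.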
# Create user for HuggingFace Spaces
RUN useradd -m -u 1000 user
ENV HOME=/home/user \
PATH=/home/user/.local/bin:$PATH \
HF_HOME=/home/user/.cache/huggingface
# Create and set permissions for HuggingFace cache directory
RUN mkdir -p /home/user/.cache/huggingface && \
chown -R user:user /home/user/.cache
# Set proper permissions and ownership
RUN chown -R user:user /app
USER user
# Expose port (HuggingFace Spaces requirement)
EXPOSE 7860
# Create startup script that handles port constraints
RUN echo '#!/bin/bash\n\
echo "===== Application Startup at $(date) ====="\n\
echo "πŸš€ Starting Kyutai STT Server Moshi v4 with official moshi-server..."\n\
echo "πŸ“ Pre-loaded models:"\n\
ls -lah models/kyutai/stt-1b-en_fr-candle/ 2>/dev/null || echo "Model directory not found"\n\
echo "GPU Info:"\n\
nvidia-smi 2>/dev/null || echo "No GPU detected at runtime"\n\
echo "🌐 Official moshi-server endpoints:"\n\
echo " - STT Streaming: /api/asr-streaming (MessagePack protocol)"\n\
echo " - Health Check: /health"\n\
echo "Starting Python frontend and official moshi-server..."\n\
\n\
# Create required directories for moshi-server\n\
mkdir -p /app/static /app/logs\n\
\n\
# Check if port 7860 is available (HuggingFace requirement)\n\
if lsof -Pi :7860 -sTCP:LISTEN -t >/dev/null 2>&1; then\n\
echo "⚠️ Port 7860 already in use, exiting gracefully (PID: $$)"\n\
exit 0\n\
fi\n\
\n\
# Start Python web interface in background\n\
python3 app.py &\n\
\n\
# Start official moshi-server as main process\n\
# Note: moshi-server will bind to its default port, Python app proxies on 7860\n\
exec moshi-server worker --config configs/config-stt-en_fr-hf.toml\n\
' > /app/start.sh && chmod +x /app/start.sh
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
CMD curl -f http://localhost:7860/health || exit 1
# Run the combined server
CMD ["/app/start.sh"]
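# Local smoke test (sketch; image tag is illustrative and an NVIDIA container
# runtime is assumed):
#   docker build --build-arg CUDA_COMPUTE_CAP=75 -t kyutai-stt-moshi .
#   docker run --gpus all -p 7860:7860 kyutai-stt-moshi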