# syntax=docker/dockerfile:1
# RealtimeSTT Container
# Uses Faster-Whisper with CUDA for GPU-accelerated inference
# Includes dual VAD (WebRTC + Silero) for robust voice detection
#
# Updated per RealtimeSTT PR #295:
# - CUDA 12.8.1 (latest stable)
# - PyTorch 2.7.1 with cu128 support
# - Ubuntu 24.04 base
# - Single Python 3.11 installation
FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04

# Build-time only: suppress interactive apt prompts. Declared as ARG (not
# ENV) so it does not leak into the container's runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1

# Set working directory (created automatically if missing)
WORKDIR /app

# Install system dependencies (Ubuntu 24.04 ships Python 3.12 by default).
# --no-install-recommends keeps the runtime image small; the apt lists are
# removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    ffmpeg \
    git \
    libportaudio2 \
    libsndfile1 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install PyTorch with CUDA 12.8 support first so this very large layer is
# cached independently of the more frequently changing app requirements.
# --break-system-packages is required on Ubuntu 24.04 (PEP 668 externally
# managed environment).
COPY requirements-gpu-torch.txt .
RUN python3 -m pip install --break-system-packages --no-cache-dir \
    -r requirements-gpu-torch.txt

# Copy and install the remaining Python dependencies
COPY requirements.txt .
RUN python3 -m pip install --break-system-packages --no-cache-dir \
    -r requirements.txt

# Run as a non-root user. Whisper models are downloaded on first run into
# the Hugging Face cache, so that directory must exist and be writable by
# the runtime user; HF_HOME points the HF hub libraries at it.
RUN useradd --system --create-home --home-dir /home/app --uid 10001 app \
    && mkdir -p /home/app/.cache/huggingface \
    && chown -R app:app /home/app /app
ENV HF_HOME=/home/app/.cache/huggingface

# Copy application code
COPY --chown=app:app stt_server.py .

USER app

# Documentation only (does not publish the port): WebSocket server listens
# on 8766 — unprivileged, so binding as non-root is fine.
EXPOSE 8766

# Health check: plain TCP connect to the WebSocket port via Python's socket
# module (the image ships no netcat/curl). The long start period allows for
# the initial model download.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD python3 -c "import socket; s=socket.socket(); s.settimeout(2); s.connect(('localhost', 8766)); s.close()" || exit 1

# Exec-form CMD: the server runs as PID 1 and receives SIGTERM directly
# from `docker stop`.
CMD ["python3", "stt_server.py"]