Implemented experimental real production ready voice chat, relegated old flow to voice debug mode. New Web UI panel for Voice Chat.

2026-01-20 23:06:17 +02:00
parent 362108f4b0
commit 2934efba22
31 changed files with 5408 additions and 357 deletions
--- a/stt-realtime/Dockerfile
+++ b/stt-realtime/Dockerfile
@@ -0,0 +1,58 @@
+# RealtimeSTT Container
+# Uses Faster-Whisper with CUDA for GPU-accelerated inference
+# Includes dual VAD (WebRTC + Silero) for robust voice detection
+
+FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04
+
+# Suppress apt/debconf prompts during the build only; ARG is visible to RUN but is not persisted in the image
+ARG DEBIAN_FRONTEND=noninteractive
+# Keep Python stdout/stderr unbuffered so container logs are not delayed by buffering
+ENV PYTHONUNBUFFERED=1
+# All relative paths in later instructions resolve against /app
+WORKDIR /app
+
+# Install system dependencies, skipping apt Recommends (ca-certificates is expected via python3-pip's Depends; add it explicitly if HTTPS fetches fail)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    ffmpeg \
+    git \
+    libportaudio2 \
+    libsndfile1 \
+    portaudio19-dev \
+    python3-pip \
+    python3.11 \
+    python3.11-dev \
+    python3.11-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip without leaving its download cache in the image layer
+RUN python3.11 -m pip install --no-cache-dir --upgrade pip
+
+# Stage only the dependency manifest first so the install layers below are reused until it changes
+COPY requirements.txt ./
+
+# Install everything listed in requirements.txt
+RUN python3.11 -m pip install -r requirements.txt --no-cache-dir
+# CUDA 12.1 builds of torch/torchaudio from the PyTorch wheel index (author's note: compatible with the CUDA 12.6 base).
+# NOTE(review): this runs after requirements.txt, so it would replace any torch pulled in there - confirm the order is intended.
+RUN python3.11 -m pip install --no-cache-dir \
+    --index-url https://download.pytorch.org/whl/cu121 \
+    torch==2.5.1+cu121 \
+    torchaudio==2.5.1+cu121
+
+# Add the server script after the dependency layers so code edits do not invalidate them
+COPY stt_server.py ./
+
+# Pre-create the Hugging Face cache directory (per the original note, models are downloaded on first run)
+RUN mkdir --parents /root/.cache/huggingface
+
+# Document the WebSocket port (EXPOSE alone does not publish it)
+EXPOSE 8766
+
+# Health check - open a TCP connection to port 8766 with Python's socket module (no netcat needed).
+# create_connection tries each address that localhost resolves to; the socket is closed right after connecting.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
+    CMD python3.11 -c "import socket; socket.create_connection(('localhost', 8766), timeout=2).close()" || exit 1
+# Run the server
+CMD ["python3.11", "stt_server.py"]