Implemented experimental real production ready voice chat, relegated old flow to voice debug mode. New Web UI panel for Voice Chat.

This commit is contained in:
2026-01-20 23:06:17 +02:00
parent 362108f4b0
commit 2934efba22
31 changed files with 5408 additions and 357 deletions

58
stt-realtime/Dockerfile Normal file
View File

@@ -0,0 +1,58 @@
# RealtimeSTT Container
# Uses Faster-Whisper with CUDA for GPU-accelerated inference
# Includes dual VAD (WebRTC + Silero) for robust voice detection
FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04
# Prevent interactive prompts during build
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
python3.11 \
python3.11-venv \
python3.11-dev \
python3-pip \
build-essential \
ffmpeg \
libsndfile1 \
libportaudio2 \
portaudio19-dev \
git \
curl \
&& rm -rf /var/lib/apt/lists/*
# Upgrade pip
RUN python3.11 -m pip install --upgrade pip
# Copy requirements first (for Docker layer caching)
COPY requirements.txt .
# Install Python dependencies
RUN python3.11 -m pip install --no-cache-dir -r requirements.txt
# Install PyTorch with CUDA 12.1 support (compatible with CUDA 12.6)
RUN python3.11 -m pip install --no-cache-dir \
torch==2.5.1+cu121 \
torchaudio==2.5.1+cu121 \
--index-url https://download.pytorch.org/whl/cu121
# Copy application code
COPY stt_server.py .
# Create models directory (models will be downloaded on first run)
RUN mkdir -p /root/.cache/huggingface
# Expose WebSocket port
EXPOSE 8766
# Health check - use netcat to check if port is listening
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
CMD python3.11 -c "import socket; s=socket.socket(); s.settimeout(2); s.connect(('localhost', 8766)); s.close()" || exit 1
# Run the server
CMD ["python3.11", "stt_server.py"]