diff --git a/bot/requirements.txt b/bot/requirements.txt index 512de89..d5fad8b 100644 --- a/bot/requirements.txt +++ b/bot/requirements.txt @@ -14,6 +14,7 @@ Pillow opencv-contrib-python numpy transformers +scikit-learn PyNaCl>=1.5.0 websockets>=12.0 discord-ext-voice-recv diff --git a/stt-realtime/Dockerfile b/stt-realtime/Dockerfile index 7b64fc5..eba464b 100644 --- a/stt-realtime/Dockerfile +++ b/stt-realtime/Dockerfile @@ -1,8 +1,14 @@ # RealtimeSTT Container # Uses Faster-Whisper with CUDA for GPU-accelerated inference # Includes dual VAD (WebRTC + Silero) for robust voice detection +# +# Updated per RealtimeSTT PR #295: +# - CUDA 12.8.1 (latest stable) +# - PyTorch 2.7.1 with cu128 support +# - Ubuntu 24.04 base +# - Single system Python 3.12 installation (Ubuntu 24.04 default) -FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04 +FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 # Prevent interactive prompts during build ENV DEBIAN_FRONTEND=noninteractive @@ -11,35 +17,23 @@ ENV PYTHONUNBUFFERED=1 # Set working directory WORKDIR /app -# Install system dependencies +# Install system dependencies (Ubuntu 24.04 has Python 3.12 by default) RUN apt-get update && apt-get install -y \ - python3.11 \ - python3.11-venv \ - python3.11-dev \ python3-pip \ - build-essential \ ffmpeg \ libsndfile1 \ libportaudio2 \ - portaudio19-dev \ git \ curl \ && rm -rf /var/lib/apt/lists/* -# Upgrade pip -RUN python3.11 -m pip install --upgrade pip +# Install PyTorch with CUDA 12.8 support (installed first for layer caching) +COPY requirements-gpu-torch.txt . +RUN python3 -m pip install --break-system-packages --no-cache-dir -r requirements-gpu-torch.txt -# Copy requirements first (for Docker layer caching) +# Copy and install other Python dependencies COPY requirements.txt . 
- -# Install Python dependencies -RUN python3.11 -m pip install --no-cache-dir -r requirements.txt - -# Install PyTorch with CUDA 12.1 support (compatible with CUDA 12.6) -RUN python3.11 -m pip install --no-cache-dir \ - torch==2.5.1+cu121 \ - torchaudio==2.5.1+cu121 \ - --index-url https://download.pytorch.org/whl/cu121 +RUN python3 -m pip install --break-system-packages --no-cache-dir -r requirements.txt # Copy application code COPY stt_server.py . @@ -52,7 +46,7 @@ EXPOSE 8766 # Health check - use netcat to check if port is listening HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \ - CMD python3.11 -c "import socket; s=socket.socket(); s.settimeout(2); s.connect(('localhost', 8766)); s.close()" || exit 1 + CMD python3 -c "import socket; s=socket.socket(); s.settimeout(2); s.connect(('localhost', 8766)); s.close()" || exit 1 # Run the server -CMD ["python3.11", "stt_server.py"] +CMD ["python3", "stt_server.py"] diff --git a/stt-realtime/requirements-gpu-torch.txt b/stt-realtime/requirements-gpu-torch.txt new file mode 100644 index 0000000..f3187ca --- /dev/null +++ b/stt-realtime/requirements-gpu-torch.txt @@ -0,0 +1,5 @@ +# PyTorch with CUDA 12.8 support +# Updated per RealtimeSTT PR #295 for better performance +torch==2.7.1+cu128 +torchaudio==2.7.1+cu128 +--index-url https://download.pytorch.org/whl/cu128