diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..f10ec3a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +# .dockerignore for llama-swap-rocm (build context is project root) +# Dockerfile.llamaswap-rocm doesn't COPY anything from the build context — +# everything is git-cloned in multi-stage builds. Exclude everything to avoid +# sending ~31 GB of unnecessary build context (models, backups, etc.) + +# Exclude everything by default +* + +# Only include what the Dockerfile actually needs (nothing from context currently) +# If the Dockerfile changes to COPY files, add exceptions here with !filename diff --git a/.gitignore b/.gitignore index 3e026de..940b4ee 100644 --- a/.gitignore +++ b/.gitignore @@ -37,9 +37,6 @@ models/*.bin *.log logs/ -# Docker -.dockerignore - # OS .DS_Store Thumbs.db diff --git a/face-detector/.dockerignore b/face-detector/.dockerignore new file mode 100644 index 0000000..5fdb6b1 --- /dev/null +++ b/face-detector/.dockerignore @@ -0,0 +1,6 @@ +# Exclude accumulated detection outputs (volume-mounted at runtime anyway) +api/outputs/ +api/__pycache__/ +__pycache__/ +*.pyc +images/ diff --git a/stt-realtime/Dockerfile b/stt-realtime/Dockerfile index eba464b..b6a8a8c 100644 --- a/stt-realtime/Dockerfile +++ b/stt-realtime/Dockerfile @@ -1,12 +1,12 @@ # RealtimeSTT Container # Uses Faster-Whisper with CUDA for GPU-accelerated inference -# Includes dual VAD (WebRTC + Silero) for robust voice detection +# Includes Silero VAD (ONNX, CPU-only) for robust voice detection # # Updated per RealtimeSTT PR #295: # - CUDA 12.8.1 (latest stable) -# - PyTorch 2.7.1 with cu128 support +# - PyTorch CPU-only (departs from PR #295's cu128 build; for Silero VAD tensor ops only - saves ~2.3 GB) +# - Faster-Whisper/CTranslate2 uses CUDA directly, no PyTorch GPU needed # - Ubuntu 24.04 base -# - Single Python 3.11 installation FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 @@ -27,7 +27,7 @@ RUN apt-get update && apt-get install -y \ curl \ && rm -rf 
/var/lib/apt/lists/* -# Install PyTorch with CUDA 12.8 support (installed first for layer caching) +# Install PyTorch CPU-only first for layer caching (for Silero VAD tensor ops - GPU transcription uses CTranslate2 directly) COPY requirements-gpu-torch.txt . RUN python3 -m pip install --break-system-packages --no-cache-dir -r requirements-gpu-torch.txt diff --git a/stt-realtime/requirements-gpu-torch.txt b/stt-realtime/requirements-gpu-torch.txt index f3187ca..aa9b9ea 100644 --- a/stt-realtime/requirements-gpu-torch.txt +++ b/stt-realtime/requirements-gpu-torch.txt @@ -1,5 +1,7 @@ -# PyTorch with CUDA 12.8 support -# Updated per RealtimeSTT PR #295 for better performance -torch==2.7.1+cu128 -torchaudio==2.7.1+cu128 ---index-url https://download.pytorch.org/whl/cu128 +# PyTorch CPU-only (used solely for Silero VAD, which runs on CPU) +# Silero VAD's OnnxWrapper uses torch tensors internally but does not need a GPU. +# Faster-Whisper/CTranslate2 handles GPU transcription via CUDA directly. +# torchaudio is required by silero-vad's utils_vad.py top-level import. +torch==2.7.1+cpu +torchaudio==2.7.1+cpu +--index-url https://download.pytorch.org/whl/cpu diff --git a/stt-realtime/requirements.txt b/stt-realtime/requirements.txt index 3fac6c8..3985514 100644 --- a/stt-realtime/requirements.txt +++ b/stt-realtime/requirements.txt @@ -9,8 +9,8 @@ ctranslate2>=4.4.0 # Audio processing soundfile>=0.12.0 -# VAD - Silero (loaded via torch.hub) -# No explicit package needed, comes with torch +# VAD - Silero (loaded via torch.hub, runs on CPU via ONNX) +# Requires torch (CPU-only) - pinned in requirements-gpu-torch.txt (file name is historical; it now holds CPU-only wheels) # Utilities aiohttp>=3.9.0