diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..f10ec3a
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,10 @@
+# .dockerignore for llama-swap-rocm (build context is project root)
+# The Dockerfile.llamaswap-rocm doesn't COPY anything from the build context —
+# everything is git-cloned in multi-stage builds. Exclude everything to avoid
+# sending ~31 GB of unnecessary build context (models, backups, etc.)
+
+# Exclude everything by default
+*
+
+# Nothing from the context is needed today. This file applies to every build that uses
+# the project root as context; if a Dockerfile starts to COPY files, re-include them with !path
diff --git a/.gitignore b/.gitignore
index 3e026de..940b4ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,9 +37,6 @@ models/*.bin
 *.log
 logs/
 
-# Docker
-.dockerignore
-
 # OS
 .DS_Store
 Thumbs.db
diff --git a/face-detector/.dockerignore b/face-detector/.dockerignore
new file mode 100644
index 0000000..5fdb6b1
--- /dev/null
+++ b/face-detector/.dockerignore
@@ -0,0 +1,6 @@
+# Exclude accumulated detection outputs (volume-mounted at runtime anyway)
+api/outputs/
+# Patterns are anchored at the context root, so use **/ to match at any depth
+**/__pycache__/
+**/*.pyc
+images/
diff --git a/stt-realtime/Dockerfile b/stt-realtime/Dockerfile
index eba464b..b6a8a8c 100644
--- a/stt-realtime/Dockerfile
+++ b/stt-realtime/Dockerfile
@@ -1,12 +1,12 @@
 # RealtimeSTT Container
 # Uses Faster-Whisper with CUDA for GPU-accelerated inference
-# Includes dual VAD (WebRTC + Silero) for robust voice detection
+# Includes Silero VAD (ONNX, CPU-only) for robust voice detection
 #
 # Updated per RealtimeSTT PR #295:
 # - CUDA 12.8.1 (latest stable)
-# - PyTorch 2.7.1 with cu128 support
+# - PyTorch CPU-only (for Silero VAD tensor ops only - saves ~2.3 GB)
+# - Faster-Whisper/CTranslate2 uses CUDA directly, no PyTorch GPU needed
 # - Ubuntu 24.04 base
-# - Single Python 3.11 installation
 
 FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04
 
@@ -27,7 +27,7 @@ RUN apt-get update && apt-get install -y \
     curl \
     && rm -rf /var/lib/apt/lists/*
 
-# Install PyTorch with CUDA 12.8 support (installed first for layer caching)
+# Install CPU-only PyTorch first for layer caching (Silero VAD only; CTranslate2 does GPU work). The file keeps its "gpu" name but pins +cpu wheels.
 COPY requirements-gpu-torch.txt .
 RUN python3 -m pip install --break-system-packages --no-cache-dir -r requirements-gpu-torch.txt
 
diff --git a/stt-realtime/requirements-gpu-torch.txt b/stt-realtime/requirements-gpu-torch.txt
index f3187ca..aa9b9ea 100644
--- a/stt-realtime/requirements-gpu-torch.txt
+++ b/stt-realtime/requirements-gpu-torch.txt
@@ -1,5 +1,7 @@
-# PyTorch with CUDA 12.8 support
-# Updated per RealtimeSTT PR #295 for better performance
-torch==2.7.1+cu128
-torchaudio==2.7.1+cu128
---index-url https://download.pytorch.org/whl/cu128
+# PyTorch CPU-only (used solely for Silero VAD which runs on CPU)
+# Silero VAD's OnnxWrapper uses torch tensors internally but does not need GPU.
+# Faster-Whisper/CTranslate2 handles GPU transcription via CUDA directly.
+# torchaudio is required by silero-vad's utils_vad.py top-level import.
+torch==2.7.1+cpu
+torchaudio==2.7.1+cpu
+--index-url https://download.pytorch.org/whl/cpu
diff --git a/stt-realtime/requirements.txt b/stt-realtime/requirements.txt
index 3fac6c8..3985514 100644
--- a/stt-realtime/requirements.txt
+++ b/stt-realtime/requirements.txt
@@ -9,8 +9,8 @@ ctranslate2>=4.4.0
 # Audio processing
 soundfile>=0.12.0
 
-# VAD - Silero (loaded via torch.hub)
-# No explicit package needed, comes with torch
+# VAD - Silero (loaded via torch.hub, runs on CPU via ONNX)
+# Requires torch and torchaudio (CPU-only builds) - pinned in requirements-gpu-torch.txt
 
 # Utilities
 aiohttp>=3.9.0