# Parakeet ONNX speech-to-text (ASR) service image.
#
# Inference goes through ONNX Runtime with CUDA for GPU acceleration; the
# setup is tuned for the NVIDIA GTX 1660 and comparable GPUs.
#
# Base image: CUDA 12.6 runtime with cuDNN 9 on Ubuntu 22.04, providing the
# libraries for ONNX Runtime GPU support.
FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04

# Build-time only: prevent apt/debconf from prompting during package installs.
# Declared as ARG rather than ENV so it is visible to the RUN steps of this
# stage but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Disable Python's stdout/stderr buffering so log output is emitted immediately.
ENV PYTHONUNBUFFERED=1

# Working directory: later relative paths (COPY destinations, RUN, CMD)
# resolve under /app.
WORKDIR /app

# Install system dependencies: the Python 3.11 toolchain, compiler/build
# tools, audio libraries (ffmpeg, libsndfile, PortAudio), and git/curl.
#   * --no-install-recommends skips optional "Recommends" packages to keep
#     the image small. ca-certificates is listed explicitly so HTTPS access
#     (pip, git, curl) does not depend on it being pulled in indirectly.
#   * update, install and apt-list cleanup share one layer, so the package
#     index is never stale and never stored in the image.
#   * Packages are sorted alphabetically to keep diffs readable.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        libportaudio2 \
        libsndfile1 \
        portaudio19-dev \
        python3-pip \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip to the exact version used in requirements.
# --no-cache-dir keeps pip's download cache out of this image layer.
RUN python3.11 -m pip install --no-cache-dir --upgrade pip==25.3

# Copy only the dependency manifest first: this layer and the install step
# after it stay cached until requirements-stt.txt itself changes.
COPY requirements-stt.txt ./

# Install the Python packages from the manifest without keeping pip's cache.
RUN python3.11 -m pip install --no-cache-dir --requirement requirements-stt.txt

# Application code, copied one directory at a time into the working directory.
COPY asr/ asr/
COPY server/ server/
COPY vad/ vad/
COPY client/ client/

# Pre-create the models directory; models are downloaded on first run.
RUN mkdir -p models/parakeet

# WebSocket port. EXPOSE is documentation only; the port still has to be
# published at run time.
EXPOSE 8766/tcp

# GPU visibility: default to GPU 0. Can be overridden when the container is
# started by setting CUDA_VISIBLE_DEVICES.
ENV CUDA_VISIBLE_DEVICES=0

# Health check: the container counts as healthy when onnxruntime can be
# imported and lists CUDAExecutionProvider among its available providers.
# `|| exit 1` collapses any failure status to 1 (unhealthy).
# NOTE(review): this probe does not connect to the WebSocket server itself,
# so a hung server would still report healthy - confirm that is intended.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD python3.11 -c "import onnxruntime as ort; assert 'CUDAExecutionProvider' in ort.get_available_providers()" || exit 1

# Default process: run the WebSocket server module. Exec (JSON-array) form
# starts Python directly, without a wrapping shell.
CMD ["python3.11", "-m", "server.ws_server"]