# Dockerfile.llamaswap-rocm

# Multi-stage build for llama-swap with ROCm support
# Stage 1: Build llama.cpp with ROCm (requires ROCm 6.1+)
FROM rocm/dev-ubuntu-22.04:6.2.4 AS llama-builder

# Git ref of llama.cpp to build (branch, tag or commit SHA).
# Empty (the default) builds whatever the repository's default branch points to
# at build time; pass --build-arg LLAMA_CPP_REF=<tag-or-sha> for a reproducible build.
ARG LLAMA_CPP_REF=""

# AMDGPU architecture(s) to compile kernels for (gfx1030 = RX 6800).
ARG GPU_TARGETS=gfx1030

WORKDIR /build

# Install build dependencies including ROCm/HIP development libraries.
# - DEBIAN_FRONTEND is set inline so a package that wants to prompt cannot hang
#   the build, without leaking the variable into the image environment.
# - ca-certificates is listed explicitly: the HTTPS clone below needs it, and with
#   --no-install-recommends it is no longer pulled in as a recommendation of git.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        hip-dev \
        hipblas-dev \
        libcurl4-openssl-dev \
        rocblas-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Clone and build llama.cpp with HIP/ROCm support.
# Absolute paths are used instead of `cd` so every command is independent of the
# shell's working directory. Outputs consumed by the runtime stage:
#   /build/llama-server   the server binary
#   /build/*.so*          every shared library produced by the build
RUN git clone https://github.com/ggml-org/llama.cpp.git /build/llama.cpp && \
    if [ -n "${LLAMA_CPP_REF}" ]; then git -C /build/llama.cpp checkout "${LLAMA_CPP_REF}"; fi && \
    HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -S /build/llama.cpp -B /build/llama.cpp/build \
        -DGGML_HIP=ON \
        -DGPU_TARGETS="${GPU_TARGETS}" \
        -DCMAKE_BUILD_TYPE=Release && \
    cmake --build /build/llama.cpp/build --config Release -- -j"$(nproc)" && \
    cp /build/llama.cpp/build/bin/llama-server /build/llama-server && \
    find /build/llama.cpp/build -name "*.so*" -exec cp {} /build/ \;

# Stage 2: Build the llama-swap web UI
# (the Go binary itself is compiled in the next stage from this stage's tree)
FROM node:22-alpine AS ui-builder

# Git ref of llama-swap to build (branch, tag or commit SHA).
# Empty (the default) builds whatever the repository's default branch points to
# at build time; pass --build-arg LLAMA_SWAP_REF=<tag-or-sha> for a reproducible build.
ARG LLAMA_SWAP_REF=""

WORKDIR /build

# git is required for the clone below
RUN apk add --no-cache git

# Clone llama-swap and optionally pin it to the requested ref
RUN git clone https://github.com/mostlygeek/llama-swap.git /build/llama-swap && \
    if [ -n "${LLAMA_SWAP_REF}" ]; then git -C /build/llama-swap checkout "${LLAMA_SWAP_REF}"; fi

# Build UI.
# node_modules is removed in the same layer once the build has finished: the next
# stage copies the whole /build/llama-swap tree, and the installed packages are
# only needed to run the build itself.
# NOTE(review): `npm ci` would give lockfile-exact installs; switch to it once it
# is confirmed that ui/package-lock.json is committed and in sync upstream.
WORKDIR /build/llama-swap/ui
RUN npm install && npm run build && rm -rf node_modules

# Stage 3: Build llama-swap binary
FROM golang:1.23-alpine AS swap-builder

# Source tree location; the build below runs from here
WORKDIR /build/llama-swap

# Install git
RUN apk add --no-cache git

# Copy llama-swap source with built UI
COPY --from=ui-builder /build/llama-swap /build/llama-swap

# Build llama-swap binary to /build/llama-swap-binary.
# - CGO_ENABLED=0 forces a statically linked executable. This stage is Alpine
#   (musl libc) while the runtime stage is Ubuntu (glibc), so a binary linked
#   against this stage's libc would not start there. Being explicit keeps the
#   build correct even if a C compiler is ever added to this stage.
# - GOTOOLCHAIN=auto lets the go command fetch a newer toolchain when the
#   module's go.mod requires one newer than the image provides.
RUN CGO_ENABLED=0 GOTOOLCHAIN=auto go build -o /build/llama-swap-binary .

# Stage 4: Final runtime image
FROM rocm/dev-ubuntu-22.04:6.2.4

# Host GIDs that own the GPU device nodes. The container user must be a member
# of groups with these numeric IDs to open the devices without running as root.
# Defaults match the original host: 187 = render group, 989 = video/kfd group.
# Override with --build-arg RENDER_GID=... / VIDEO_GID=... for other hosts.
ARG RENDER_GID=187
ARG VIDEO_GID=989

WORKDIR /app

# Install runtime dependencies including additional ROCm libraries.
# curl is required by the HEALTHCHECK below. DEBIAN_FRONTEND is set inline so it
# does not persist into the runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        rocm-libs \
    && rm -rf /var/lib/apt/lists/*

# Create the unprivileged user and its GPU access groups before copying files so
# the COPY instructions can assign ownership directly.
# -o allows the group to be created even when the requested GID is already used
# by a group in the base image. /app itself is handed to the user; its contents
# get their ownership from COPY --chown.
RUN groupadd -o -g "${RENDER_GID}" hostrender && \
    groupadd -o -g "${VIDEO_GID}" hostvideo && \
    useradd -m -u 1000 -G hostrender,hostvideo llamaswap && \
    chown llamaswap:llamaswap /app

# Copy built binaries and shared libraries from previous stages.
# Ownership and mode are set at copy time: a later `chown -R` / `chmod` would
# store a second copy of every touched file in an extra layer.
# (--chmod requires BuildKit.)
COPY --from=llama-builder --chown=llamaswap:llamaswap --chmod=0755 /build/llama-server /app/llama-server
COPY --from=llama-builder --chown=llamaswap:llamaswap /build/*.so* /app/
COPY --from=swap-builder --chown=llamaswap:llamaswap --chmod=0755 /build/llama-swap-binary /app/llama-swap

# Set environment for ROCm (RX 6800 is gfx1030)
ENV HSA_OVERRIDE_GFX_VERSION=10.3.0 \
    ROCM_PATH=/opt/rocm \
    HIP_VISIBLE_DEVICES=0

# Library search path: ROCm libraries first, then the llama.cpp libraries in /app.
# Any value inherited from the base image is appended only when it is non-empty;
# an unconditional ":$LD_LIBRARY_PATH" would leave a trailing empty entry, which
# the dynamic loader treats as the current working directory.
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/app${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

USER llamaswap

# Expose port
EXPOSE 8080

# Health check (-sS: no progress meter, but still print errors)
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
  CMD curl -fsS http://localhost:8080/health || exit 1

# Run llama-swap; /app/config.yaml is expected to be provided at run time
CMD ["/app/llama-swap", "-config", "/app/config.yaml", "-listen", "0.0.0.0:8080"]
Add dual GPU support with web UI selector Features: - Built custom ROCm container for AMD RX 6800 GPU - Added GPU selection toggle in web UI (NVIDIA/AMD) - Unified model names across both GPUs for seamless switching - Vision model always uses NVIDIA GPU (optimal performance) - Text models (llama3.1, darkidol) can use either GPU - Added /gpu-status and /gpu-select API endpoints - Implemented GPU state persistence in memory/gpu_state.json Technical details: - Multi-stage Dockerfile.llamaswap-rocm with ROCm 6.2.4 - llama.cpp compiled with GGML_HIP=ON for gfx1030 (RX 6800) - Proper GPU permissions without root (groups 187/989) - AMD container on port 8091, NVIDIA on port 8090 - Updated bot/utils/llm.py with get_current_gpu_url() and get_vision_gpu_url() - Modified bot/utils/image_handling.py to always use NVIDIA for vision - Enhanced web UI with GPU selector button (blue=NVIDIA, red=AMD) Files modified: - docker-compose.yml (added llama-swap-amd service) - bot/globals.py (added LLAMA_AMD_URL) - bot/api.py (added GPU selection endpoints and helper function) - bot/utils/llm.py (GPU routing for text models) - bot/utils/image_handling.py (GPU routing for vision models) - bot/static/index.html (GPU selector UI) - llama-swap-rocm-config.yaml (unified model names) New files: - Dockerfile.llamaswap-rocm - bot/memory/gpu_state.json - bot/utils/gpu_router.py (load balancing utility) - setup-dual-gpu.sh (setup verification script) - DUAL_GPU_*.md (documentation files) 2026-01-09 00:03:59 +02:00			`# Multi-stage build for llama-swap with ROCm support`
			`# Stage 1: Build llama.cpp with ROCm (requires ROCm 6.1+)`
			`FROM rocm/dev-ubuntu-22.04:6.2.4 AS llama-builder`

			`WORKDIR /build`

			`# Install build dependencies including ROCm/HIP development libraries`
			`RUN apt-get update && apt-get install -y \`
			`git \`
			`build-essential \`
			`cmake \`
			`wget \`
			`libcurl4-openssl-dev \`
			`hip-dev \`
			`hipblas-dev \`
			`rocblas-dev \`
			`&& rm -rf /var/lib/apt/lists/*`

			`# Clone and build llama.cpp with HIP/ROCm support (gfx1030 = RX 6800)`
			`RUN git clone https://github.com/ggml-org/llama.cpp.git && \`
			`cd llama.cpp && \`
			`HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \`
			`cmake -S . -B build -DGGML_HIP=ON -DGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release && \`
			`cmake --build build --config Release -- -j$(nproc) && \`
			`cp build/bin/llama-server /build/llama-server && \`
			`find build -name "*.so*" -exec cp {} /build/ \;`

			`# Stage 2: Build llama-swap UI and binary`
			`FROM node:22-alpine AS ui-builder`

			`WORKDIR /build`

			`# Install git`
			`RUN apk add --no-cache git`

			`# Clone llama-swap`
			`RUN git clone https://github.com/mostlygeek/llama-swap.git`

			`# Build UI`
			`WORKDIR /build/llama-swap/ui`
			`RUN npm install && npm run build`

			`# Stage 3: Build llama-swap binary`
			`FROM golang:1.23-alpine AS swap-builder`

			`WORKDIR /build`

			`# Install git`
			`RUN apk add --no-cache git`

			`# Copy llama-swap source with built UI`
			`COPY --from=ui-builder /build/llama-swap /build/llama-swap`

			`# Build llama-swap binary`
			`WORKDIR /build/llama-swap`
			`RUN GOTOOLCHAIN=auto go build -o /build/llama-swap-binary .`

			`# Stage 4: Final runtime image`
			`FROM rocm/dev-ubuntu-22.04:6.2.4`

			`WORKDIR /app`

			`# Install runtime dependencies including additional ROCm libraries`
			`RUN apt-get update && apt-get install -y \`
			`curl \`
			`ca-certificates \`
			`rocm-libs \`
			`&& rm -rf /var/lib/apt/lists/*`

			`# Copy built binaries and shared libraries from previous stages`
			`COPY --from=llama-builder /build/llama-server /app/llama-server`
			`COPY --from=llama-builder /build/*.so* /app/`
			`COPY --from=swap-builder /build/llama-swap-binary /app/llama-swap`

			`# Make binaries executable`
			`RUN chmod +x /app/llama-server /app/llama-swap`

			`# Create user and add to GPU access groups (using host GIDs)`
			`# GID 187 = render group on host, GID 989 = video/kfd group on host`
			`RUN groupadd -g 187 hostrender && \`
			`groupadd -g 989 hostvideo && \`
			`useradd -m -u 1000 -G hostrender,hostvideo llamaswap && \`
			`chown -R llamaswap:llamaswap /app`

			`# Set environment for ROCm (RX 6800 is gfx1030)`
			`ENV HSA_OVERRIDE_GFX_VERSION=10.3.0`
			`ENV ROCM_PATH=/opt/rocm`
			`ENV HIP_VISIBLE_DEVICES=0`
			`ENV LD_LIBRARY_PATH=/opt/rocm/lib:/app:$LD_LIBRARY_PATH`

			`USER llamaswap`

			`# Expose port`
			`EXPOSE 8080`

			`# Health check`
			`HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \`
			`CMD curl -f http://localhost:8080/health || exit 1`

			`# Run llama-swap`
			`CMD ["/app/llama-swap", "-config", "/app/config.yaml", "-listen", "0.0.0.0:8080"]`