# syntax=docker/dockerfile:1
# Multi-stage build for llama-swap with ROCm support
# Now using official llama.cpp ROCm image (PR #18439 merged Dec 29, 2025)

# Stage 1: Build llama-swap UI
FROM node:22-alpine AS ui-builder

WORKDIR /build

# git is required to fetch the llama-swap sources.
RUN apk add --no-cache git

# Shallow clone keeps this stage small. NOTE(review): consider pinning a tag
# or commit here for reproducible builds — currently tracks the default branch.
RUN git clone --depth 1 https://github.com/mostlygeek/llama-swap.git

# Build UI (now in the ui-svelte directory).
WORKDIR /build/llama-swap/ui-svelte
# `npm ci` installs exactly what the lockfile specifies (reproducible,
# unlike `npm install`, which may rewrite the lockfile).
RUN npm ci && npm run build

# Stage 2: Build llama-swap binary
FROM golang:1.23-alpine AS swap-builder

WORKDIR /build

RUN apk add --no-cache git

# Copy llama-swap source together with the UI built in stage 1
# (the Go build embeds the UI assets).
COPY --from=ui-builder /build/llama-swap /build/llama-swap

WORKDIR /build/llama-swap
# GOTOOLCHAIN=auto lets Go fetch a newer toolchain if go.mod requires one
# beyond the 1.23 base image.
RUN GOTOOLCHAIN=auto go build -o /build/llama-swap-binary .

# Stage 3: Final runtime image using the official llama.cpp ROCm image
FROM ghcr.io/ggml-org/llama.cpp:server-rocm

WORKDIR /app

# Copy the binary already owned by ubuntu and executable — a follow-up
# RUN chmod/chown would duplicate the file's bytes in an extra layer.
COPY --from=swap-builder --chown=ubuntu:ubuntu --chmod=0755 \
    /build/llama-swap-binary /app/llama-swap

# Add existing ubuntu user (UID 1000) to GPU access groups (using host GIDs).
# GID 187 = render group on host, GID 989 = video/kfd group on host.
RUN groupadd -g 187 hostrender && \
    groupadd -g 989 hostvideo && \
    usermod -aG hostrender,hostvideo ubuntu && \
    chown -R ubuntu:ubuntu /app

# Set environment for ROCm (RX 6800 is gfx1030).
ENV HSA_OVERRIDE_GFX_VERSION=10.3.0 \
    ROCM_PATH=/opt/rocm \
    HIP_VISIBLE_DEVICES=0

USER ubuntu

# Documentation only (does not publish the port): llama-swap listens on 8080.
EXPOSE 8080

# NOTE(review): assumes `curl` is present in the base image — verify, or
# replace with a probe binary shipped alongside llama-swap.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1

# Override the base image's ENTRYPOINT (llama-server) and run llama-swap.
# Config is expected to be mounted at /app/config.yaml at run time.
ENTRYPOINT []
CMD ["/app/llama-swap", "-config", "/app/config.yaml", "-listen", "0.0.0.0:8080"]