# Dockerfile.llamaswap-rocm

# Multi-stage build for llama-swap with ROCm support
# Now using official llama.cpp ROCm image (PR #18439 merged Dec 29, 2025)

# Stage 1: Build llama-swap UI
#
# Clones the llama-swap repository and builds the web UI inside the checkout.
# The whole checkout at /build/llama-swap (sources plus build output) is what
# later stages copy out of this one.
FROM node:22-alpine AS ui-builder

WORKDIR /build

# git is only needed to fetch the sources.
RUN apk add --no-cache git

# Git ref (branch, tag or commit SHA) of llama-swap to build.
# The default `HEAD` keeps whatever the remote's default branch points at, so
# the checkout below is a no-op unless a ref is supplied, e.g.
#   docker build --build-arg LLAMA_SWAP_REF=<tag-or-sha> ...
# Declared right before the clone so that changing the ref invalidates the
# cached clone layer (an unparameterised `git clone` layer is otherwise reused
# forever and silently pins a stale revision) without re-running `apk add`.
ARG LLAMA_SWAP_REF=HEAD

# Clone llama-swap and switch to the requested ref.
RUN git clone https://github.com/mostlygeek/llama-swap.git \
    && git -C llama-swap checkout "${LLAMA_SWAP_REF}"

# Build UI (now in ui-svelte directory).
# NOTE(review): `npm ci` would give reproducible installs, but it requires a
# package-lock.json in ui-svelte — confirm upstream ships one before switching.
WORKDIR /build/llama-swap/ui-svelte
RUN npm install && npm run build

# Stage 2: Build llama-swap binary
#
# Compiles the Go program from the checkout produced by the ui-builder stage
# (which already contains the built UI) into /build/llama-swap-binary.
FROM golang:1.23-alpine AS swap-builder

# Install git.
# NOTE(review): presumably needed by the Go toolchain when building inside a
# git checkout (VCS stamping / direct module fetches) — confirm.
RUN apk add --no-cache git

# Copy llama-swap source with built UI.
COPY --from=ui-builder /build/llama-swap /build/llama-swap

WORKDIR /build/llama-swap

# Build llama-swap binary.
# - CGO_ENABLED=0: this stage is Alpine (musl) while the binary is copied into
#   a different runtime image; disabling cgo guarantees a statically linked
#   executable that does not depend on the build stage's libc, instead of
#   relying on the toolchain's "no C compiler found" fallback.
# - GOTOOLCHAIN=auto: lets the go command fetch a newer toolchain if the
#   module's go.mod asks for one newer than the image provides.
RUN CGO_ENABLED=0 GOTOOLCHAIN=auto go build -o /build/llama-swap-binary .

# Stage 3: Final runtime image using official llama.cpp ROCm image
#
# Adds the llama-swap binary on top of the llama.cpp ROCm server image and runs
# it as the unprivileged `ubuntu` user with access to the host GPU groups.
# NOTE(review): `server-rocm` is a moving tag; pin a digest for reproducible builds.
FROM ghcr.io/ggml-org/llama.cpp:server-rocm

# Numeric GIDs of the host groups that own the GPU device nodes.
# Defaults match the original host (187 = render, 989 = video/kfd); override
# for other hosts with e.g.
#   docker build --build-arg RENDER_GID=<gid> --build-arg VIDEO_GID=<gid> ...
ARG RENDER_GID=187
ARG VIDEO_GID=989

WORKDIR /app

# Copy llama-swap binary from builder.
COPY --from=swap-builder /build/llama-swap-binary /app/llama-swap

# One layer for all root-only setup:
# - make the binary executable;
# - create groups carrying the host GIDs and add the existing ubuntu user
#   (UID 1000) to them. --non-unique keeps the build working when the base
#   image already has a group with one of these GIDs;
# - hand /app over to the ubuntu user.
RUN chmod +x /app/llama-swap \
    && groupadd --non-unique --gid "${RENDER_GID}" hostrender \
    && groupadd --non-unique --gid "${VIDEO_GID}" hostvideo \
    && usermod -aG hostrender,hostvideo ubuntu \
    && chown -R ubuntu:ubuntu /app

# Set environment for ROCm (RX 6800 is gfx1030).
ENV HSA_OVERRIDE_GFX_VERSION=10.3.0 \
    ROCM_PATH=/opt/rocm \
    HIP_VISIBLE_DEVICES=0

# Drop root only after every step that needs it has run.
USER ubuntu

# Port llama-swap listens on (see CMD); documentation only, does not publish.
EXPOSE 8080

# Health check against llama-swap's HTTP endpoint.
# -sS silences curl's progress meter but still reports errors, keeping the
# recorded health-check output readable.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
  CMD curl -fsS http://localhost:8080/health || exit 1

# Override the base image's ENTRYPOINT and run llama-swap.
# NOTE(review): /app/config.yaml is not added by this Dockerfile — presumably
# it is mounted at runtime; confirm against the compose/run configuration.
ENTRYPOINT []
CMD ["/app/llama-swap", "-config", "/app/config.yaml", "-listen", "0.0.0.0:8080"]