# Dockerfile.llamaswap-rocm

# Multi-stage build for llama-swap with ROCm support
# Now using official llama.cpp ROCm image (PR #18439 merged Dec 29, 2025)

# Stage 1: Build llama-swap UI
# Produces the checked-out llama-swap tree, with the UI build output in place,
# under /build/llama-swap for the next stage to copy.
FROM node:22-alpine AS ui-builder

# Optional git ref (branch or tag) of llama-swap to build.
# Empty (the default) clones the repository's default branch; pass
# --build-arg LLAMA_SWAP_REF=<tag> to get a reproducible build.
ARG LLAMA_SWAP_REF=""

WORKDIR /build

# git is only needed to fetch the sources
RUN apk add --no-cache git

# Shallow clone: only the tip of the selected ref is needed for the build.
# The ${VAR:+...} expansion adds "--branch <ref>" only when a ref was supplied.
RUN git clone --depth 1 ${LLAMA_SWAP_REF:+--branch "${LLAMA_SWAP_REF}"} \
        https://github.com/mostlygeek/llama-swap.git

# Build UI (now in ui-svelte directory)
WORKDIR /build/llama-swap/ui-svelte

# Prefer a lockfile-exact install when the checkout ships a package-lock.json;
# otherwise fall back to a plain install so the build still works.
RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && \
    npm run build

# Stage 2: Build llama-swap binary
# Compiles the Go program from the tree prepared by ui-builder and leaves the
# result at /build/llama-swap-binary for the runtime stage to copy.
FROM golang:1.23-alpine AS swap-builder

# Keep git available for the Go toolchain (the copied checkout includes .git)
RUN apk add --no-cache git

# Copy llama-swap source with built UI
COPY --from=ui-builder /build/llama-swap /build/llama-swap

WORKDIR /build/llama-swap

# CGO_ENABLED=0 forces a statically linked binary, so the executable built here
# on Alpine (musl) does not depend on the C library of the runtime image.
# GOTOOLCHAIN=auto lets go switch to a newer toolchain if go.mod requires one.
RUN CGO_ENABLED=0 GOTOOLCHAIN=auto go build -o /build/llama-swap-binary .

# Stage 3: Final runtime image using official llama.cpp ROCm image
# The official llama.cpp image already has llama-server installed; this stage
# only adds the llama-swap binary, a non-root user and the ROCm settings.
FROM ghcr.io/ggml-org/llama.cpp:server-rocm

WORKDIR /app

# Copy llama-swap binary from builder
COPY --from=swap-builder /build/llama-swap-binary /app/llama-swap

# Create non-root user and add to GPU access groups
# GID 187 = render group on host, GID 989 = video/kfd group on host
# -o allows the numeric IDs to be reused if the base image already assigns
# them to another group/user, instead of aborting the build.
# The binary is made executable and /app handed to the new user in the same
# layer, so no separate chmod layer is needed.
RUN groupadd -o -g 187 hostrender && \
    groupadd -o -g 989 hostvideo && \
    useradd -o -m -u 1000 -G hostrender,hostvideo llamaswap && \
    chmod +x /app/llama-swap && \
    chown -R llamaswap:llamaswap /app

# Set environment for ROCm (RX 6800 is gfx1030)
ENV HSA_OVERRIDE_GFX_VERSION=10.3.0 \
    ROCM_PATH=/opt/rocm \
    HIP_VISIBLE_DEVICES=0

# Everything below runs without root privileges
USER llamaswap

# Port llama-swap listens on (see -listen in CMD)
EXPOSE 8080

# Health check: probe llama-swap's HTTP endpoint from inside the container
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
  CMD curl -f http://localhost:8080/health || exit 1

# Clear any ENTRYPOINT inherited from the base image so that the CMD below is
# executed as-is instead of being passed as arguments to that entrypoint.
ENTRYPOINT []

# Run llama-swap; /app/config.yaml is not created by this file and must be
# supplied at runtime (e.g. via a bind mount).
CMD ["/app/llama-swap", "-config", "/app/config.yaml", "-listen", "0.0.0.0:8080"]