Files
miku-discord/Dockerfile.llamaswap-rocm

68 lines
1.9 KiB
Docker

# Multi-stage build for llama-swap with ROCm support
# Now using official llama.cpp ROCm image (PR #18439 merged Dec 29, 2025)
# Stage 1: Build llama-swap UI
FROM node:22-alpine AS ui-builder
WORKDIR /build

# git is needed to fetch the llama-swap sources
RUN apk add --no-cache git

# Optional branch, tag or commit of llama-swap to build.
# Empty (the default) builds whatever the repository's default branch points
# at when this layer is built, which is the previous behaviour. Pass
# --build-arg LLAMA_SWAP_REF=<ref> for a reproducible build.
ARG LLAMA_SWAP_REF=""

# Clone llama-swap and, when a ref was requested, check it out
RUN git clone https://github.com/mostlygeek/llama-swap.git && \
    if [ -n "${LLAMA_SWAP_REF}" ]; then \
        git -C llama-swap checkout "${LLAMA_SWAP_REF}"; \
    fi

# Build UI (now in ui-svelte directory)
WORKDIR /build/llama-swap/ui-svelte
# NOTE(review): `npm ci` would be stricter, but it requires a lockfile in
# ui-svelte — confirm upstream ships one before switching.
RUN npm install && npm run build
# Stage 2: Build llama-swap binary
FROM golang:1.23-alpine AS swap-builder
WORKDIR /build

# Install git (presumably so the go command can read VCS metadata from the
# cloned tree — TODO confirm it is still required)
RUN apk add --no-cache git

# Copy llama-swap source with built UI
COPY --from=ui-builder /build/llama-swap /build/llama-swap

# Build llama-swap binary
WORKDIR /build/llama-swap
# CGO_ENABLED=0: this stage is Alpine (musl) but the binary is copied into a
# different base image in the final stage, so it must not link against the
# builder's libc. Setting it explicitly keeps the binary static even if a C
# compiler is ever added to this stage.
# GOTOOLCHAIN=auto: lets the go command fetch a newer toolchain when the
# module's go.mod asks for one.
RUN CGO_ENABLED=0 GOTOOLCHAIN=auto go build -o /build/llama-swap-binary .
# Stage 3: Final runtime image using official llama.cpp ROCm image
FROM ghcr.io/ggml-org/llama.cpp:server-rocm
WORKDIR /app

# GIDs of the host groups that own the GPU device nodes. They are
# host-specific, so they can be overridden with --build-arg.
# Defaults: 187 = render group on host, 989 = video/kfd group on host.
ARG HOST_RENDER_GID=187
ARG HOST_VIDEO_GID=989

# Copy llama-swap binary from builder
COPY --from=swap-builder /build/llama-swap-binary /app/llama-swap

# Single root-level setup step (one layer instead of two, so the binary is
# not stored again just for the mode change):
#   - make the llama-swap binary executable
#   - create groups matching the host GIDs and add the existing ubuntu user
#     (UID 1000, expected to come from the base image) to them for GPU access
#   - hand /app over to that user
RUN chmod +x /app/llama-swap && \
    groupadd -g "${HOST_RENDER_GID}" hostrender && \
    groupadd -g "${HOST_VIDEO_GID}" hostvideo && \
    usermod -aG hostrender,hostvideo ubuntu && \
    chown -R ubuntu:ubuntu /app
# ROCm runtime environment (RX 6800 is gfx1030, hence the 10.3.0 override)
ENV HSA_OVERRIDE_GFX_VERSION=10.3.0 \
    ROCM_PATH=/opt/rocm \
    HIP_VISIBLE_DEVICES=0

# Everything from here on, including the container process, runs as ubuntu
USER ubuntu
# Port llama-swap listens on (see -listen below)
EXPOSE 8080

# Probe the /health endpoint on the listen port; a failed request marks the
# check as failed
HEALTHCHECK --retries=3 --start-period=30s --timeout=10s --interval=30s \
    CMD curl -f http://localhost:8080/health || exit 1

# Clear the ENTRYPOINT inherited from the base image so that CMD is executed
# as-is and starts llama-swap.
# NOTE(review): /app/config.yaml is not added by the instructions visible
# here — presumably it is mounted at run time; confirm.
ENTRYPOINT []
CMD ["/app/llama-swap", \
     "-config", "/app/config.yaml", \
     "-listen", "0.0.0.0:8080"]