From b9d1f67d70885ddb9487e87fe6ddd8e7f6f793d1 Mon Sep 17 00:00:00 2001
From: koko210Serve
Date: Sat, 7 Feb 2026 23:43:01 +0200
Subject: [PATCH] llama-swap-rocm now uses official image and adjusted accordingly

---
Notes (review):
    * This copy of the patch arrived with its line breaks removed; they are
      restored here so the mail header, diffstat and hunk are readable.
      Runs of whitespace inside individual lines may also have been
      collapsed, so the indentation of the hunk lines below is NOT
      reliable. Regenerate the patch from the commit before applying it.
    * Many added lines carry the same instruction text as the lines they
      replace (chmod, ENV, EXPOSE, HEALTHCHECK) and appear to differ only
      in leading whitespace.
    * Functional changes visible in the hunk:
        - "useradd -m -u 1000 -G hostrender,hostvideo llamaswap" becomes
          "usermod -aG hostrender,hostvideo ubuntu";
        - /app is chowned to ubuntu:ubuntu instead of llamaswap:llamaswap;
        - "USER llamaswap" becomes "USER ubuntu";
        - "ENTRYPOINT []" is added ahead of the unchanged CMD.
    * The usermod step presumes the base image already ships an "ubuntu"
      user; the FROM line is outside this hunk, so this needs confirming.
    * The new version of the file ends without a trailing newline.

 Dockerfile.llamaswap-rocm | 54 +++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/Dockerfile.llamaswap-rocm b/Dockerfile.llamaswap-rocm
index fff3911..dd092a3 100644
--- a/Dockerfile.llamaswap-rocm
+++ b/Dockerfile.llamaswap-rocm
@@ -39,30 +39,30 @@ WORKDIR /app
 # Copy llama-swap binary from builder
 COPY --from=swap-builder /build/llama-swap-binary /app/llama-swap
 
-# Make binary executable
-RUN chmod +x /app/llama-swap
-
-# Create non-root user and add to GPU access groups
-# The official llama.cpp image already has llama-server installed
-# GID 187 = render group on host, GID 989 = video/kfd group on host
-RUN groupadd -g 187 hostrender && \
- groupadd -g 989 hostvideo && \
- useradd -m -u 1000 -G hostrender,hostvideo llamaswap && \
- chown -R llamaswap:llamaswap /app
-
-# Set environment for ROCm (RX 6800 is gfx1030)
-ENV HSA_OVERRIDE_GFX_VERSION=10.3.0
-ENV ROCM_PATH=/opt/rocm
-ENV HIP_VISIBLE_DEVICES=0
-
-USER llamaswap
-
-# Expose port
-EXPOSE 8080
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
- CMD curl -f http://localhost:8080/health || exit 1
-
-# Run llama-swap
-CMD ["/app/llama-swap", "-config", "/app/config.yaml", "-listen", "0.0.0.0:8080"]
+ # Make binaries executable
+ RUN chmod +x /app/llama-swap
+
+ # Add existing ubuntu user (UID 1000) to GPU access groups (using host GIDs)
+ # GID 187 = render group on host, GID 989 = video/kfd group on host
+ RUN groupadd -g 187 hostrender && \
+ groupadd -g 989 hostvideo && \
+ usermod -aG hostrender,hostvideo ubuntu && \
+ chown -R ubuntu:ubuntu /app
+
+ # Set environment for ROCm (RX 6800 is gfx1030)
+ ENV HSA_OVERRIDE_GFX_VERSION=10.3.0
+ ENV ROCM_PATH=/opt/rocm
+ ENV HIP_VISIBLE_DEVICES=0
+
+ USER ubuntu
+
+ # Expose port
+ EXPOSE 8080
+
+ # Health check
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+ CMD curl -f http://localhost:8080/health || exit 1
+
+ # Override the base image's ENTRYPOINT and run llama-swap
+ ENTRYPOINT []
+ CMD ["/app/llama-swap", "-config", "/app/config.yaml", "-listen", "0.0.0.0:8080"]
\ No newline at end of file