# Docker Compose stack: two llama-swap inference backends (NVIDIA CUDA and
# AMD ROCm), the miku-bot Discord bot that talks to both, and an on-demand
# GPU anime-face-detector service.
version: '3.9'

services:
  # NVIDIA (CUDA) llama-swap model server.
  llama-swap:
    image: ghcr.io/mostlygeek/llama-swap:cuda
    container_name: llama-swap
    ports:
      - "8090:8080"  # Map host port 8090 to container port 8080
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-config.yaml:/app/config.yaml  # llama-swap configuration
    runtime: nvidia
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s  # Give more time for initial model loading
    environment:
      - NVIDIA_VISIBLE_DEVICES=all

  # AMD (ROCm) llama-swap model server, built from a local Dockerfile.
  llama-swap-amd:
    build:
      context: .
      dockerfile: Dockerfile.llamaswap-rocm
    container_name: llama-swap-amd
    ports:
      - "8091:8080"  # Map host port 8091 to container port 8080
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-rocm-config.yaml:/app/config.yaml  # llama-swap configuration for AMD
    devices:
      # ROCm needs direct access to the kernel fusion driver and DRI nodes.
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    group_add:
      # Host GIDs required for GPU device access; quoted so they stay strings.
      - "985"  # video group
      - "989"  # render group
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s  # Give more time for initial model loading
    environment:
      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # RX 6800 compatibility
      - ROCM_PATH=/opt/rocm
      - HIP_VISIBLE_DEVICES=0  # Use first AMD GPU
      - GPU_DEVICE_ORDINAL=0

  # Discord bot; starts only after both inference backends report healthy.
  miku-bot:
    build: ./bot
    container_name: miku-bot
    volumes:
      - ./bot/memory:/app/memory
      - /home/koko210Serve/ComfyUI/output:/app/ComfyUI/output:ro
      - /var/run/docker.sock:/var/run/docker.sock  # Allow container management
    depends_on:
      llama-swap:
        condition: service_healthy
      llama-swap-amd:
        condition: service_healthy
    environment:
      # SECURITY: a live bot token was previously committed here in plain
      # text. It is now read from the host environment (or an .env file next
      # to this compose file) via Compose interpolation. The previously
      # committed token must be treated as compromised and rotated in the
      # Discord developer portal.
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN}
      - LLAMA_URL=http://llama-swap:8080
      - LLAMA_AMD_URL=http://llama-swap-amd:8080  # Secondary AMD GPU endpoint
      - TEXT_MODEL=llama3.1
      - VISION_MODEL=vision
      - OWNER_USER_ID=209381657369772032  # Your Discord user ID for DM analysis reports
      - FACE_DETECTOR_STARTUP_TIMEOUT=60
    ports:
      - "3939:3939"
    networks:
      - default  # Stay on default for llama-swap communication
      - miku-voice  # Connect to voice network for RVC/TTS
    restart: unless-stopped

  # On-demand anime face detector (NVIDIA GPU); not started by default.
  anime-face-detector:
    build: ./face-detector
    container_name: anime-face-detector
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
    volumes:
      - ./face-detector/api:/app/api
      - ./face-detector/images:/app/images
    ports:
      - "7860:7860"  # Gradio UI
      - "6078:6078"  # FastAPI API
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: "no"  # Don't auto-restart - only run on-demand (quoted: bare no is boolean)
    profiles:
      - tools  # Don't start by default

networks:
  # Pre-existing external network shared with the RVC/TTS voice stack.
  miku-voice:
    external: true
    name: miku-voice-network