# docker-compose.yml

services:
  # ========== LLM Backends ==========
  # NVIDIA/CUDA llama-swap backend, published on host port 8090.
  llama-swap:
    container_name: llama-swap
    image: ghcr.io/mostlygeek/llama-swap:cuda
    runtime: nvidia
    restart: unless-stopped
    environment:
      NVIDIA_VISIBLE_DEVICES: "all"
      # Verbose logging for llama-swap.
      LOG_LEVEL: "debug"
    ports:
      # Host port 8090 -> container port 8080.
      - "8090:8080"
    volumes:
      # GGUF model files.
      - ./models:/models
      # llama-swap configuration.
      - ./llama-swap-config.yaml:/app/config.yaml
      # Custom chat template.
      - ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja
    # Probes /health on the container's own port 8080 with curl.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8080/health"
      # Extra grace period for the initial model load.
      start_period: 30s
      interval: 10s
      timeout: 5s
      retries: 10

  # AMD/ROCm llama-swap backend, built locally from Dockerfile.llamaswap-rocm
  # and published on host port 8091.
  llama-swap-amd:
    build:
      context: .
      dockerfile: Dockerfile.llamaswap-rocm
    container_name: llama-swap-amd
    ports:
      - "8091:8080"  # Map host port 8091 to container port 8080
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-rocm-config.yaml:/app/config.yaml  # llama-swap configuration for AMD
      - ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja  # Custom chat template
    # Host device nodes passed through to the container.
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    # Supplementary groups are given as numeric GIDs, which differ between
    # hosts. Set VIDEO_GID / RENDER_GID (shell or Compose .env) to override;
    # when unset, the previously hard-coded values are used.
    group_add:
      - "${VIDEO_GID:-985}"  # video group
      - "${RENDER_GID:-989}"  # render group
    restart: unless-stopped
    # Probes /health on the container's own port 8080 with curl.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s  # Give more time for initial model loading
    environment:
      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # RX 6800 compatibility
      - ROCM_PATH=/opt/rocm
      - HIP_VISIBLE_DEVICES=0  # Use first AMD GPU
      - GPU_DEVICE_ORDINAL=0

  # ========== Cheshire Cat AI (Memory & Personality) ==========
  # Cheshire Cat core. Waits for the vector store to start and for the AMD
  # llama-swap backend to report healthy before it is started.
  cheshire-cat:
    container_name: miku-cheshire-cat
    image: ghcr.io/cheshire-cat-ai/core:1.6.2
    restart: unless-stopped
    depends_on:
      cheshire-cat-vector-memory:
        condition: service_started
      llama-swap-amd:
        condition: service_healthy
    ports:
      # Cat admin UI on host port 1865 (container port 80).
      - "1865:80"
    environment:
      PYTHONUNBUFFERED: "1"
      WATCHFILES_FORCE_POLLING: "true"
      CORE_HOST: "localhost"
      CORE_PORT: "1865"
      QDRANT_HOST: "cheshire-cat-vector-memory"
      QDRANT_PORT: "6333"
      CORE_USE_SECURE_PROTOCOLS: "false"
      # Explicitly set to the empty string.
      API_KEY: ""
      LOG_LEVEL: "INFO"
      DEBUG: "true"
      SAVE_MEMORY_SNAPSHOTS: "false"
      # Points at the llama-swap-amd service on its container port.
      OPENAI_API_BASE: "http://llama-swap-amd:8080/v1"
    volumes:
      - ./cheshire-cat/cat/static:/app/cat/static
      # Shared plugins directory.
      - ./cat-plugins:/app/cat/plugins
      # Personality data (lore, prompts).
      - ./cheshire-cat/cat/data:/app/cat/data
      # Canonical bot/persona/ files mounted into Cat (single source of truth).
      - ./bot/persona/evil:/app/cat/data/evil
      - ./bot/persona/miku:/app/cat/data/miku
      # Mood description files.
      - ./bot/moods:/app/moods
      # Profile pictures and other memory files.
      - ./bot/memory:/app/memory
      # Patched log.py: fixes a loguru KeyError for third-party libs.
      - ./cheshire-cat/cat/log.py:/app/cat/log.py
    # Probes the root URL on container port 80 with curl.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:80/"
      # Cat takes a while to load the embedder and plugins.
      start_period: 45s
      interval: 15s
      timeout: 10s
      retries: 8

  # Qdrant instance that cheshire-cat reaches via QDRANT_HOST / QDRANT_PORT.
  cheshire-cat-vector-memory:
    container_name: miku-qdrant
    image: qdrant/qdrant:v1.9.1
    restart: unless-stopped
    environment:
      LOG_LEVEL: "INFO"
    ports:
      # Qdrant REST API, published on the host for debugging.
      - "6333:6333"
    volumes:
      # Vector storage kept on the host under the cheshire-cat tree.
      - ./cheshire-cat/cat/long_term_memory/vector:/qdrant/storage
    # Open-file limit: soft and hard both set to 65536.
    ulimits:
      nofile:
        hard: 65536
        soft: 65536

  # ========== Discord Bot ==========
  # Discord bot, built from ./bot. Starts only after both llama-swap backends
  # and cheshire-cat report healthy.
  miku-bot:
    container_name: miku-bot
    build:
      context: ./bot
    restart: unless-stopped
    depends_on:
      llama-swap:
        condition: service_healthy
      llama-swap-amd:
        condition: service_healthy
      cheshire-cat:
        condition: service_healthy
    # Environment variables are loaded from the .env file.
    env_file:
      - .env
    environment:
      TZ: "Europe/Sofia"
    ports:
      - "3939:3939"
    networks:
      # Stay on default for llama-swap + cheshire-cat communication.
      - default
      # Voice network for RVC/TTS.
      - miku-voice
      # Traefik proxy network for miku.panel.
      - proxy
    volumes:
      - ./bot/memory:/app/memory
      - /home/koko210Serve/ComfyUI/output:/app/ComfyUI/output:ro
      # Docker socket: allows container management from inside the bot.
      - /var/run/docker.sock:/var/run/docker.sock
      # .env file, read-only.
      - ./.env:/app/.env:ro
      # Config file, read-only.
      - ./config.yaml:/app/config.yaml:ro
    # Traefik routing: Host(`miku.panel`) on the websecure entrypoint with TLS,
    # forwarded to container port 3939 over the proxy network.
    labels:
      traefik.enable: "true"
      traefik.http.routers.miku.rule: "Host(`miku.panel`)"
      traefik.http.routers.miku.entrypoints: "websecure"
      traefik.http.routers.miku.tls: "true"
      traefik.http.services.miku.loadbalancer.server.port: "3939"
      traefik.docker.network: "proxy"

  # ========== Voice / STT ==========
  # Realtime speech-to-text service, built from ./stt-realtime and attached
  # only to the miku-voice network.
  miku-stt:
    container_name: miku-stt
    build:
      context: ./stt-realtime
      dockerfile: Dockerfile
    restart: unless-stopped
    runtime: nvidia
    networks:
      - miku-voice
    ports:
      # WebSocket port.
      - "8766:8766"
      # HTTP health check port.
      - "8767:8767"
    environment:
      # GPU index 0 (GTX 1660).
      NVIDIA_VISIBLE_DEVICES: "0"
      CUDA_VISIBLE_DEVICES: "0"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
      STT_HOST: "0.0.0.0"
      STT_PORT: "8766"
      # HTTP health check port.
      STT_HTTP_PORT: "8767"
    volumes:
      # Persistent model storage.
      - stt-models:/root/.cache/huggingface
    # GPU reservation pinned to device id 0 (GTX 1660).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids:
                - "0"
              capabilities:
                - gpu

  # ========== Tools (on-demand) ==========
  # On-demand face detector, built from ./face-detector. It sits behind the
  # "tools" profile, so it is not started by default.
  anime-face-detector:
    container_name: anime-face-detector
    build:
      context: ./face-detector
    profiles:
      - tools
    # No automatic restart: this service is only run on demand.
    restart: "no"
    runtime: nvidia
    environment:
      NVIDIA_VISIBLE_DEVICES: "all"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
    ports:
      # Gradio UI.
      - "7860:7860"
      # FastAPI API.
      - "6078:6078"
    volumes:
      - ./face-detector/api:/app/api
      - ./face-detector/images:/app/images
    # Reserves a device with the gpu capability.
    deploy:
      resources:
        reservations:
          devices:
            - capabilities:
                - gpu

# Both networks are declared external, so they must already exist outside
# this Compose project.
networks:
  # Referenced by miku-bot and miku-stt.
  miku-voice:
    name: miku-voice-network
    external: true
  # Referenced by miku-bot and by its traefik.docker.network label.
  proxy:
    name: proxy
    external: true

# Named volumes managed by this Compose project.
volumes:
  # Mounted by miku-stt at /root/.cache/huggingface; the explicit name fixes
  # the volume's name instead of deriving it from the project name.
  stt-models:
    name: "miku-stt-models"