services:
  # ========== LLM Backends ==========
  llama-swap:
    image: ghcr.io/mostlygeek/llama-swap:cuda
    container_name: llama-swap
    ports:
      - "8090:8080"  # Map host port 8090 to container port 8080
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-config.yaml:/app/config.yaml  # llama-swap configuration
      - ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja  # Custom chat template
    runtime: nvidia
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s  # Give more time for initial model loading
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - LOG_LEVEL=debug  # Enable verbose logging for llama-swap

  llama-swap-amd:
    build:
      context: .
      dockerfile: Dockerfile.llamaswap-rocm
    container_name: llama-swap-amd
    ports:
      - "8091:8080"  # Map host port 8091 to container port 8080
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-rocm-config.yaml:/app/config.yaml  # llama-swap configuration for AMD
      - ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja  # Custom chat template
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    group_add:
      - "985"  # video group
      - "989"  # render group
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s  # Give more time for initial model loading
    environment:
      - HSA_OVERRIDE_GFX_VERSION=10.3.0  # RX 6800 compatibility
      - ROCM_PATH=/opt/rocm
      - HIP_VISIBLE_DEVICES=0  # Use first AMD GPU
      - GPU_DEVICE_ORDINAL=0

  # ========== Cheshire Cat AI (Memory & Personality) ==========
  cheshire-cat:
    image: ghcr.io/cheshire-cat-ai/core:1.6.2
    container_name: miku-cheshire-cat
    depends_on:
      cheshire-cat-vector-memory:
        condition: service_started
      llama-swap-amd:
        condition: service_healthy
    environment:
      - PYTHONUNBUFFERED=1
      - WATCHFILES_FORCE_POLLING=true
      - CORE_HOST=localhost
      - CORE_PORT=1865
      - QDRANT_HOST=cheshire-cat-vector-memory
      - QDRANT_PORT=6333
      - CORE_USE_SECURE_PROTOCOLS=false
      - API_KEY=
      - LOG_LEVEL=INFO
      - DEBUG=true
      - SAVE_MEMORY_SNAPSHOTS=false
      - OPENAI_API_BASE=http://llama-swap-amd:8080/v1
    ports:
      - "1865:80"  # Cat admin UI on host port 1865
    volumes:
      - ./cheshire-cat/cat/static:/app/cat/static
      - ./cat-plugins:/app/cat/plugins  # Shared plugins directory
      - ./cheshire-cat/cat/data:/app/cat/data  # Personality data (lore, prompts)
      - ./bot/moods:/app/moods  # Mood description files
      - ./bot/memory:/app/memory  # Profile pictures and other memory files
      - ./cheshire-cat/cat/log.py:/app/cat/log.py  # Patched: fix loguru KeyError for third-party libs
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:80/"]
      interval: 15s
      timeout: 10s
      retries: 8
      start_period: 45s  # Cat takes a while to load embedder + plugins

  cheshire-cat-vector-memory:
    image: qdrant/qdrant:v1.9.1
    container_name: miku-qdrant
    environment:
      - LOG_LEVEL=INFO
    ports:
      - "6333:6333"  # Qdrant REST API (for debugging)
    ulimits:
      nofile:
        soft: 65536
        hard: 65536
    volumes:
      - ./cheshire-cat/cat/long_term_memory/vector:/qdrant/storage
    restart: unless-stopped

  # ========== Discord Bot ==========
  miku-bot:
    build: ./bot
    container_name: miku-bot
    environment:
      - TZ=Europe/Sofia
    volumes:
      - ./bot/memory:/app/memory
      - /home/koko210Serve/ComfyUI/output:/app/ComfyUI/output:ro
      - /var/run/docker.sock:/var/run/docker.sock  # Allow container management
      - ./.env:/app/.env:ro  # Mount .env file (read-only)
      - ./config.yaml:/app/config.yaml:ro  # Mount config file (read-only)
    depends_on:
      llama-swap:
        condition: service_healthy
      llama-swap-amd:
        condition: service_healthy
      cheshire-cat:
        condition: service_healthy
    env_file:
      - .env  # Load environment variables from .env file
    ports:
      - "3939:3939"
    networks:
      - default  # Stay on default for llama-swap + cheshire-cat communication
      - miku-voice  # Connect to voice network for RVC/TTS
    restart: unless-stopped

  # ========== Voice / STT ==========
  miku-stt:
    build:
      context: ./stt-realtime
      dockerfile: Dockerfile
    container_name: miku-stt
    runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=0  # GTX 1660
      - CUDA_VISIBLE_DEVICES=0
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - STT_HOST=0.0.0.0
      - STT_PORT=8766
      - STT_HTTP_PORT=8767  # HTTP health check port
    volumes:
      - stt-models:/root/.cache/huggingface  # Persistent model storage
    ports:
      - "8766:8766"  # WebSocket port
      - "8767:8767"  # HTTP health check port
    networks:
      - miku-voice
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']  # GTX 1660
              capabilities: [gpu]
    restart: unless-stopped

  # ========== Tools (on-demand) ==========
  anime-face-detector:
    build: ./face-detector
    container_name: anime-face-detector
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
    volumes:
      - ./face-detector/api:/app/api
      - ./face-detector/images:/app/images
    ports:
      - "7860:7860"  # Gradio UI
      - "6078:6078"  # FastAPI API
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: "no"  # Don't auto-restart - only run on-demand
    profiles:
      - tools  # Don't start by default

networks:
  miku-voice:
    external: true
    name: miku-voice-network

volumes:
  stt-models:
    name: miku-stt-models