miku-discord/cheshire-cat/compose.yml

services:
  cheshire-cat-core:
    image: ghcr.io/cheshire-cat-ai/core:1.6.2
    container_name: cheshire_cat_core
    depends_on:
      - cheshire-cat-vector-memory
      - ollama
    environment:
      PYTHONUNBUFFERED: "1"
      WATCHFILES_FORCE_POLLING: "true"
      CORE_HOST: ${CORE_HOST:-localhost}
      CORE_PORT: ${CORE_PORT:-1865}
      QDRANT_HOST: ${QDRANT_HOST:-cheshire_cat_vector_memory}
      QDRANT_PORT: ${QDRANT_PORT:-6333}
      CORE_USE_SECURE_PROTOCOLS: ${CORE_USE_SECURE_PROTOCOLS:-false}
      API_KEY: ${API_KEY:-}
      LOG_LEVEL: ${LOG_LEVEL:-WARNING}
      DEBUG: ${DEBUG:-false}
      SAVE_MEMORY_SNAPSHOTS: ${SAVE_MEMORY_SNAPSHOTS:-false}
    ports:
      - "${CORE_PORT:-1865}:80"
    # This add an entry to /etc/hosts file in the container mapping host.docker.internal to the host machine IP addr, allowing the container to access services running on the host, not only on Win and Mac but also Linux.
    # See https://docs.docker.com/desktop/networking/#i-want-to-connect-from-a-container-to-a-service-on-the-host and https://docs.docker.com/reference/cli/docker/container/run/#add-host
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - ./cat/static:/app/cat/static
      - ./cat/plugins:/app/cat/plugins
      - ./cat/data:/app/cat/data
    restart: unless-stopped

  cheshire-cat-vector-memory:
    image: qdrant/qdrant:v1.9.1
    container_name: cheshire_cat_vector_memory
    environment:
      LOG_LEVEL: ${LOG_LEVEL:-WARNING}
    expose:
      - ${QDRANT_PORT:-6333}
    volumes:
      - ./cat/long_term_memory/vector:/qdrant/storage
    restart: unless-stopped

  ollama:
    image: ollama/ollama:0.1.39
    container_name: ollama_cat
    restart: unless-stopped
    environment:
      OLLAMA_HOST: "${OLLAMA_HOST:-0.0.0.0}:${OLLAMA_PORT-11434}"
      OLLAMA_DEBUG: ${OLLAMA_DEBUG:-false}
      OLLAMA_FLASH_ATTENTION: ${OLLAMA_FLASH_ATTENTION:-false}
      OLLAMA_KEEP_ALIVE: ${OLLAMA_KEEP_ALIVE:-"5m"}
      OLLAMA_MAX_LOADED_MODELS: ${OLLAMA_MAX_LOADED_MODELS:-1}
      OLLAMA_NUM_PARALLEL: ${OLLAMA_NUM_PARALLEL:-1}
    expose:
      - ${OLLAMA_PORT:-11434}
    volumes:
      - ./ollama:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [ gpu ]