Phase 3: Unified Cheshire Cat integration with WebSocket-based per-user isolation

Key changes:
- CatAdapter (bot/utils/cat_client.py): WebSocket /ws/{user_id} for chat queries instead of HTTP POST (fixes per-user memory isolation when no API keys are configured — HTTP defaults all users to user_id='user')
- Memory management API: 8 endpoints for status, stats, facts, episodic memories, consolidation trigger, and multi-step delete with confirmation
- Web UI: Memory tab (tab9) with collection stats, fact/episodic browser, manual consolidation trigger, and 3-step delete flow requiring an exact confirmation string
- Bot integration: Cat-first response path with query_llama fallback for both text and embed responses, plus server mood detection
- Discord bridge plugin: replaced .pop() with .get() (UserMessage is a Pydantic BaseModelDict, not a raw dict); metadata is now extracted via extra attributes
- Unified docker-compose: Cat + Qdrant services merged into the main compose file; the bot depends_on the Cat healthcheck
- All plugins (discord_bridge, memory_consolidation, miku_personality) consolidated into cat-plugins/ for the volume mount
- query_llama is deprecated but still functional for compatibility
2026-02-07 20:22:03 +02:00
parent edb88e9ede
commit 14e1a8df51
14 changed files with 1382 additions and 70 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,5 @@
-version: '3.9'
-
 services:
+  # ========== LLM Backends ==========
  llama-swap:
    image: ghcr.io/mostlygeek/llama-swap:cuda
    container_name: llama-swap
@@ -9,6 +8,7 @@ services:
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-config.yaml:/app/config.yaml  # llama-swap configuration
+      - ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja  # Custom chat template
    runtime: nvidia
    restart: unless-stopped
    healthcheck:
@@ -31,6 +31,7 @@ services:
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-rocm-config.yaml:/app/config.yaml  # llama-swap configuration for AMD
+      - ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja  # Custom chat template
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
@@ -50,6 +51,59 @@ services:
      - HIP_VISIBLE_DEVICES=0  # Use first AMD GPU
      - GPU_DEVICE_ORDINAL=0

+  # ========== Cheshire Cat AI (Memory & Personality) ==========
+  cheshire-cat:
+    image: ghcr.io/cheshire-cat-ai/core:1.6.2
+    container_name: miku-cheshire-cat
+    depends_on:
+      cheshire-cat-vector-memory:
+        condition: service_started  # Qdrant service defines no healthcheck, so only start order is enforced
+      llama-swap-amd:
+        condition: service_healthy  # Wait for the backend that OPENAI_API_BASE points at
+    environment:
+      - PYTHONUNBUFFERED=1
+      - WATCHFILES_FORCE_POLLING=true
+      - CORE_HOST=localhost
+      - CORE_PORT=1865
+      - QDRANT_HOST=cheshire-cat-vector-memory
+      - QDRANT_PORT=6333
+      - CORE_USE_SECURE_PROTOCOLS=false
+      - API_KEY=  # Intentionally empty (no API keys configured); keep the published port private
+      - LOG_LEVEL=INFO
+      - DEBUG=true
+      - SAVE_MEMORY_SNAPSHOTS=false
+      - OPENAI_API_BASE=http://llama-swap-amd:8080/v1
+    ports:
+      - "127.0.0.1:1865:80"  # Cat admin UI on host port 1865, loopback only because API_KEY is empty
+    volumes:
+      - ./cheshire-cat/cat/static:/app/cat/static
+      - ./cat-plugins:/app/cat/plugins        # Shared plugins directory
+      - ./cheshire-cat/cat/data:/app/cat/data  # Personality data (lore, prompts)
+      - ./cheshire-cat/cat/log.py:/app/cat/log.py  # Patched: fix loguru KeyError for third-party libs
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:80/"]  # NOTE(review): assumes curl exists in the image
+      interval: 15s
+      timeout: 10s
+      retries: 8
+      start_period: 45s  # Cat takes a while to load embedder + plugins
+
+  cheshire-cat-vector-memory:
+    image: qdrant/qdrant:v1.9.1
+    container_name: miku-qdrant
+    environment:
+      - LOG_LEVEL=INFO
+    ports:
+      - "127.0.0.1:6333:6333"  # Qdrant REST API for host-side debugging only; no API key is set on this service
+    ulimits:
+      nofile:  # Raise the open-file descriptor limit for this container
+        soft: 65536
+        hard: 65536
+    volumes:
+      - ./cheshire-cat/cat/long_term_memory/vector:/qdrant/storage  # Bind mount so stored vectors live on the host
+    restart: unless-stopped
+
+  # ========== Discord Bot ==========
  miku-bot:
    build: ./bot
    container_name: miku-bot
@@ -62,6 +116,8 @@ services:
        condition: service_healthy
      llama-swap-amd:
        condition: service_healthy
+      cheshire-cat:
+        condition: service_healthy
    environment:
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:?DISCORD_BOT_TOKEN must be set}  # Supply via .env or the shell; never commit the token (revoke any token that was committed)
      - LLAMA_URL=http://llama-swap:8080
@@ -70,13 +126,17 @@ services:
      - VISION_MODEL=vision
      - OWNER_USER_ID=209381657369772032  # Your Discord user ID for DM analysis reports
      - FACE_DETECTOR_STARTUP_TIMEOUT=60
+      # Cheshire Cat integration (Phase 3)
+      - CHESHIRE_CAT_URL=http://cheshire-cat:80
+      - USE_CHESHIRE_CAT=true
    ports:
      - "3939:3939"
    networks:
-      - default  # Stay on default for llama-swap communication
+      - default  # Stay on default for llama-swap + cheshire-cat communication
      - miku-voice  # Connect to voice network for RVC/TTS
    restart: unless-stopped

+  # ========== Voice / STT ==========
  miku-stt:
    build:
      context: ./stt-realtime
@@ -106,6 +166,7 @@ services:
              capabilities: [gpu]
    restart: unless-stopped

+  # ========== Tools (on-demand) ==========
  anime-face-detector:
    build: ./face-detector
    container_name: anime-face-detector