Phase 3: Unified Cheshire Cat integration with WebSocket-based per-user isolation

Key changes:
- CatAdapter (bot/utils/cat_client.py): WebSocket /ws/{user_id} for chat
  queries instead of HTTP POST (fixes per-user memory isolation when no
  API keys are configured — HTTP defaults all users to user_id='user')
- Memory management API: 8 endpoints for status, stats, facts, episodic
  memories, consolidation trigger, multi-step delete with confirmation
- Web UI: Memory tab (tab9) with collection stats, fact/episodic browser,
  manual consolidation trigger, and 3-step delete flow requiring exact
  confirmation string
- Bot integration: Cat-first response path with query_llama fallback for
  both text and embed responses, server mood detection
- Discord bridge plugin: replaced .pop() with .get() (UserMessage is a Pydantic
  BaseModelDict, not a raw dict), metadata extraction via extra attributes
- Unified docker-compose: Cat + Qdrant services merged into main compose,
  bot depends_on Cat healthcheck
- All plugins (discord_bridge, memory_consolidation, miku_personality)
  consolidated into cat-plugins/ for volume mount
- query_llama is deprecated but remains functional for backward compatibility
This commit is contained in:
2026-02-07 20:22:03 +02:00
parent edb88e9ede
commit 14e1a8df51
14 changed files with 1382 additions and 70 deletions

View File

@@ -1,6 +1,5 @@
version: '3.9'
services:
# ========== LLM Backends ==========
llama-swap:
image: ghcr.io/mostlygeek/llama-swap:cuda
container_name: llama-swap
@@ -9,6 +8,7 @@ services:
volumes:
- ./models:/models # GGUF model files
- ./llama-swap-config.yaml:/app/config.yaml # llama-swap configuration
- ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja # Custom chat template
runtime: nvidia
restart: unless-stopped
healthcheck:
@@ -31,6 +31,7 @@ services:
volumes:
- ./models:/models # GGUF model files
- ./llama-swap-rocm-config.yaml:/app/config.yaml # llama-swap configuration for AMD
- ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja # Custom chat template
devices:
- /dev/kfd:/dev/kfd
- /dev/dri:/dev/dri
@@ -50,6 +51,59 @@ services:
- HIP_VISIBLE_DEVICES=0 # Use first AMD GPU
- GPU_DEVICE_ORDINAL=0
# ========== Cheshire Cat AI (Memory & Personality) ==========
# Cheshire Cat core service. The bot reaches it through CHESHIRE_CAT_URL and
# waits for this service's healthcheck before starting.
cheshire-cat:
# Image is pinned to an explicit core release rather than a floating tag.
image: ghcr.io/cheshire-cat-ai/core:1.6.2
container_name: miku-cheshire-cat
# Start order: the vector store only needs to be started, while the AMD
# llama-swap backend must report healthy first.
depends_on:
cheshire-cat-vector-memory:
condition: service_started
llama-swap-amd:
condition: service_healthy
environment:
- PYTHONUNBUFFERED=1
- WATCHFILES_FORCE_POLLING=true
# NOTE(review): presumably the externally visible host/port the Cat advertises
# (matches the published host port 1865 below) — confirm against Cat docs.
- CORE_HOST=localhost
- CORE_PORT=1865
# Vector store is addressed by its compose service name on the default network.
- QDRANT_HOST=cheshire-cat-vector-memory
- QDRANT_PORT=6333
- CORE_USE_SECURE_PROTOCOLS=false
# Intentionally empty: no API key is configured for the Cat.
- API_KEY=
- LOG_LEVEL=INFO
- DEBUG=true
- SAVE_MEMORY_SNAPSHOTS=false
# OpenAI-compatible endpoint served by the llama-swap-amd service.
- OPENAI_API_BASE=http://llama-swap-amd:8080/v1
ports:
- "1865:80" # Cat admin UI on host port 1865
# All mounts are host bind mounts relative to the compose file's directory.
volumes:
- ./cheshire-cat/cat/static:/app/cat/static
- ./cat-plugins:/app/cat/plugins # Shared plugins directory
- ./cheshire-cat/cat/data:/app/cat/data # Personality data (lore, prompts)
- ./cheshire-cat/cat/log.py:/app/cat/log.py # Patched: fix loguru KeyError for third-party libs
restart: unless-stopped
# Probes the Cat's HTTP root from inside the container (container port 80);
# `curl -f` exits non-zero on HTTP error responses.
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:80/"]
interval: 15s
timeout: 10s
retries: 8
start_period: 45s # Cat takes a while to load embedder + plugins
# Qdrant vector store used by the Cheshire Cat (referenced via QDRANT_HOST).
cheshire-cat-vector-memory:
# Image is pinned to an explicit Qdrant release.
image: qdrant/qdrant:v1.9.1
container_name: miku-qdrant
environment:
# NOTE(review): Qdrant's own settings are normally overridden through
# QDRANT__-prefixed variables — confirm this variable has any effect.
- LOG_LEVEL=INFO
ports:
- "6333:6333" # Qdrant REST API (for debugging)
# Raise the open-file limit (soft and hard) to 65536 for the container.
ulimits:
nofile:
soft: 65536
hard: 65536
# Bind mount so vector data is stored on the host under ./cheshire-cat.
volumes:
- ./cheshire-cat/cat/long_term_memory/vector:/qdrant/storage
restart: unless-stopped
# ========== Discord Bot ==========
miku-bot:
build: ./bot
container_name: miku-bot
@@ -62,6 +116,8 @@ services:
condition: service_healthy
llama-swap-amd:
condition: service_healthy
cheshire-cat:
condition: service_healthy
environment:
# Secret is injected from the host environment or an untracked .env file and
# must never be committed as a literal. The `:?` form makes `docker compose`
# abort with this message when the variable is unset or empty.
# NOTE: any token that was previously committed here should be treated as
# compromised and regenerated in the Discord developer portal.
- DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:?DISCORD_BOT_TOKEN must be set}
- LLAMA_URL=http://llama-swap:8080
@@ -70,13 +126,17 @@ services:
- VISION_MODEL=vision
- OWNER_USER_ID=209381657369772032 # Your Discord user ID for DM analysis reports
- FACE_DETECTOR_STARTUP_TIMEOUT=60
# Cheshire Cat integration (Phase 3)
- CHESHIRE_CAT_URL=http://cheshire-cat:80
- USE_CHESHIRE_CAT=true
ports:
- "3939:3939"
networks:
- default # Stay on default for llama-swap communication
- default # Stay on default for llama-swap + cheshire-cat communication
- miku-voice # Connect to voice network for RVC/TTS
restart: unless-stopped
# ========== Voice / STT ==========
miku-stt:
build:
context: ./stt-realtime
@@ -106,6 +166,7 @@ services:
capabilities: [gpu]
restart: unless-stopped
# ========== Tools (on-demand) ==========
anime-face-detector:
build: ./face-detector
container_name: anime-face-detector