Phase 3: Unified Cheshire Cat integration with WebSocket-based per-user isolation
Key changes:
- CatAdapter (bot/utils/cat_client.py): uses WebSocket /ws/{user_id} for chat
  queries instead of HTTP POST (fixes per-user memory isolation when no
  API keys are configured — HTTP defaults all users to user_id='user')
- Memory management API: 8 endpoints for status, stats, facts, episodic
  memories, consolidation trigger, and multi-step delete with confirmation
- Web UI: Memory tab (tab9) with collection stats, fact/episodic browser,
  manual consolidation trigger, and 3-step delete flow requiring an exact
  confirmation string
- Bot integration: Cat-first response path with query_llama fallback for
  both text and embed responses, plus server mood detection
- Discord bridge plugin: replaced .pop() with .get() (UserMessage is a Pydantic
  BaseModelDict, not a raw dict); metadata extraction via extra attributes
- Unified docker-compose: Cat + Qdrant services merged into the main compose
  file; bot depends_on the Cat healthcheck
- All plugins (discord_bridge, memory_consolidation, miku_personality)
  consolidated into cat-plugins/ for the volume mount
- query_llama is deprecated but still functional for compatibility
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
version: '3.9'
|
||||
|
||||
services:
|
||||
# ========== LLM Backends ==========
|
||||
llama-swap:
|
||||
image: ghcr.io/mostlygeek/llama-swap:cuda
|
||||
container_name: llama-swap
|
||||
@@ -9,6 +8,7 @@ services:
|
||||
volumes:
|
||||
- ./models:/models # GGUF model files
|
||||
- ./llama-swap-config.yaml:/app/config.yaml # llama-swap configuration
|
||||
- ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja # Custom chat template
|
||||
runtime: nvidia
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
@@ -31,6 +31,7 @@ services:
|
||||
volumes:
|
||||
- ./models:/models # GGUF model files
|
||||
- ./llama-swap-rocm-config.yaml:/app/config.yaml # llama-swap configuration for AMD
|
||||
- ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja # Custom chat template
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
@@ -50,6 +51,59 @@ services:
|
||||
- HIP_VISIBLE_DEVICES=0 # Use first AMD GPU
|
||||
- GPU_DEVICE_ORDINAL=0
|
||||
|
||||
# ========== Cheshire Cat AI (Memory & Personality) ==========
|
||||
cheshire-cat:
  # Cheshire Cat core, pinned to 1.6.2. Compose starts it once the Qdrant
  # container has been started and llama-swap-amd reports healthy.
  container_name: miku-cheshire-cat
  image: ghcr.io/cheshire-cat-ai/core:1.6.2
  restart: unless-stopped
  depends_on:
    llama-swap-amd:
      condition: service_healthy
    cheshire-cat-vector-memory:
      condition: service_started
  # Mapping form of the environment; every value is quoted so it reaches the
  # container as the same string the KEY=value list form would have produced.
  environment:
    PYTHONUNBUFFERED: "1"
    WATCHFILES_FORCE_POLLING: "true"
    CORE_HOST: "localhost"
    CORE_PORT: "1865"
    CORE_USE_SECURE_PROTOCOLS: "false"
    # Vector store is reached by service name on the compose network.
    QDRANT_HOST: "cheshire-cat-vector-memory"
    QDRANT_PORT: "6333"
    # Explicit empty string (same as "API_KEY=" in list form).
    API_KEY: ""
    LOG_LEVEL: "INFO"
    DEBUG: "true"
    SAVE_MEMORY_SNAPSHOTS: "false"
    # OpenAI-compatible endpoint served by the llama-swap-amd service.
    OPENAI_API_BASE: "http://llama-swap-amd:8080/v1"
  ports:
    # Cat admin UI on host port 1865 (container listens on 80)
    - "1865:80"
  volumes:
    - ./cheshire-cat/cat/static:/app/cat/static
    # Shared plugins directory
    - ./cat-plugins:/app/cat/plugins
    # Personality data (lore, prompts)
    - ./cheshire-cat/cat/data:/app/cat/data
    # Patched: fix loguru KeyError for third-party libs
    - ./cheshire-cat/cat/log.py:/app/cat/log.py
  healthcheck:
    # Healthy once an HTTP GET on the container's port 80 succeeds.
    test:
      - CMD
      - curl
      - "-f"
      - http://localhost:80/
    # Cat takes a while to load embedder + plugins
    start_period: 45s
    interval: 15s
    timeout: 10s
    retries: 8
|
||||
|
||||
cheshire-cat-vector-memory:
  # Qdrant vector store backing Cheshire Cat's long-term memory. The
  # cheshire-cat service reaches it over the compose network as
  # cheshire-cat-vector-memory:6333, independent of the host port below.
  image: qdrant/qdrant:v1.9.1
  container_name: miku-qdrant
  environment:
    # NOTE(review): Qdrant's documented env override is QDRANT__LOG_LEVEL —
    # confirm that plain LOG_LEVEL is actually honoured by this image.
    - LOG_LEVEL=INFO
  ports:
    # Qdrant REST API (for debugging). Published on loopback only: no API
    # key is configured for this service, so exposing 6333 on every host
    # interface would let any machine on the network read or delete the
    # stored memories. Use an SSH tunnel for remote debugging.
    - "127.0.0.1:6333:6333"
  ulimits:
    nofile:
      soft: 65536
      hard: 65536
  volumes:
    # Persist collections on the host next to the Cat data directory.
    - ./cheshire-cat/cat/long_term_memory/vector:/qdrant/storage
  restart: unless-stopped
|
||||
|
||||
# ========== Discord Bot ==========
|
||||
miku-bot:
|
||||
build: ./bot
|
||||
container_name: miku-bot
|
||||
@@ -62,6 +116,8 @@ services:
|
||||
condition: service_healthy
|
||||
llama-swap-amd:
|
||||
condition: service_healthy
|
||||
cheshire-cat:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
# SECURITY: the bot token must never be committed. It is interpolated from
# the host environment or an .env file next to this compose file, and
# compose aborts with the message below when it is unset or empty. The token
# that was previously hard-coded on this line is in git history and must be
# regenerated in the Discord developer portal.
- DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:?Set DISCORD_BOT_TOKEN in the environment or .env file}
|
||||
- LLAMA_URL=http://llama-swap:8080
|
||||
@@ -70,13 +126,17 @@ services:
|
||||
- VISION_MODEL=vision
|
||||
- OWNER_USER_ID=209381657369772032 # Your Discord user ID for DM analysis reports
|
||||
- FACE_DETECTOR_STARTUP_TIMEOUT=60
|
||||
# Cheshire Cat integration (Phase 3)
|
||||
- CHESHIRE_CAT_URL=http://cheshire-cat:80
|
||||
- USE_CHESHIRE_CAT=true
|
||||
ports:
|
||||
- "3939:3939"
|
||||
networks:
|
||||
- default # Stay on default for llama-swap communication
|
||||
- default # Stay on default for llama-swap + cheshire-cat communication
|
||||
- miku-voice # Connect to voice network for RVC/TTS
|
||||
restart: unless-stopped
|
||||
|
||||
# ========== Voice / STT ==========
|
||||
miku-stt:
|
||||
build:
|
||||
context: ./stt-realtime
|
||||
@@ -106,6 +166,7 @@ services:
|
||||
capabilities: [gpu]
|
||||
restart: unless-stopped
|
||||
|
||||
# ========== Tools (on-demand) ==========
|
||||
anime-face-detector:
|
||||
build: ./face-detector
|
||||
container_name: anime-face-detector
|
||||
|
||||
Reference in New Issue
Block a user