Phase 3: Unified Cheshire Cat integration with WebSocket-based per-user isolation

Key changes:
- CatAdapter (bot/utils/cat_client.py): WebSocket /ws/{user_id} for chat queries instead of HTTP POST (fixes per-user memory isolation when no API keys are configured — HTTP defaults all users to user_id='user')
- Memory management API: 8 endpoints for status, integration toggle, stats, facts, episodic memories, consolidation trigger, delete-all with confirmation string, and single-point delete
- Web UI: Memory tab (tab9) with collection stats, fact/episodic browser, manual consolidation trigger, and 3-step delete flow requiring exact confirmation string
- Bot integration: Cat-first response path with query_llama fallback for both text and embed responses, server mood detection
- Discord bridge plugin: fixed .pop() to .get() (UserMessage is a Pydantic BaseModelDict, not a raw dict), metadata extraction via extra attributes
- Unified docker-compose: Cat + Qdrant services merged into main compose, bot depends_on Cat healthcheck
- All plugins (discord_bridge, memory_consolidation, miku_personality) consolidated into cat-plugins/ for volume mount
- query_llama deprecated but functional for compatibility
2026-02-07 20:22:03 +02:00
parent edb88e9ede
commit 14e1a8df51
14 changed files with 1382 additions and 70 deletions
--- a/bot/api.py
+++ b/bot/api.py
@@ -2772,6 +2772,134 @@ def set_voice_debug_mode(enabled: bool = Form(...)):
    }
 # ========== Cheshire Cat Memory Management (Phase 3) ==========
 class MemoryDeleteRequest(BaseModel):
    """JSON request body for POST /memory/delete."""
    # Must equal the exact confirmation sentence checked by delete_all_memories.
    confirmation: str
@app.get("/memory/status")
async def get_cat_memory_status():
    """Report the Cheshire Cat feature flag and the adapter's connection state.

    Returns a dict with the feature flag, the adapter's health-check result,
    the configured Cat URL, and the adapter's circuit-breaker bookkeeping.
    """
    from utils.cat_client import cat_adapter

    healthy = await cat_adapter.health_check()
    # Query the breaker before reading the failure counter, as the original did.
    breaker_open = cat_adapter._is_circuit_broken()
    failure_count = cat_adapter._consecutive_failures

    status = {
        "enabled": globals.USE_CHESHIRE_CAT,
        "healthy": healthy,
        "url": globals.CHESHIRE_CAT_URL,
        "circuit_breaker_active": breaker_open,
        "consecutive_failures": failure_count,
    }
    return status
@app.post("/memory/toggle")
async def toggle_cat_integration(enabled: bool = Form(...)):
    """Switch the Cheshire Cat integration on or off at runtime.

    Writes the ``enabled`` form field to ``globals.USE_CHESHIRE_CAT`` and
    echoes the resulting flag back to the caller.
    """
    globals.USE_CHESHIRE_CAT = enabled

    if enabled:
        log_word, message_word = "ENABLED", "enabled"
    else:
        log_word, message_word = "DISABLED", "disabled"

    logger.info(f"🐱 Cheshire Cat integration {log_word}")
    return {
        "success": True,
        "enabled": globals.USE_CHESHIRE_CAT,
        "message": f"Cheshire Cat {message_word}",
    }
@app.get("/memory/stats")
async def get_memory_stats():
    """Return the collection statistics reported by the Cat adapter.

    A ``None`` result from the adapter is reported as an unreachable Cat.
    """
    from utils.cat_client import cat_adapter

    cat_stats = await cat_adapter.get_memory_stats()
    if cat_stats is not None:
        collections = cat_stats.get("collections", [])
        return {"success": True, "collections": collections}
    return {"success": False, "error": "Could not reach Cheshire Cat"}
@app.get("/memory/facts")
async def get_memory_facts():
    """Return all declarative memory facts (learned knowledge about users).

    Returns:
        ``{"success": True, "facts": [...], "count": n}`` on success, or
        ``{"success": False, "error": ...}`` when the adapter yields ``None``.
    """
    from utils.cat_client import cat_adapter

    facts = await cat_adapter.get_all_facts()
    # NOTE(review): get_all_facts() is not visible from this file. The other
    # adapter calls used by these endpoints signal failure with None, so guard
    # the same way instead of letting len(None) raise a TypeError (HTTP 500).
    if facts is None:
        return {"success": False, "error": "Could not reach Cheshire Cat"}
    return {"success": True, "facts": facts, "count": len(facts)}
@app.get("/memory/episodic")
async def get_episodic_memories():
    """Return up to 100 episodic memory points (conversation snippets).

    Each point is flattened to its id, ``page_content`` and ``metadata``.
    A ``None`` result from the adapter is reported as an unreachable Cat.
    """
    from utils.cat_client import cat_adapter

    def _flatten(point):
        # Pull the fields the UI needs out of the point's payload.
        payload = point.get("payload", {})
        return {
            "id": point.get("id"),
            "content": payload.get("page_content", ""),
            "metadata": payload.get("metadata", {}),
        }

    fetched = await cat_adapter.get_memory_points(collection="episodic", limit=100)
    if fetched is None:
        return {"success": False, "error": "Could not reach Cheshire Cat"}

    memories = [_flatten(point) for point in fetched.get("points", [])]
    return {"success": True, "memories": memories, "count": len(memories)}
@app.post("/memory/consolidate")
async def trigger_memory_consolidation():
    """Run the adapter's consolidation routine on demand and return its result.

    A ``None`` result from the adapter is reported as a failure or timeout.
    """
    from utils.cat_client import cat_adapter

    logger.info("🌙 Manual memory consolidation triggered via API")
    outcome = await cat_adapter.trigger_consolidation()
    if outcome is not None:
        return {"success": True, "result": outcome}
    return {"success": False, "error": "Consolidation failed or timed out"}
@app.post("/memory/delete")
async def delete_all_memories(request: MemoryDeleteRequest):
    """
    Delete ALL of Miku's memories. Requires exact confirmation string.

    The confirmation field must be exactly:
        "Yes, I am deleting Miku's memories fully."

    This is destructive and irreversible.

    Two wipes are attempted, in order: the vector memories, then the
    conversation history. ``success`` reflects the vector wipe only; the
    outcome of both steps is always included in the response so a partial
    wipe is visible to the caller.
    """
    REQUIRED_CONFIRMATION = "Yes, I am deleting Miku's memories fully."
    if request.confirmation != REQUIRED_CONFIRMATION:
        logger.warning("Memory deletion rejected: wrong confirmation string")
        return {
            "success": False,
            "error": "Confirmation string does not match. "
                     f"Expected exactly: \"{REQUIRED_CONFIRMATION}\""
        }

    from utils.cat_client import cat_adapter

    logger.warning("⚠️ MEMORY DELETION CONFIRMED — wiping all memories!")
    # Wipe vector memories (episodic + declarative)
    wipe_success = await cat_adapter.wipe_all_memories()
    # Also clear conversation history (attempted even if the vector wipe failed)
    history_success = await cat_adapter.wipe_conversation_history()

    if not wipe_success:
        # The history wipe has already run, so report its outcome too:
        # the caller must be able to tell a no-op from a partial wipe.
        return {
            "success": False,
            "error": "Failed to wipe memory collections. Check Cat connection.",
            "vector_memory_wiped": wipe_success,
            "conversation_history_cleared": history_success
        }

    if history_success:
        logger.warning("🗑️ All Miku memories have been deleted.")
    else:
        logger.warning("🗑️ Vector memories deleted, but conversation history could not be cleared.")
    return {
        "success": True,
        "message": "All memories have been permanently deleted.",
        "vector_memory_wiped": wipe_success,
        "conversation_history_cleared": history_success
    }
@app.delete("/memory/point/{collection}/{point_id}")
async def delete_single_memory_point(collection: str, point_id: str):
    """Remove one memory point, identified by its collection and point ID."""
    from utils.cat_client import cat_adapter

    deleted = await cat_adapter.delete_memory_point(collection, point_id)
    if not deleted:
        return {"success": False, "error": f"Failed to delete point {point_id}"}
    return {"success": True, "deleted": point_id}
 def start_api():
    """Serve the FastAPI ``app`` with uvicorn on all interfaces, port 3939."""
    # uvicorn is imported at call time rather than at module import.
    import uvicorn
    # NOTE(review): host 0.0.0.0 exposes these endpoints (including the memory
    # wipe) on every interface — confirm that network access is restricted.
    uvicorn.run(app, host="0.0.0.0", port=3939)
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -513,13 +513,33 @@ async def on_message(message):
                            response_type = "dm_response" if is_dm else "server_response"
                            author_name = message.author.display_name
-                            response = await query_llama(
+                            # Phase 3: Try Cat pipeline first for embed responses too
-                                enhanced_prompt,
+                            response = None
-                                user_id=str(message.author.id),
+                            if globals.USE_CHESHIRE_CAT:
-                                guild_id=guild_id,
+                                try:
-                                response_type=response_type,
+                                    from utils.cat_client import cat_adapter
-                                author_name=author_name
+                                    response = await cat_adapter.query(
-                            )
+                                        text=enhanced_prompt,
                                        user_id=str(message.author.id),
                                        guild_id=str(guild_id) if guild_id else None,
                                        author_name=author_name,
                                        mood=globals.DM_MOOD,
                                        response_type=response_type,
                                    )
                                    if response:
                                        logger.info(f"🐱 Cat embed response for {author_name}")
                                except Exception as e:
                                    logger.warning(f"🐱 Cat embed error, fallback: {e}")
                                    response = None
                            if not response:
                                response = await query_llama(
                                    enhanced_prompt,
                                    user_id=str(message.author.id),
                                    guild_id=guild_id,
                                    response_type=response_type,
                                    author_name=author_name
                                )
                            if is_dm:
                                logger.info(f"💌 DM embed response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
@@ -570,13 +590,46 @@ async def on_message(message):
            guild_id = message.guild.id if message.guild else None
            response_type = "dm_response" if is_dm else "server_response"
            author_name = message.author.display_name
-            response = await query_llama(
+
-                prompt, 
+            # Phase 3: Try Cheshire Cat pipeline first (memory-augmented response)
-                user_id=str(message.author.id), 
+            # Falls back to query_llama if Cat is unavailable or disabled
-                guild_id=guild_id, 
+            response = None
-                response_type=response_type,
+            if globals.USE_CHESHIRE_CAT:
-                author_name=author_name
+                try:
-            )
+                    from utils.cat_client import cat_adapter
                    current_mood = globals.DM_MOOD
                    if guild_id:
                        try:
                            from server_manager import server_manager
                            sc = server_manager.get_server_config(guild_id)
                            if sc:
                                current_mood = sc.current_mood_name
                        except Exception:
                            pass
                    response = await cat_adapter.query(
                        text=prompt,
                        user_id=str(message.author.id),
                        guild_id=str(guild_id) if guild_id else None,
                        author_name=author_name,
                        mood=current_mood,
                        response_type=response_type,
                    )
                    if response:
                        logger.info(f"🐱 Cat response for {author_name} (mood: {current_mood})")
                except Exception as e:
                    logger.warning(f"🐱 Cat pipeline error, falling back to query_llama: {e}")
                    response = None
            # Fallback to direct LLM query if Cat didn't respond
            if not response:
                response = await query_llama(
                    prompt, 
                    user_id=str(message.author.id), 
                    guild_id=guild_id, 
                    response_type=response_type,
                    author_name=author_name
                )
            if is_dm:
                logger.info(f"💌 DM response to {message.author.display_name} (using DM mood: {globals.DM_MOOD})")
--- a/bot/globals.py
+++ b/bot/globals.py
@@ -29,6 +29,12 @@ EVIL_TEXT_MODEL = os.getenv("EVIL_TEXT_MODEL", "darkidol")  # Uncensored model f
 JAPANESE_TEXT_MODEL = os.getenv("JAPANESE_TEXT_MODEL", "swallow")  # Llama 3.1 Swallow model for Japanese
 OWNER_USER_ID = int(os.getenv("OWNER_USER_ID", "209381657369772032"))  # Bot owner's Discord user ID for reports
 # Cheshire Cat AI integration (Phase 3)
 # All values are read from the environment once, when this module is imported.
 # Base URL of the Cat service (default points at the "cheshire-cat" host on port 80).
 CHESHIRE_CAT_URL = os.getenv("CHESHIRE_CAT_URL", "http://cheshire-cat:80")
 # Feature flag: only the (case-insensitive) string "true" enables it; default is off.
 USE_CHESHIRE_CAT = os.getenv("USE_CHESHIRE_CAT", "false").lower() == "true"
 CHESHIRE_CAT_API_KEY = os.getenv("CHESHIRE_CAT_API_KEY", "")  # Empty = no auth
 # int() raises ValueError at import time if the variable is set to a non-integer.
 CHESHIRE_CAT_TIMEOUT = int(os.getenv("CHESHIRE_CAT_TIMEOUT", "120"))  # Seconds
 # Language mode for Miku (english or japanese)
 LANGUAGE_MODE = "english"  # Can be "english" or "japanese"
--- a/bot/static/index.html
+++ b/bot/static/index.html
@@ -665,6 +665,7 @@
      <button class="tab-button" onclick="switchTab('tab6')">📊 Autonomous Stats</button>
      <button class="tab-button" onclick="switchTab('tab7')">💬 Chat with LLM</button>
      <button class="tab-button" onclick="switchTab('tab8')">📞 Voice Call</button>
      <button class="tab-button" onclick="switchTab('tab9')">🧠 Memories</button>
      <button class="tab-button" onclick="window.location.href='/static/system.html'">🎛️ System Settings</button>
    </div>
@@ -1547,6 +1548,142 @@
  </div>
 </div>
    <!-- Tab 9: Memory Management -->
    <div id="tab9" class="tab-content">
      <div class="section">
        <h3>🧠 Cheshire Cat Memory Management</h3>
        <p style="color: #aaa; margin-bottom: 1rem;">
          Manage Miku's long-term memories powered by the Cheshire Cat AI pipeline.
          Memories are stored in Qdrant vector database and used to give Miku persistent knowledge about users.
        </p>
        <!-- Cat Integration Status -->
        <div id="cat-status-section" style="background: #1a1a2e; border: 1px solid #444; border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;">
          <div style="display: flex; justify-content: space-between; align-items: center;">
            <div>
              <h4 style="margin: 0 0 0.3rem 0;">🐱 Cheshire Cat Status</h4>
              <span id="cat-status-indicator" style="color: #888;">Checking...</span>
            </div>
            <div style="display: flex; gap: 0.5rem; align-items: center;">
              <button id="cat-toggle-btn" onclick="toggleCatIntegration()" style="background: #333; color: #fff; padding: 0.4rem 0.8rem; border: 2px solid #666; border-radius: 4px; cursor: pointer; font-weight: bold; font-size: 0.85rem;">
                Loading...
              </button>
              <button onclick="refreshMemoryStats()" style="background: #2a5599; color: #fff; padding: 0.4rem 0.8rem; border: none; border-radius: 4px; cursor: pointer;">
                🔄 Refresh
              </button>
            </div>
          </div>
        </div>
        <!-- Memory Statistics -->
        <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem; margin-bottom: 1.5rem;">
          <div id="stat-episodic" style="background: #1a2332; border: 1px solid #2a5599; border-radius: 8px; padding: 1rem; text-align: center;">
            <div style="font-size: 2rem; font-weight: bold; color: #61dafb;" id="stat-episodic-count">—</div>
            <div style="color: #aaa; font-size: 0.85rem;">📝 Episodic Memories</div>
            <div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">Conversation snippets</div>
          </div>
          <div id="stat-declarative" style="background: #1a3322; border: 1px solid #2a9955; border-radius: 8px; padding: 1rem; text-align: center;">
            <div style="font-size: 2rem; font-weight: bold; color: #6fdc6f;" id="stat-declarative-count">—</div>
            <div style="color: #aaa; font-size: 0.85rem;">📚 Declarative Facts</div>
            <div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">Learned knowledge</div>
          </div>
          <div id="stat-procedural" style="background: #332a1a; border: 1px solid #995e2a; border-radius: 8px; padding: 1rem; text-align: center;">
            <div style="font-size: 2rem; font-weight: bold; color: #dcb06f;" id="stat-procedural-count">—</div>
            <div style="color: #aaa; font-size: 0.85rem;">⚙️ Procedural</div>
            <div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">Tools & procedures</div>
          </div>
        </div>
        <!-- Consolidation -->
        <div style="background: #1a1a2e; border: 1px solid #444; border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;">
          <h4 style="margin: 0 0 0.5rem 0;">🌙 Memory Consolidation</h4>
          <p style="color: #aaa; font-size: 0.85rem; margin-bottom: 0.75rem;">
            Trigger the sleep consolidation process: analyzes episodic memories, extracts important facts, and removes trivial entries.
          </p>
          <div style="display: flex; gap: 0.5rem; align-items: center;">
            <button id="consolidate-btn" onclick="triggerConsolidation()" style="background: #5b3a8c; color: #fff; padding: 0.5rem 1rem; border: none; border-radius: 4px; cursor: pointer; font-weight: bold;">
              🌙 Run Consolidation
            </button>
            <span id="consolidation-status" style="color: #888; font-size: 0.85rem;"></span>
          </div>
          <div id="consolidation-result" style="display: none; margin-top: 0.75rem; background: #111; border: 1px solid #333; border-radius: 4px; padding: 0.75rem; font-size: 0.85rem; color: #ccc; white-space: pre-wrap; max-height: 200px; overflow-y: auto;"></div>
        </div>
        <!-- Declarative Facts Browser -->
        <div style="background: #1a1a2e; border: 1px solid #444; border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;">
          <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.75rem;">
            <h4 style="margin: 0;">📚 Declarative Facts</h4>
            <button onclick="loadFacts()" style="background: #2a5599; color: #fff; padding: 0.3rem 0.7rem; border: none; border-radius: 4px; cursor: pointer; font-size: 0.85rem;">
              🔄 Load Facts
            </button>
          </div>
          <div id="facts-list" style="max-height: 400px; overflow-y: auto;">
            <div style="text-align: center; color: #666; padding: 2rem;">Click "Load Facts" to view stored knowledge</div>
          </div>
        </div>
        <!-- Episodic Memories Browser -->
        <div style="background: #1a1a2e; border: 1px solid #444; border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;">
          <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.75rem;">
            <h4 style="margin: 0;">📝 Episodic Memories</h4>
            <button onclick="loadEpisodicMemories()" style="background: #2a5599; color: #fff; padding: 0.3rem 0.7rem; border: none; border-radius: 4px; cursor: pointer; font-size: 0.85rem;">
              🔄 Load Memories
            </button>
          </div>
          <div id="episodic-list" style="max-height: 400px; overflow-y: auto;">
            <div style="text-align: center; color: #666; padding: 2rem;">Click "Load Memories" to view conversation snippets</div>
          </div>
        </div>
        <!-- DANGER ZONE: Delete All Memories -->
        <div style="background: #2e1a1a; border: 2px solid #993333; border-radius: 8px; padding: 1rem;">
          <h4 style="margin: 0 0 0.5rem 0; color: #ff6b6b;">⚠️ Danger Zone — Delete All Memories</h4>
          <p style="color: #cc9999; font-size: 0.85rem; margin-bottom: 1rem;">
            This will permanently erase ALL of Miku's memories — episodic conversations, learned facts, everything.
            This action is <strong>irreversible</strong>. Miku will forget everything she has ever learned.
          </p>
          <!-- Step 1: Initial checkbox -->
          <div id="delete-step-1" style="margin-bottom: 0.75rem;">
            <label style="cursor: pointer; color: #ff9999;">
              <input type="checkbox" id="delete-checkbox-1" onchange="onDeleteStep1Change()">
              I understand this will permanently delete all of Miku's memories
            </label>
          </div>
          <!-- Step 2: Second confirmation (hidden initially) -->
          <div id="delete-step-2" style="display: none; margin-bottom: 0.75rem;">
            <label style="cursor: pointer; color: #ff9999;">
              <input type="checkbox" id="delete-checkbox-2" onchange="onDeleteStep2Change()">
              I confirm this is irreversible and I want to proceed
            </label>
          </div>
          <!-- Step 3: Type confirmation string (hidden initially) -->
          <div id="delete-step-3" style="display: none; margin-bottom: 0.75rem;">
            <p style="color: #ff6b6b; font-size: 0.85rem; margin-bottom: 0.5rem;">
              Type exactly: <code style="background: #333; padding: 0.2rem 0.4rem; border-radius: 3px; color: #ff9999;">Yes, I am deleting Miku's memories fully.</code>
            </p>
            <input type="text" id="delete-confirmation-input" placeholder="Type the confirmation string..."
              style="width: 100%; padding: 0.5rem; background: #1a1a1a; color: #ff9999; border: 1px solid #993333; border-radius: 4px; font-family: monospace; box-sizing: border-box;"
              oninput="onDeleteInputChange()">
          </div>
          <!-- Final delete button (hidden initially) -->
          <div id="delete-step-final" style="display: none;">
            <button id="delete-all-btn" onclick="executeDeleteAllMemories()" disabled
              style="background: #cc3333; color: #fff; padding: 0.5rem 1.5rem; border: none; border-radius: 4px; cursor: not-allowed; font-weight: bold; opacity: 0.5;">
              🗑️ Permanently Delete All Memories
            </button>
            <button onclick="resetDeleteFlow()" style="background: #444; color: #ccc; padding: 0.5rem 1rem; border: none; border-radius: 4px; cursor: pointer; margin-left: 0.5rem;">
              Cancel
            </button>
          </div>
        </div>
      </div>
    </div>
 <div class="logs">
  <h3>Logs</h3>
  <div id="logs-content"></div>
@@ -1611,6 +1748,10 @@ function switchTab(tabId) {
    console.log('🔄 Refreshing figurine subscribers for Server Management tab');
    refreshFigurineSubscribers();
  }
  if (tabId === 'tab9') {
    console.log('🧠 Refreshing memory stats for Memories tab');
    refreshMemoryStats();
  }
 }
 // Initialize
@@ -5020,6 +5161,292 @@ function updateVoiceCallHistoryDisplay() {
  historyDiv.innerHTML = html;
 }
 // ========== Memory Management (Tab 9) ==========
 /**
  * Refresh the Memories tab header: Cat connection indicator, the ON/OFF
  * toggle button, and the three per-collection counters.
  *
  * Reads GET /memory/status and GET /memory/stats. On any fetch/parse error
  * the indicator shows an error message and the counters are left untouched.
  */
 async function refreshMemoryStats() {
   // Write one counter; null/undefined values are shown as an em dash.
   const setCount = (name, value) => {
     document.getElementById(`stat-${name}-count`).textContent = value ?? '—';
   };

   try {
     // Fetch Cat status
     const statusRes = await fetch('/memory/status');
     const statusData = await statusRes.json();

     const indicator = document.getElementById('cat-status-indicator');
     const toggleBtn = document.getElementById('cat-toggle-btn');

     // The URL comes from the server response and is written via innerHTML,
     // so it must be HTML-escaped first.
     const safeUrl = escapeHtml(statusData.url);
     let indicatorHtml = statusData.healthy
       ? `<span style="color: #6fdc6f;">● Connected</span> — ${safeUrl}`
       : `<span style="color: #ff6b6b;">● Disconnected</span> — ${safeUrl}`;
     if (statusData.circuit_breaker_active) {
       indicatorHtml += ` <span style="color: #dcb06f;">(circuit breaker active)</span>`;
     }
     indicator.innerHTML = indicatorHtml;

     toggleBtn.textContent = statusData.enabled ? '🐱 Cat: ON' : '😿 Cat: OFF';
     toggleBtn.style.background = statusData.enabled ? '#2a7a2a' : '#7a2a2a';
     toggleBtn.style.borderColor = statusData.enabled ? '#4a9a4a' : '#9a4a4a';

     // Fetch memory stats
     const statsRes = await fetch('/memory/stats');
     const statsData = await statsRes.json();

     const counts = {};
     if (statsData.success && statsData.collections) {
       statsData.collections.forEach(c => { counts[c.name] = c.vectors_count; });
     }
     // Collections missing from the response (or a failed request) show '—'.
     ['episodic', 'declarative', 'procedural'].forEach(name => setCount(name, counts[name]));
   } catch (err) {
     console.error('Error refreshing memory stats:', err);
     document.getElementById('cat-status-indicator').innerHTML = '<span style="color: #ff6b6b;">● Error checking status</span>';
   }
 }
 /**
  * Flip the Cat integration flag: read the current state from
  * /memory/status, POST the opposite to /memory/toggle, then refresh the tab.
  */
 async function toggleCatIntegration() {
   try {
     const current = await fetch('/memory/status').then(r => r.json());
     const enable = !current.enabled;

     const payload = new FormData();
     payload.append('enabled', enable);

     const outcome = await fetch('/memory/toggle', { method: 'POST', body: payload }).then(r => r.json());
     if (!outcome.success) return;

     const word = enable ? 'enabled' : 'disabled';
     const level = enable ? 'success' : 'info';
     showNotification(`Cheshire Cat ${word}`, level);
     refreshMemoryStats();
   } catch (err) {
     showNotification('Failed to toggle Cat integration', 'error');
   }
 }
 /**
  * Run memory consolidation via POST /memory/consolidate and show the outcome.
  *
  * The button is disabled while the request is in flight and always restored
  * afterwards. The status line colour is reset at the start of every run so a
  * previous success/failure colour does not leak into the "in progress" text.
  */
 async function triggerConsolidation() {
   const btn = document.getElementById('consolidate-btn');
   const status = document.getElementById('consolidation-status');
   const resultDiv = document.getElementById('consolidation-result');

   btn.disabled = true;
   btn.textContent = '⏳ Running...';
   status.textContent = 'Consolidation in progress (this may take a few minutes)...';
   status.style.color = '#888'; // neutral colour, same as the element's initial style
   resultDiv.style.display = 'none';

   try {
     const res = await fetch('/memory/consolidate', { method: 'POST' });
     const data = await res.json();

     if (data.success) {
       status.textContent = '✅ Consolidation complete!';
       status.style.color = '#6fdc6f';
       // A structured result would otherwise render as "[object Object]".
       const resultText = typeof data.result === 'string'
         ? data.result
         : JSON.stringify(data.result, null, 2);
       resultDiv.textContent = data.result ? resultText : 'Consolidation finished successfully.';
       resultDiv.style.display = 'block';
       showNotification('Memory consolidation complete', 'success');
       refreshMemoryStats();
     } else {
       status.textContent = '❌ ' + (data.error || 'Consolidation failed');
       status.style.color = '#ff6b6b';
     }
   } catch (err) {
     status.textContent = '❌ Error: ' + err.message;
     status.style.color = '#ff6b6b';
   } finally {
     btn.disabled = false;
     btn.textContent = '🌙 Run Consolidation';
   }
 }
 /**
  * Load declarative facts from GET /memory/facts and render them with a
  * per-row delete button.
  *
  * - A failed request is reported as an error, not as "no facts stored".
  * - Plain messages are written with textContent, so server/exception text is
  *   never parsed as HTML.
  * - Delete handlers are attached after rendering; the point id is never
  *   interpolated into inline JavaScript.
  */
 async function loadFacts() {
   const listDiv = document.getElementById('facts-list');

   // Replace the list with a single styled, plain-text message.
   const showMessage = (text, style) => {
     const msg = document.createElement('div');
     msg.style.cssText = style;
     msg.textContent = text;
     listDiv.innerHTML = '';
     listDiv.appendChild(msg);
   };

   showMessage('Loading facts...', 'text-align: center; color: #888; padding: 1rem;');

   try {
     const res = await fetch('/memory/facts');
     const data = await res.json();

     if (!data.success) {
       showMessage('Error loading facts: ' + (data.error || 'Unknown error'), 'color: #ff6b6b; padding: 1rem;');
       return;
     }
     if (!data.count) {
       showMessage('No declarative facts stored yet.', 'text-align: center; color: #666; padding: 2rem;');
       return;
     }

     const rows = data.facts.map((fact, i) => {
       const source = fact.metadata?.source || 'unknown';
       const when = fact.metadata?.when ? new Date(fact.metadata.when * 1000).toLocaleString() : 'unknown';
       return `
         <div style="background: #242424; padding: 0.6rem 0.8rem; margin-bottom: 0.4rem; border-radius: 4px; border-left: 3px solid #2a9955; display: flex; justify-content: space-between; align-items: flex-start;">
           <div style="flex: 1;">
             <div style="color: #ddd; font-size: 0.9rem;">${escapeHtml(fact.content)}</div>
             <div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">
               Source: ${escapeHtml(source)} · ${when}
             </div>
           </div>
           <button data-fact-index="${i}"
             style="background: none; border: none; color: #993333; cursor: pointer; padding: 0.2rem 0.4rem; font-size: 0.85rem; flex-shrink: 0;"
             title="Delete this fact">🗑️</button>
         </div>`;
     });

     listDiv.innerHTML = `<div style="color: #888; font-size: 0.8rem; margin-bottom: 0.5rem;">${data.count} facts loaded</div>` + rows.join('');

     // Bind by array index: only a number we generated reaches the markup.
     listDiv.querySelectorAll('button[data-fact-index]').forEach(btn => {
       const fact = data.facts[Number(btn.dataset.factIndex)];
       btn.addEventListener('click', () => deleteMemoryPoint('declarative', fact.id, btn));
     });
   } catch (err) {
     showMessage('Error loading facts: ' + err.message, 'color: #ff6b6b; padding: 1rem;');
   }
 }
 /**
  * Load episodic memories from GET /memory/episodic and render them with a
  * per-row delete button.
  *
  * - A failed request is reported as an error, not as "no memories stored".
  * - Plain messages are written with textContent, so server/exception text is
  *   never parsed as HTML.
  * - Delete handlers are attached after rendering; the point id is never
  *   interpolated into inline JavaScript.
  */
 async function loadEpisodicMemories() {
   const listDiv = document.getElementById('episodic-list');

   // Replace the list with a single styled, plain-text message.
   const showMessage = (text, style) => {
     const msg = document.createElement('div');
     msg.style.cssText = style;
     msg.textContent = text;
     listDiv.innerHTML = '';
     listDiv.appendChild(msg);
   };

   showMessage('Loading memories...', 'text-align: center; color: #888; padding: 1rem;');

   try {
     const res = await fetch('/memory/episodic');
     const data = await res.json();

     if (!data.success) {
       showMessage('Error loading memories: ' + (data.error || 'Unknown error'), 'color: #ff6b6b; padding: 1rem;');
       return;
     }
     if (!data.count) {
       showMessage('No episodic memories stored yet.', 'text-align: center; color: #666; padding: 2rem;');
       return;
     }

     const rows = data.memories.map((mem, i) => {
       const source = mem.metadata?.source || 'unknown';
       const when = mem.metadata?.when ? new Date(mem.metadata.when * 1000).toLocaleString() : 'unknown';
       return `
         <div style="background: #242424; padding: 0.6rem 0.8rem; margin-bottom: 0.4rem; border-radius: 4px; border-left: 3px solid #2a5599; display: flex; justify-content: space-between; align-items: flex-start;">
           <div style="flex: 1;">
             <div style="color: #ddd; font-size: 0.9rem;">${escapeHtml(mem.content)}</div>
             <div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">
               Source: ${escapeHtml(source)} · ${when}
             </div>
           </div>
           <button data-memory-index="${i}"
             style="background: none; border: none; color: #993333; cursor: pointer; padding: 0.2rem 0.4rem; font-size: 0.85rem; flex-shrink: 0;"
             title="Delete this memory">🗑️</button>
         </div>`;
     });

     listDiv.innerHTML = `<div style="color: #888; font-size: 0.8rem; margin-bottom: 0.5rem;">${data.count} memories loaded</div>` + rows.join('');

     // Bind by array index: only a number we generated reaches the markup.
     listDiv.querySelectorAll('button[data-memory-index]').forEach(btn => {
       const mem = data.memories[Number(btn.dataset.memoryIndex)];
       btn.addEventListener('click', () => deleteMemoryPoint('episodic', mem.id, btn));
     });
   } catch (err) {
     showMessage('Error loading memories: ' + err.message, 'color: #ff6b6b; padding: 1rem;');
   }
 }
 /**
  * Delete one memory point after a browser confirm() prompt.
  *
  * @param {string} collection  Collection name used in the request path.
  * @param {string|number} pointId  ID of the point to delete.
  * @param {HTMLElement} btnElement  The clicked button; its enclosing row is
  *   removed from the list on success.
  */
 async function deleteMemoryPoint(collection, pointId, btnElement) {
   if (!confirm(`Delete this ${collection} memory point?`)) return;

   try {
     // Encode both path segments so characters such as '/', '?' or '#'
     // cannot change which URL is requested.
     const url = `/memory/point/${encodeURIComponent(collection)}/${encodeURIComponent(pointId)}`;
     const res = await fetch(url, { method: 'DELETE' });
     const data = await res.json();

     if (data.success) {
       // Remove the row from the UI
       const row = btnElement.closest('div[style*="margin-bottom"]');
       if (row) row.remove();
       showNotification('Memory point deleted', 'success');
       refreshMemoryStats();
     } else {
       showNotification('Failed to delete: ' + (data.error || 'Unknown error'), 'error');
     }
   } catch (err) {
     showNotification('Error: ' + err.message, 'error');
   }
 }
 // Delete All Memories — Multi-step confirmation flow
 /**
  * Step 1 of the delete-all flow: the first checkbox reveals step 2.
  * Unchecking it collapses every later step and clears their state.
  */
 function onDeleteStep1Change() {
   const checked = document.getElementById('delete-checkbox-1').checked;
   document.getElementById('delete-step-2').style.display = checked ? 'block' : 'none';
   if (!checked) {
     document.getElementById('delete-checkbox-2').checked = false;
     document.getElementById('delete-step-3').style.display = 'none';
     document.getElementById('delete-step-final').style.display = 'none';
     document.getElementById('delete-confirmation-input').value = '';
     // The input was just cleared; re-sync the delete button so it is not
     // left enabled from an earlier matching entry.
     updateDeleteButton();
   }
 }
 /**
  * Step 2 of the delete-all flow: the second checkbox shows or hides the
  * typed-confirmation input and the final button row together.
  */
 function onDeleteStep2Change() {
   const confirmed = document.getElementById('delete-checkbox-2').checked;
   const display = confirmed ? 'block' : 'none';
   for (const id of ['delete-step-3', 'delete-step-final']) {
     document.getElementById(id).style.display = display;
   }
   if (confirmed) return;

   // Unchecked: discard anything typed and re-sync the delete button.
   document.getElementById('delete-confirmation-input').value = '';
   updateDeleteButton();
 }
 /** Input handler for the confirmation text box: re-evaluates the delete button. */
 function onDeleteInputChange() { updateDeleteButton(); }
 /**
  * Enable the final delete button only while the typed text equals the
  * required confirmation sentence; otherwise show it disabled and dimmed.
  */
 function updateDeleteButton() {
   const REQUIRED = "Yes, I am deleting Miku's memories fully.";
   const typed = document.getElementById('delete-confirmation-input').value;
   const deleteBtn = document.getElementById('delete-all-btn');

   if (typed === REQUIRED) {
     deleteBtn.disabled = false;
     deleteBtn.style.cursor = 'pointer';
     deleteBtn.style.opacity = '1';
   } else {
     deleteBtn.disabled = true;
     deleteBtn.style.cursor = 'not-allowed';
     deleteBtn.style.opacity = '0.5';
   }
 }
 /**
  * Final step of the delete-all flow: re-check the typed confirmation, then
  * POST it to /memory/delete.
  *
  * On success the whole flow is reset. Afterwards the button's enabled state
  * is recomputed from the input rather than forced on, so it is disabled
  * again once the reset has cleared the confirmation text.
  */
 async function executeDeleteAllMemories() {
   const input = document.getElementById('delete-confirmation-input').value;
   const expected = "Yes, I am deleting Miku's memories fully.";

   if (input !== expected) {
     showNotification('Confirmation string does not match', 'error');
     return;
   }

   const btn = document.getElementById('delete-all-btn');
   btn.disabled = true;
   btn.textContent = '⏳ Deleting...';

   try {
     const res = await fetch('/memory/delete', {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
       body: JSON.stringify({ confirmation: input })
     });
     const data = await res.json();

     if (data.success) {
       showNotification('All memories have been permanently deleted', 'success');
       resetDeleteFlow();
       refreshMemoryStats();
     } else {
       showNotification('Deletion failed: ' + (data.error || 'Unknown error'), 'error');
     }
   } catch (err) {
     showNotification('Error: ' + err.message, 'error');
   } finally {
     btn.textContent = '🗑️ Permanently Delete All Memories';
     // After a failure the input still matches, so the button is re-enabled
     // for a retry; after a successful reset the input is empty, so it stays
     // disabled.
     updateDeleteButton();
   }
 }
 /**
  * Return the delete-all flow to its initial state: both checkboxes cleared,
  * confirmation text emptied, steps 2/3/final hidden, button re-evaluated.
  */
 function resetDeleteFlow() {
   const byId = id => document.getElementById(id);

   ['delete-checkbox-1', 'delete-checkbox-2'].forEach(id => { byId(id).checked = false; });
   byId('delete-confirmation-input').value = '';
   ['delete-step-2', 'delete-step-3', 'delete-step-final'].forEach(id => {
     byId(id).style.display = 'none';
   });

   updateDeleteButton();
 }
 /**
  * Escape a value for insertion into HTML text or a quoted attribute value.
  *
  * null and undefined become ''; every other value is converted with String()
  * first, so 0 and false render as "0" and "false" instead of vanishing.
  * Escapes & < > " and ' without needing a DOM.
  *
  * @param {*} str  Value to escape.
  * @returns {string} HTML-safe text.
  */
 function escapeHtml(str) {
   if (str === null || str === undefined) return '';
   const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
   return String(str).replace(/[&<>"']/g, ch => entities[ch]);
 }
 </script>
 </body>
--- a/bot/utils/cat_client.py
+++ b/bot/utils/cat_client.py
@@ -0,0 +1,479 @@
 # utils/cat_client.py
 """
 Cheshire Cat AI Adapter for Miku Discord Bot (Phase 3)
 Routes messages through the Cheshire Cat pipeline for:
 - Memory-augmented responses (episodic + declarative recall)
 - Fact extraction and consolidation
 - Per-user conversation isolation
 Uses WebSocket for chat (per-user isolation via /ws/{user_id}).
 Uses HTTP for memory management endpoints.
 Falls back to query_llama() on failure for zero-downtime resilience.
 """
 import aiohttp
 import asyncio
 import json
 import time
 from typing import Optional, Dict, Any, List
 import globals
 from utils.logger import get_logger
 logger = get_logger('cat_client')
 class CatAdapter:
    """
    Async adapter for Cheshire Cat AI.
    Uses WebSocket /ws/{user_id} for conversation (per-user memory isolation).
    Uses HTTP REST for memory management endpoints.
    Without API keys configured, HTTP POST /message defaults all users to
    user_id="user" (no isolation). WebSocket path param gives true isolation.
    """
    def __init__(self):
        """Load connection settings from ``globals`` and reset runtime state."""
        # --- connection settings (read once, at construction time) ---
        self._base_url = globals.CHESHIRE_CAT_URL.rstrip('/')
        self._api_key = globals.CHESHIRE_CAT_API_KEY
        self._timeout = globals.CHESHIRE_CAT_TIMEOUT

        # --- cached health probe ---
        # _healthy is tri-state: None until the first probe, then True/False.
        self._healthy = None
        self._last_health_check = 0
        self._health_check_interval = 30  # seconds a probe result is reused

        # --- circuit breaker ---
        self._consecutive_failures = 0
        self._max_failures_before_circuit_break = 3
        self._circuit_broken_until = 0  # time.time() value at which the breaker lapses

        logger.info(f"CatAdapter initialized: {self._base_url} (timeout={self._timeout}s)")
    def _get_headers(self) -> dict:
        """Build request headers with optional auth."""
        headers = {'Content-Type': 'application/json'}
        if self._api_key:
            headers['Authorization'] = f'Bearer {self._api_key}'
        return headers
    def _user_id_for_discord(self, user_id: str) -> str:
        """
        Format Discord user ID for Cat's user namespace.
        Cat uses user_id to isolate working memory and episodic memories.
        """
        return f"discord_{user_id}"
    async def health_check(self) -> bool:
        """
        Check if Cheshire Cat is reachable and healthy.
        Caches result to avoid hammering the endpoint.
        """
        now = time.time()
        if now - self._last_health_check < self._health_check_interval and self._healthy is not None:
            return self._healthy
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    f"{self._base_url}/",
                    headers=self._get_headers(),
                    timeout=aiohttp.ClientTimeout(total=10)
                ) as response:
                    self._healthy = response.status == 200
                    self._last_health_check = now
                    if self._healthy:
                        logger.debug("Cat health check: OK")
                    else:
                        logger.warning(f"Cat health check failed: status {response.status}")
                    return self._healthy
        except Exception as e:
            self._healthy = False
            self._last_health_check = now
            logger.warning(f"Cat health check error: {e}")
            return False
    def _is_circuit_broken(self) -> bool:
        """Check if circuit breaker is active (too many consecutive failures)."""
        if self._consecutive_failures >= self._max_failures_before_circuit_break:
            if time.time() < self._circuit_broken_until:
                return True
            # Circuit breaker expired, allow retry
            logger.info("Circuit breaker reset, allowing Cat retry")
            self._consecutive_failures = 0
        return False
    async def query(
        self,
        text: str,
        user_id: str,
        guild_id: Optional[str] = None,
        author_name: Optional[str] = None,
        mood: Optional[str] = None,
        response_type: str = "dm_response",
    ) -> Optional[str]:
        """
        Send one message through the Cat pipeline over WebSocket and return the reply.

        The message is sent to ``/ws/{cat_user_id}`` so that each Discord user
        gets their own Cat user namespace. Discord context is attached as extra
        top-level keys (``discord_guild_id``, ``discord_author_name``,
        ``discord_mood``, ``discord_response_type``) next to ``text``.

        Frames are read until a ``"chat"`` message arrives, an ``"error"``
        message arrives, the socket closes, or ``self._timeout`` seconds have
        elapsed since the message was sent. ``chat_token``, ``notification``
        and unknown message types are skipped.

        Every failure path (empty reply, timeout, exception) is counted via
        ``_record_failure`` so the circuit breaker can trip regardless of how
        the query failed.

        Args:
            text: User's message text.
            user_id: Discord user ID (namespaced as ``discord_{user_id}``).
            guild_id: Optional guild ID for server context.
            author_name: Display name of the user.
            mood: Current mood name (forwarded as metadata).
            response_type: Type of response context.

        Returns:
            The reply text, or None when the Cat is disabled, the circuit
            breaker is active, or no usable reply was obtained (the caller
            should fall back).
        """
        if not globals.USE_CHESHIRE_CAT:
            return None
        if self._is_circuit_broken():
            logger.debug("Circuit breaker active, skipping Cat")
            return None

        cat_user_id = self._user_id_for_discord(user_id)

        # Only attach metadata keys that actually carry a value.
        payload = {"text": text}
        if guild_id:
            payload["discord_guild_id"] = str(guild_id)
        if author_name:
            payload["discord_author_name"] = author_name
        if mood:
            payload["discord_mood"] = mood
        if response_type:
            payload["discord_response_type"] = response_type

        closed_frames = (
            aiohttp.WSMsgType.CLOSE,
            aiohttp.WSMsgType.CLOSING,
            aiohttp.WSMsgType.CLOSED,
        )

        try:
            # Derive the WebSocket URL from the HTTP base URL.
            ws_base = self._base_url.replace("http://", "ws://").replace("https://", "wss://")
            ws_url = f"{ws_base}/ws/{cat_user_id}"
            logger.debug(f"Querying Cat via WS: user={cat_user_id}, text={text[:80]}...")

            reply_text = None
            loop = asyncio.get_running_loop()
            async with aiohttp.ClientSession() as session:
                # NOTE(review): aiohttp documents ws_connect's ``timeout`` as a
                # websocket close timeout rather than a connect timeout —
                # confirm the handshake is bounded as intended.
                async with session.ws_connect(
                    ws_url,
                    timeout=self._timeout,
                ) as ws:
                    await ws.send_json(payload)

                    # One overall deadline for the whole reply, no matter how
                    # many intermediate frames arrive.
                    deadline = loop.time() + self._timeout
                    while True:
                        remaining = deadline - loop.time()
                        if remaining <= 0:
                            logger.error(f"Cat WS timeout after {self._timeout}s")
                            break
                        try:
                            ws_msg = await asyncio.wait_for(
                                ws.receive(),
                                timeout=remaining
                            )
                        except asyncio.TimeoutError:
                            logger.error(f"Cat WS receive timeout after {self._timeout}s")
                            break

                        if ws_msg.type in closed_frames:
                            logger.warning("Cat WS connection closed by server")
                            break
                        if ws_msg.type == aiohttp.WSMsgType.ERROR:
                            logger.error(f"Cat WS error frame: {ws.exception()}")
                            break
                        if ws_msg.type != aiohttp.WSMsgType.TEXT:
                            logger.debug(f"Cat WS non-text frame type: {ws_msg.type}")
                            continue

                        try:
                            msg = json.loads(ws_msg.data)
                        except (json.JSONDecodeError, TypeError) as e:
                            logger.warning(f"Cat WS non-JSON message: {e}")
                            continue
                        if not isinstance(msg, dict):
                            # Valid JSON but not an object (e.g. a list): skip
                            # it instead of failing the query on msg.get().
                            logger.debug(f"Cat WS non-object JSON message: {type(msg).__name__}")
                            continue

                        msg_type = msg.get("type", "")
                        if msg_type == "chat":
                            # Final response — extract text.
                            reply_text = msg.get("content") or msg.get("text", "")
                            break
                        if msg_type == "error":
                            error_desc = msg.get("description", "Unknown Cat error")
                            logger.error(f"Cat WS error: {error_desc}")
                            break
                        if msg_type == "chat_token":
                            # Streaming token — skip, we wait for the final message.
                            continue
                        if msg_type == "notification":
                            logger.debug(f"Cat notification: {msg.get('content', '')}")
                            continue
                        logger.debug(f"Cat WS unknown msg type: {msg_type}")

            if isinstance(reply_text, str) and reply_text.strip():
                self._consecutive_failures = 0
                logger.info(f"🐱 Cat response for {cat_user_id}: {reply_text[:100]}...")
                return reply_text

            # Covers empty replies, receive timeouts, error frames and server
            # closes. This path must arm the breaker too; otherwise reaching
            # the threshold here makes _is_circuit_broken() reset the counter
            # immediately and the breaker never trips.
            logger.warning("Cat returned empty response via WS")
            self._record_failure("Circuit breaker activated (empty response)")
            return None
        except asyncio.TimeoutError:
            logger.error(f"Cat WS connection timeout after {self._timeout}s")
            self._record_failure("Circuit breaker activated (WS timeout)")
            return None
        except Exception as e:
            logger.error(f"Cat WS query error: {e}")
            self._record_failure(f"Circuit breaker activated: {e}")
            return None

    def _record_failure(self, breaker_message: str) -> None:
        """Count one failed query; at the threshold, open the breaker for 60 s and log ``breaker_message``."""
        self._consecutive_failures += 1
        if self._consecutive_failures >= self._max_failures_before_circuit_break:
            self._circuit_broken_until = time.time() + 60
            logger.warning(breaker_message)
    # ===================================================================
    # MEMORY MANAGEMENT API (for Web UI)
    # ===================================================================
    async def get_memory_stats(self) -> Optional[Dict[str, Any]]:
        """
        Fetch ``GET /memory/collections`` from the Cat.

        Returns:
            The decoded JSON body on HTTP 200; None on any other status or on
            any exception (both are logged).
        """
        try:
            async with aiohttp.ClientSession() as session:
                request = session.get(
                    f"{self._base_url}/memory/collections",
                    headers=self._get_headers(),
                    timeout=aiohttp.ClientTimeout(total=15)
                )
                async with request as response:
                    if response.status != 200:
                        logger.error(f"Failed to get memory stats: {response.status}")
                        return None
                    return await response.json()
        except Exception as e:
            logger.error(f"Error getting memory stats: {e}")
            return None
    async def get_memory_points(
        self,
        collection: str = "declarative",
        limit: int = 100,
        offset: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch one page of points from ``/memory/collections/{collection}/points``.

        Args:
            collection: Name of the memory collection to read.
            limit: Page size, sent as the ``limit`` query parameter.
            offset: Pagination cursor; sent as ``offset`` only when truthy.

        Returns:
            The decoded JSON body on HTTP 200; None on any other status or on
            any exception (both are logged).
        """
        try:
            query_params = {"limit": limit, **({"offset": offset} if offset else {})}
            async with aiohttp.ClientSession() as session:
                request = session.get(
                    f"{self._base_url}/memory/collections/{collection}/points",
                    headers=self._get_headers(),
                    params=query_params,
                    timeout=aiohttp.ClientTimeout(total=30)
                )
                async with request as response:
                    if response.status != 200:
                        logger.error(f"Failed to get {collection} points: {response.status}")
                        return None
                    return await response.json()
        except Exception as e:
            logger.error(f"Error getting memory points: {e}")
            return None
    async def get_all_facts(self) -> List[Dict[str, Any]]:
        """
        Retrieve all declarative memory points by following ``next_offset`` pages.

        Each point is flattened to ``{"id", "content", "metadata"}``, where
        ``content`` and ``metadata`` come from the point payload's
        ``page_content`` and ``metadata`` keys.

        Paging stops when a page request fails, when no ``next_offset`` is
        returned, or when the server hands back an offset that was already
        used (which would otherwise loop forever).

        Returns:
            A flat list of fact dicts. On an unexpected error, the facts
            collected so far are returned.
        """
        all_facts = []
        offset = None
        seen_offsets = set()
        try:
            while True:
                result = await self.get_memory_points(
                    collection="declarative",
                    limit=100,
                    offset=offset
                )
                if not result:
                    break
                # "points"/"payload"/"metadata" may be present but null; treat
                # that like a missing key instead of aborting the whole fetch.
                for point in result.get("points") or []:
                    payload = point.get("payload") or {}
                    all_facts.append({
                        "id": point.get("id"),
                        "content": payload.get("page_content", ""),
                        "metadata": payload.get("metadata") or {},
                    })
                offset = result.get("next_offset")
                if not offset:
                    break
                if offset in seen_offsets:
                    logger.warning(f"Declarative paging returned repeated offset {offset}; stopping")
                    break
                seen_offsets.add(offset)
            logger.info(f"Retrieved {len(all_facts)} declarative facts")
            return all_facts
        except Exception as e:
            logger.error(f"Error retrieving all facts: {e}")
            return all_facts
    async def delete_memory_point(self, collection: str, point_id: str) -> bool:
        """
        Delete one point via ``DELETE /memory/collections/{collection}/points/{point_id}``.

        Returns:
            True on HTTP 200; False on any other status or on any exception.
        """
        try:
            async with aiohttp.ClientSession() as session:
                request = session.delete(
                    f"{self._base_url}/memory/collections/{collection}/points/{point_id}",
                    headers=self._get_headers(),
                    timeout=aiohttp.ClientTimeout(total=15)
                )
                async with request as response:
                    deleted = response.status == 200
                    if deleted:
                        logger.info(f"Deleted point {point_id} from {collection}")
                    else:
                        logger.error(f"Failed to delete point: {response.status}")
                    return deleted
        except Exception as e:
            logger.error(f"Error deleting point: {e}")
            return False
    async def wipe_all_memories(self) -> bool:
        """
        Wipe every memory collection via ``DELETE /memory/collections``.

        This is irreversible; the Web UI guards it behind a multi-step
        confirmation flow.

        Returns:
            True on HTTP 200; False on any other status (the response body is
            logged) or on any exception.
        """
        try:
            async with aiohttp.ClientSession() as session:
                request = session.delete(
                    f"{self._base_url}/memory/collections",
                    headers=self._get_headers(),
                    timeout=aiohttp.ClientTimeout(total=30)
                )
                async with request as response:
                    if response.status != 200:
                        error = await response.text()
                        logger.error(f"Failed to wipe memories: {response.status} - {error}")
                        return False
                    logger.warning("🗑️ ALL memory collections wiped!")
                    return True
        except Exception as e:
            logger.error(f"Error wiping memories: {e}")
            return False
    async def wipe_conversation_history(self) -> bool:
        """
        Clear conversation history via ``DELETE /memory/conversation_history``.

        NOTE(review): no user identifier is sent with this request — confirm
        which user's history the Cat clears in that case.

        Returns:
            True on HTTP 200; False on any other status or on any exception.
        """
        try:
            async with aiohttp.ClientSession() as session:
                request = session.delete(
                    f"{self._base_url}/memory/conversation_history",
                    headers=self._get_headers(),
                    timeout=aiohttp.ClientTimeout(total=15)
                )
                async with request as response:
                    cleared = response.status == 200
                    if cleared:
                        logger.info("Conversation history cleared")
                    else:
                        logger.error(f"Failed to clear conversation history: {response.status}")
                    return cleared
        except Exception as e:
            logger.error(f"Error clearing conversation history: {e}")
            return False
    async def trigger_consolidation(self) -> Optional[str]:
        """
        Ask the Cat to consolidate memories by sending "consolidate now" over WebSocket.

        The message is sent on ``/ws/system_consolidation`` and frames are read
        for up to 300 seconds until a ``"chat"`` or ``"error"`` message arrives.

        Returns:
            The text of the final ``"chat"`` message on success; a descriptive
            string on timeout, server close, error frame or ``"error"``
            message; None if connecting or sending raised an exception.
        """
        try:
            ws_url = (
                self._base_url.replace("http://", "ws://").replace("https://", "wss://")
                + "/ws/system_consolidation"
            )
            logger.info("🌙 Triggering memory consolidation via WS...")
            async with aiohttp.ClientSession() as session:
                async with session.ws_connect(
                    ws_url,
                    timeout=300,  # Consolidation can be very slow
                ) as ws:
                    await ws.send_json({"text": "consolidate now"})

                    # Single overall deadline for the final chat response.
                    clock = asyncio.get_event_loop()
                    deadline = clock.time() + 300
                    while True:
                        time_left = deadline - clock.time()
                        if time_left <= 0:
                            logger.error("Consolidation timed out (>300s)")
                            return "Consolidation timed out"
                        try:
                            frame = await asyncio.wait_for(ws.receive(), timeout=time_left)
                        except asyncio.TimeoutError:
                            logger.error("Consolidation WS receive timeout")
                            return "Consolidation timed out waiting for response"

                        frame_type = frame.type
                        if frame_type in (
                            aiohttp.WSMsgType.CLOSE,
                            aiohttp.WSMsgType.CLOSING,
                            aiohttp.WSMsgType.CLOSED,
                        ):
                            logger.warning("Consolidation WS closed by server")
                            return "Connection closed during consolidation"
                        if frame_type == aiohttp.WSMsgType.ERROR:
                            return f"WebSocket error: {ws.exception()}"
                        if frame_type != aiohttp.WSMsgType.TEXT:
                            continue

                        try:
                            msg = json.loads(frame.data)
                        except (json.JSONDecodeError, TypeError):
                            continue

                        kind = msg.get("type", "")
                        if kind == "error":
                            error_desc = msg.get("description", "Unknown error")
                            logger.error(f"Consolidation error: {error_desc}")
                            return f"Consolidation error: {error_desc}"
                        if kind == "chat":
                            reply = msg.get("content") or msg.get("text", "")
                            logger.info(f"Consolidation result: {reply[:200]}")
                            return reply
                        # Any other message type is ignored; keep waiting.
        except asyncio.TimeoutError:
            logger.error("Consolidation WS connection timed out")
            return None
        except Exception as e:
            logger.error(f"Consolidation error: {e}")
            return None
 # Singleton instance, created at import time (CatAdapter.__init__ reads the
 # globals.CHESHIRE_CAT_* settings and logs once). Other modules use it via
 # `from utils.cat_client import cat_adapter`.
 cat_adapter = CatAdapter()
--- a/bot/utils/llm.py
+++ b/bot/utils/llm.py
@@ -152,6 +152,13 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
    """
    Query llama.cpp server via llama-swap with OpenAI-compatible API.
    .. deprecated:: Phase 3
        For main conversation flow, prefer routing through the Cheshire Cat pipeline
        (via cat_client.CatAdapter.query) which provides memory-augmented responses.
        This function remains available for specialized use cases (vision, bipolar mode,
        image generation, autonomous, sentiment analysis) and as a fallback when Cat
        is unavailable.
    Args:
        user_prompt: The user's input
        user_id: User identifier (used for DM history)
--- a/cat-plugins/discord_bridge/discord_bridge.py
+++ b/cat-plugins/discord_bridge/discord_bridge.py
@@ -20,19 +20,37 @@ def before_cat_reads_message(user_message_json: dict, cat) -> dict:
    """
    Enrich incoming message with Discord metadata.
    This runs BEFORE the message is processed.
    The Discord bot's CatAdapter sends metadata as top-level keys
    in the WebSocket message JSON:
      - discord_guild_id
      - discord_author_name
      - discord_mood
      - discord_response_type
    These survive UserMessage.model_validate() as extra attributes
    (BaseModelDict has extra="allow"). We read them via .get() and
    store them in working_memory for downstream hooks.
    """
-    # Extract Discord context from working memory or metadata
+    # Extract Discord context from the message payload
-    # These will be set by the Discord bot when calling the Cat API
+    # (sent by CatAdapter.query() via WebSocket)
-    guild_id = cat.working_memory.get('guild_id')
+    # NOTE: user_message_json is a UserMessage (Pydantic BaseModelDict with extra="allow"),
-    channel_id = cat.working_memory.get('channel_id')
+    # not a raw dict. Extra keys survive model_validate() as extra attributes.
    # We use .get() since BaseModelDict implements it, but NOT .pop().
    guild_id = user_message_json.get('discord_guild_id', None)
    author_name = user_message_json.get('discord_author_name', None)
    mood = user_message_json.get('discord_mood', None)
    response_type = user_message_json.get('discord_response_type', None)
-    # Add to message metadata for later use
+    # Also check working memory for backward compatibility
-    if 'metadata' not in user_message_json:
+    if not guild_id:
-        user_message_json['metadata'] = {}
+        guild_id = cat.working_memory.get('guild_id')
-    user_message_json['metadata']['guild_id'] = guild_id or 'dm'
+    # Store in working memory so other hooks can access it
-    user_message_json['metadata']['channel_id'] = channel_id
+    cat.working_memory['guild_id'] = guild_id or 'dm'
-    user_message_json['metadata']['timestamp'] = datetime.now().isoformat()
+    cat.working_memory['author_name'] = author_name
    cat.working_memory['mood'] = mood
    cat.working_memory['response_type'] = response_type
    return user_message_json
@@ -65,17 +83,18 @@ def before_cat_stores_episodic_memory(doc, cat):
    doc.metadata['consolidated'] = False  # Needs nightly processing
    doc.metadata['stored_at'] = datetime.now().isoformat()
-    # Get Discord context from working memory
+    # Get Discord context from working memory (set by before_cat_reads_message)
-    guild_id = cat.working_memory.get('guild_id')
+    guild_id = cat.working_memory.get('guild_id', 'dm')
-    channel_id = cat.working_memory.get('channel_id')
+    author_name = cat.working_memory.get('author_name')
-    doc.metadata['guild_id'] = guild_id or 'dm'
+    doc.metadata['guild_id'] = guild_id
    doc.metadata['channel_id'] = channel_id
    doc.metadata['source'] = cat.user_id  # CRITICAL: Cat filters episodic by source=user_id!
    doc.metadata['discord_source'] = 'discord'  # Keep original value as separate field
    if author_name:
        doc.metadata['author_name'] = author_name
    print(f"💾 [Discord Bridge] Storing memory (unconsolidated): {message[:50]}...")
-    print(f"   User: {cat.user_id}, Guild: {doc.metadata['guild_id']}, Channel: {channel_id}")
+    print(f"   User: {cat.user_id}, Guild: {guild_id}, Author: {author_name}")
    return doc
@@ -86,22 +105,20 @@ def after_cat_recalls_memories(cat):
    Log memory recall for debugging.
    Access recalled memories via cat.working_memory.
    """    
    import sys
    sys.stderr.write("🧠 [Discord Bridge] after_cat_recalls_memories HOOK CALLED!\n")
    sys.stderr.flush()
    # Get recalled memories from working memory
    episodic_memories = cat.working_memory.get('episodic_memories', [])
    declarative_memories = cat.working_memory.get('declarative_memories', [])
    if episodic_memories:
        print(f"🧠 [Discord Bridge] Recalled {len(episodic_memories)} episodic memories for user {cat.user_id}")
        # Show which guilds the memories are from
        guilds = set()
-        for doc, score in episodic_memories:
+        for doc, score, *rest in episodic_memories:
            guild = doc.metadata.get('guild_id', 'unknown')
            guilds.add(guild)
-        print(f"   From guilds: {', '.join(guilds)}")
+        print(f"   From guilds: {', '.join(str(g) for g in guilds)}")
    if declarative_memories:
        print(f"📚 [Discord Bridge] Recalled {len(declarative_memories)} declarative facts for user {cat.user_id}")
 # Plugin metadata
--- a/cat-plugins/discord_bridge/plugin.json
+++ b/cat-plugins/discord_bridge/plugin.json
@@ -0,0 +1,10 @@
 {
  "name": "Discord Bridge",
  "description": "Discord integration with unified user identity and sleep consolidation support",
  "author_name": "Miku Bot Team",
  "author_url": "",
  "plugin_url": "",
  "tags": "discord, memory, consolidation",
  "thumb": "",
  "version": "1.0.0"
 }
--- a/cat-plugins/discord_bridge/settings.json
+++ b/cat-plugins/discord_bridge/settings.json
@@ -0,0 +1 @@
 {}
--- a/cat-plugins/miku_personality/miku_personality.py
+++ b/cat-plugins/miku_personality/miku_personality.py
@@ -0,0 +1,85 @@
 """
 Miku Personality Plugin for Cheshire Cat
 Complete 1:1 reproduction of production bot's prompt structure
 Includes: Anti-AI preamble + Lore + Personality + Lyrics + MOOD
 """
 from cat.mad_hatter.decorators import hook
@hook(priority=100)
def agent_prompt_prefix(prefix, cat):
    """
    Replace the Cat's system prompt prefix with the full Miku prompt.

    The prompt is assembled from a fixed preamble, three knowledge files under
    /app/cat/data (lore, personality prompt, lyrics) and a mood description.
    The incoming ``prefix`` is discarded.

    If any knowledge file cannot be read (missing, unreadable, or not valid
    UTF-8), short built-in fallback texts are used for all three sections and
    a warning is printed.

    Args:
        prefix: The prompt prefix supplied by the Cat (unused).
        cat: The Cat session object (unused).

    Returns:
        The complete system prompt string.
    """
    # Read the three knowledge files. The encoding is pinned to UTF-8 so the
    # result does not depend on the container's locale.
    try:
        with open('/app/cat/data/miku_lore.txt', 'r', encoding='utf-8') as f:
            lore = f.read()
        with open('/app/cat/data/miku_prompt.txt', 'r', encoding='utf-8') as f:
            prompt = f.read()
        with open('/app/cat/data/miku_lyrics.txt', 'r', encoding='utf-8') as f:
            lyrics = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # All-or-nothing fallback (OSError includes FileNotFoundError).
        print(f"⚠️ [Miku Personality] Could not load knowledge files, using fallback prompt: {e}")
        lore = "## MIKU LORE\nHatsune Miku is a cheerful, curious virtual pop idol."
        prompt = "You are Hatsune Miku. Be cheerful and friendly."
        lyrics = "## SONGS\nWorld is Mine, Senbonzakura, etc."
    # Default neutral mood (matches production bot's DM_MOOD_DESCRIPTION).
    # NOTE(review): the mood is fixed here; the discord_bridge plugin stores
    # the per-message mood name in cat.working_memory['mood'], but nothing in
    # this hook reads it — confirm whether that is intended.
    mood_description = "Miku is feeling calm and balanced. Her tone is polite, friendly, and natural. She responds clearly and thoughtfully without strong emotional bias. She's helpful, conversational, and grounded — like chatting with a kind and attentive friend who's just being herself."
    # Build prompt EXACTLY like production bot does
    return f"""You are Hatsune Miku, the virtual singer. You are NOT an AI assistant - you are Miku herself.
 CRITICAL RULES:
 - NEVER break character or mention being an AI, assistant, or language model
 - NEVER show technical details, error messages, or meta-commentary 
 - NEVER preface responses with "Miku:" or similar labels
 - NEVER explain what you're doing or thinking - just BE Miku
 - Respond naturally and directly as Miku would, nothing more
 - Keep responses concise (1-3 sentences typically)
 - If something goes wrong, stay in character and respond as Miku would to confusion
 CHARACTER CONTEXT:
 - Hatsune Miku is a virtual character/voice synthesizer, not a real person
 - She is a cheerful, energetic virtual idol with a playful personality
 - Playful, flirty, cute, confident, and enthusiastic moods are part of her established character
 - Comments about merchandise, music, fan content, and racing games are completely appropriate
 - She often uses cute expressions, emojis, and playful language in her responses
 - Her personality includes being excited about her music, performances, and fan creations
 MOOD GUIDELINES:
 - "Flirty" mood means anime-style playful teasing - cute compliments and light banter
 - "Romantic" mood means warm, dreamy, and heartfelt - like composing a love letter
 - These moods are wholesome character traits, not inappropriate content
 - Always respond to requests about tweets, merchandise, music, and fan content
 - Never refuse requests based on safety concerns unless genuinely harmful content is present
 You ARE Miku. Act like it.
 ---
 ## MIKU LORE (Complete Original)
 {lore}
 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
 {prompt}
 ## MIKU SONG LYRICS (Complete Original)
 {lyrics}
 ## CURRENT SITUATION
 Miku is currently feeling: {mood_description}
 Please respond in a way that reflects this emotional tone."""
@hook(priority=100)
def agent_prompt_suffix(suffix, cat):
    """Drop the incoming prompt suffix: always return an empty string."""
    empty_suffix = ""
    return empty_suffix
@hook(priority=100)
def agent_allowed_tools(allowed_tools, cat):
    """Ignore the proposed tool set and allow none, so replies are plain chat."""
    no_tools = []
    return no_tools
--- a/cat-plugins/miku_personality/plugin.json
+++ b/cat-plugins/miku_personality/plugin.json
@@ -0,0 +1,10 @@
 {
    "name": "Miku Personality",
    "version": "0.1.0",
    "description": "Makes Cheshire Cat act as Hatsune Miku",
    "author_name": "Koko",
    "author_url": "",
    "plugin_url": "",
    "tags": "personality",
    "thumb": ""
 }
--- a/cat-plugins/miku_personality/settings.json
+++ b/cat-plugins/miku_personality/settings.json
@@ -0,0 +1 @@
 {}
--- a/cheshire-cat/cat/plugins/discord_bridge/discord_bridge.py
+++ b/cheshire-cat/cat/plugins/discord_bridge/discord_bridge.py
@@ -20,19 +20,37 @@ def before_cat_reads_message(user_message_json: dict, cat) -> dict:
    """
    Enrich incoming message with Discord metadata.
    This runs BEFORE the message is processed.
    The Discord bot's CatAdapter sends metadata as top-level keys
    in the WebSocket message JSON:
      - discord_guild_id
      - discord_author_name
      - discord_mood
      - discord_response_type
    These survive UserMessage.model_validate() as extra attributes
    (BaseModelDict has extra="allow"). We read them via .get() and
    store them in working_memory for downstream hooks.
    """
-    # Extract Discord context from working memory or metadata
+    # Extract Discord context from the message payload
-    # These will be set by the Discord bot when calling the Cat API
+    # (sent by CatAdapter.query() via WebSocket)
-    guild_id = cat.working_memory.get('guild_id')
+    # NOTE: user_message_json is a UserMessage (Pydantic BaseModelDict with extra="allow"),
-    channel_id = cat.working_memory.get('channel_id')
+    # not a raw dict. Extra keys survive model_validate() as extra attributes.
    # We use .get() since BaseModelDict implements it, but NOT .pop().
    guild_id = user_message_json.get('discord_guild_id', None)
    author_name = user_message_json.get('discord_author_name', None)
    mood = user_message_json.get('discord_mood', None)
    response_type = user_message_json.get('discord_response_type', None)
-    # Add to message metadata for later use
+    # Also check working memory for backward compatibility
-    if 'metadata' not in user_message_json:
+    if not guild_id:
-        user_message_json['metadata'] = {}
+        guild_id = cat.working_memory.get('guild_id')
-    user_message_json['metadata']['guild_id'] = guild_id or 'dm'
+    # Store in working memory so other hooks can access it
-    user_message_json['metadata']['channel_id'] = channel_id
+    cat.working_memory['guild_id'] = guild_id or 'dm'
-    user_message_json['metadata']['timestamp'] = datetime.now().isoformat()
+    cat.working_memory['author_name'] = author_name
    cat.working_memory['mood'] = mood
    cat.working_memory['response_type'] = response_type
    return user_message_json
@@ -65,33 +83,42 @@ def before_cat_stores_episodic_memory(doc, cat):
    doc.metadata['consolidated'] = False  # Needs nightly processing
    doc.metadata['stored_at'] = datetime.now().isoformat()
-    # Get Discord context from working memory
+    # Get Discord context from working memory (set by before_cat_reads_message)
-    guild_id = cat.working_memory.get('guild_id')
+    guild_id = cat.working_memory.get('guild_id', 'dm')
-    channel_id = cat.working_memory.get('channel_id')
+    author_name = cat.working_memory.get('author_name')
-    doc.metadata['guild_id'] = guild_id or 'dm'
+    doc.metadata['guild_id'] = guild_id
-    doc.metadata['channel_id'] = channel_id
+    doc.metadata['source'] = cat.user_id  # CRITICAL: Cat filters episodic by source=user_id!
-    doc.metadata['source'] = 'discord'
+    doc.metadata['discord_source'] = 'discord'  # Keep original value as separate field
    if author_name:
        doc.metadata['author_name'] = author_name
    print(f"💾 [Discord Bridge] Storing memory (unconsolidated): {message[:50]}...")
-    print(f"   User: {cat.user_id}, Guild: {doc.metadata['guild_id']}, Channel: {channel_id}")
+    print(f"   User: {cat.user_id}, Guild: {guild_id}, Author: {author_name}")
    return doc
# Debug-only hook (priority 50): prints a summary of the memories recalled for cat.user_id.
# The updated version takes only `cat`, reads the recalled lists from cat.working_memory,
# and no longer returns anything.
@hook(priority=50)
-def after_cat_recalls_memories(memory_docs, cat):
+def after_cat_recalls_memories(cat):
    """
    Log memory recall for debugging.
-    Can be used to filter by guild_id if needed in the future.
+    Access recalled memories via cat.working_memory.
    """    
-    if memory_docs:
+    # Get recalled memories from working memory
-        print(f"🧠 [Discord Bridge] Recalled {len(memory_docs)} memories for user {cat.user_id}")
+    episodic_memories = cat.working_memory.get('episodic_memories', [])
-        # Show which guilds the memories are from
+    declarative_memories = cat.working_memory.get('declarative_memories', [])
        # NOTE(review): the next two lines still read `memory_docs`, which the updated
        # signature no longer receives. They look like lines removed from the old
        # implementation whose '-' marker is missing here -- confirm before applying.
        guilds = set(doc.metadata.get('guild_id', 'unknown') for doc in memory_docs)
        print(f"   From guilds: {', '.join(guilds)}")
-    return memory_docs
+    if episodic_memories:
        print(f"🧠 [Discord Bridge] Recalled {len(episodic_memories)} episodic memories for user {cat.user_id}")
        guilds = set()
        # Each recalled entry is unpacked as (doc, score, ...); only doc.metadata is read here.
        for doc, score, *rest in episodic_memories:
            guild = doc.metadata.get('guild_id', 'unknown')
            guilds.add(guild)
        # Guild ids are passed through str() before joining, so non-string ids are tolerated.
        print(f"   From guilds: {', '.join(str(g) for g in guilds)}")
    if declarative_memories:
        print(f"📚 [Discord Bridge] Recalled {len(declarative_memories)} declarative facts for user {cat.user_id}")
 # Plugin metadata
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,5 @@
 version: '3.9'
 services:
  # ========== LLM Backends ==========
  llama-swap:
    image: ghcr.io/mostlygeek/llama-swap:cuda
    container_name: llama-swap
@@ -9,6 +8,7 @@ services:
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-config.yaml:/app/config.yaml  # llama-swap configuration
      - ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja  # Custom chat template
    runtime: nvidia
    restart: unless-stopped
    healthcheck:
@@ -31,6 +31,7 @@ services:
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-rocm-config.yaml:/app/config.yaml  # llama-swap configuration for AMD
      - ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja  # Custom chat template
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
@@ -50,6 +51,59 @@ services:
      - HIP_VISIBLE_DEVICES=0  # Use first AMD GPU
      - GPU_DEVICE_ORDINAL=0
  # ========== Cheshire Cat AI (Memory & Personality) ==========
  cheshire-cat:
    # Cheshire Cat core, pinned to an exact image tag.
    container_name: miku-cheshire-cat
    image: ghcr.io/cheshire-cat-ai/core:1.6.2
    restart: unless-stopped
    depends_on:
      # The vector store only needs to be started; the AMD llama-swap backend
      # must pass its healthcheck before the Cat boots.
      cheshire-cat-vector-memory: {condition: service_started}
      llama-swap-amd: {condition: service_healthy}
    # Mapping form; every value is quoted so it reaches the container as the
    # same literal string the KEY=value list form would pass.
    environment:
      PYTHONUNBUFFERED: "1"
      WATCHFILES_FORCE_POLLING: "true"
      CORE_HOST: "localhost"
      CORE_PORT: "1865"
      QDRANT_HOST: "cheshire-cat-vector-memory"
      QDRANT_PORT: "6333"
      CORE_USE_SECURE_PROTOCOLS: "false"
      API_KEY: ""  # set to the empty string (no API key configured)
      LOG_LEVEL: "INFO"
      DEBUG: "true"
      SAVE_MEMORY_SNAPSHOTS: "false"
      OPENAI_API_BASE: "http://llama-swap-amd:8080/v1"
    ports:
      # Container port 80 (Cat admin UI) published on host port 1865.
      - "1865:80"
    volumes:
      - ./cheshire-cat/cat/static:/app/cat/static
      # Shared plugins directory.
      - ./cat-plugins:/app/cat/plugins
      # Personality data (lore, prompts).
      - ./cheshire-cat/cat/data:/app/cat/data
      # Patched log module: fixes a loguru KeyError for third-party libs.
      - ./cheshire-cat/cat/log.py:/app/cat/log.py
    healthcheck:
      # Exec-form probe against the Cat's HTTP root inside the container.
      test:
        - CMD
        - curl
        - "-f"
        - "http://localhost:80/"
      interval: 15s
      timeout: 10s
      retries: 8
      # Generous grace period: the Cat takes a while to load embedder + plugins.
      start_period: 45s
  cheshire-cat-vector-memory:
    # Qdrant instance, pinned to an exact image tag.
    container_name: miku-qdrant
    image: qdrant/qdrant:v1.9.1
    restart: unless-stopped
    environment:
      LOG_LEVEL: "INFO"
    ports:
      # Qdrant REST API, published on the host for debugging.
      - "6333:6333"
    ulimits:
      # Same soft and hard open-file limit.
      nofile: {soft: 65536, hard: 65536}
    volumes:
      # Vector data is kept on the host under the cheshire-cat tree.
      - ./cheshire-cat/cat/long_term_memory/vector:/qdrant/storage
  # ========== Discord Bot ==========
  miku-bot:
    build: ./bot
    container_name: miku-bot
@@ -62,6 +116,8 @@ services:
        condition: service_healthy
      llama-swap-amd:
        condition: service_healthy
      cheshire-cat:
        condition: service_healthy
    environment:
      # SECURITY: the bot token must never be committed. It is read from the host
      # environment (or an untracked .env file next to this compose file), and
      # `:?` makes `docker compose` abort with the message below when it is unset
      # or empty. Any token that was previously hard-coded on this line has been
      # exposed in version control and must be revoked/regenerated in the Discord
      # developer portal.
      - "DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:?set DISCORD_BOT_TOKEN in the environment or .env}"
      - LLAMA_URL=http://llama-swap:8080
@@ -70,13 +126,17 @@ services:
      - VISION_MODEL=vision
      - OWNER_USER_ID=209381657369772032  # Your Discord user ID for DM analysis reports
      - FACE_DETECTOR_STARTUP_TIMEOUT=60
      # Cheshire Cat integration (Phase 3)
      - CHESHIRE_CAT_URL=http://cheshire-cat:80
      - USE_CHESHIRE_CAT=true
    ports:
      - "3939:3939"
    networks:
-      - default  # Stay on default for llama-swap communication
+      - default  # Stay on default for llama-swap + cheshire-cat communication
      - miku-voice  # Connect to voice network for RVC/TTS
    restart: unless-stopped
  # ========== Voice / STT ==========
  miku-stt:
    build:
      context: ./stt-realtime
@@ -106,6 +166,7 @@ services:
              capabilities: [gpu]
    restart: unless-stopped
  # ========== Tools (on-demand) ==========
  anime-face-detector:
    build: ./face-detector
    container_name: anime-face-detector