Compare commits
3 Commits
83c103324c
...
5fe420b7bc
| Author | SHA1 | Date | |
|---|---|---|---|
| 5fe420b7bc | |||
| 14e1a8df51 | |||
| edb88e9ede |
128
bot/api.py
128
bot/api.py
@@ -2772,6 +2772,134 @@ def set_voice_debug_mode(enabled: bool = Form(...)):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ========== Cheshire Cat Memory Management (Phase 3) ==========
|
||||||
|
|
||||||
|
class MemoryDeleteRequest(BaseModel):
|
||||||
|
confirmation: str
|
||||||
|
|
||||||
|
@app.get("/memory/status")
|
||||||
|
async def get_cat_memory_status():
|
||||||
|
"""Get Cheshire Cat connection status and feature flag."""
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
is_healthy = await cat_adapter.health_check()
|
||||||
|
return {
|
||||||
|
"enabled": globals.USE_CHESHIRE_CAT,
|
||||||
|
"healthy": is_healthy,
|
||||||
|
"url": globals.CHESHIRE_CAT_URL,
|
||||||
|
"circuit_breaker_active": cat_adapter._is_circuit_broken(),
|
||||||
|
"consecutive_failures": cat_adapter._consecutive_failures
|
||||||
|
}
|
||||||
|
|
||||||
|
@app.post("/memory/toggle")
|
||||||
|
async def toggle_cat_integration(enabled: bool = Form(...)):
|
||||||
|
"""Toggle Cheshire Cat integration on/off."""
|
||||||
|
globals.USE_CHESHIRE_CAT = enabled
|
||||||
|
logger.info(f"🐱 Cheshire Cat integration {'ENABLED' if enabled else 'DISABLED'}")
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"enabled": globals.USE_CHESHIRE_CAT,
|
||||||
|
"message": f"Cheshire Cat {'enabled' if enabled else 'disabled'}"
|
||||||
|
}
|
||||||
|
|
||||||
|
@app.get("/memory/stats")
|
||||||
|
async def get_memory_stats():
|
||||||
|
"""Get memory collection statistics from Cheshire Cat (point counts per collection)."""
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
stats = await cat_adapter.get_memory_stats()
|
||||||
|
if stats is None:
|
||||||
|
return {"success": False, "error": "Could not reach Cheshire Cat"}
|
||||||
|
return {"success": True, "collections": stats.get("collections", [])}
|
||||||
|
|
||||||
|
@app.get("/memory/facts")
|
||||||
|
async def get_memory_facts():
|
||||||
|
"""Get all declarative memory facts (learned knowledge about users)."""
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
facts = await cat_adapter.get_all_facts()
|
||||||
|
return {"success": True, "facts": facts, "count": len(facts)}
|
||||||
|
|
||||||
|
@app.get("/memory/episodic")
|
||||||
|
async def get_episodic_memories():
|
||||||
|
"""Get all episodic memories (conversation snippets)."""
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
result = await cat_adapter.get_memory_points(collection="episodic", limit=100)
|
||||||
|
if result is None:
|
||||||
|
return {"success": False, "error": "Could not reach Cheshire Cat"}
|
||||||
|
|
||||||
|
memories = []
|
||||||
|
for point in result.get("points", []):
|
||||||
|
payload = point.get("payload", {})
|
||||||
|
memories.append({
|
||||||
|
"id": point.get("id"),
|
||||||
|
"content": payload.get("page_content", ""),
|
||||||
|
"metadata": payload.get("metadata", {}),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {"success": True, "memories": memories, "count": len(memories)}
|
||||||
|
|
||||||
|
@app.post("/memory/consolidate")
|
||||||
|
async def trigger_memory_consolidation():
|
||||||
|
"""Manually trigger memory consolidation (sleep consolidation process)."""
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
logger.info("🌙 Manual memory consolidation triggered via API")
|
||||||
|
result = await cat_adapter.trigger_consolidation()
|
||||||
|
if result is None:
|
||||||
|
return {"success": False, "error": "Consolidation failed or timed out"}
|
||||||
|
return {"success": True, "result": result}
|
||||||
|
|
||||||
|
@app.post("/memory/delete")
|
||||||
|
async def delete_all_memories(request: MemoryDeleteRequest):
|
||||||
|
"""
|
||||||
|
Delete ALL of Miku's memories. Requires exact confirmation string.
|
||||||
|
|
||||||
|
The confirmation field must be exactly:
|
||||||
|
"Yes, I am deleting Miku's memories fully."
|
||||||
|
|
||||||
|
This is destructive and irreversible.
|
||||||
|
"""
|
||||||
|
REQUIRED_CONFIRMATION = "Yes, I am deleting Miku's memories fully."
|
||||||
|
|
||||||
|
if request.confirmation != REQUIRED_CONFIRMATION:
|
||||||
|
logger.warning(f"Memory deletion rejected: wrong confirmation string")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "Confirmation string does not match. "
|
||||||
|
f"Expected exactly: \"{REQUIRED_CONFIRMATION}\""
|
||||||
|
}
|
||||||
|
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
logger.warning("⚠️ MEMORY DELETION CONFIRMED — wiping all memories!")
|
||||||
|
|
||||||
|
# Wipe vector memories (episodic + declarative)
|
||||||
|
wipe_success = await cat_adapter.wipe_all_memories()
|
||||||
|
|
||||||
|
# Also clear conversation history
|
||||||
|
history_success = await cat_adapter.wipe_conversation_history()
|
||||||
|
|
||||||
|
if wipe_success:
|
||||||
|
logger.warning("🗑️ All Miku memories have been deleted.")
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"message": "All memories have been permanently deleted.",
|
||||||
|
"vector_memory_wiped": wipe_success,
|
||||||
|
"conversation_history_cleared": history_success
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "Failed to wipe memory collections. Check Cat connection."
|
||||||
|
}
|
||||||
|
|
||||||
|
@app.delete("/memory/point/{collection}/{point_id}")
|
||||||
|
async def delete_single_memory_point(collection: str, point_id: str):
|
||||||
|
"""Delete a single memory point by collection and ID."""
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
success = await cat_adapter.delete_memory_point(collection, point_id)
|
||||||
|
if success:
|
||||||
|
return {"success": True, "deleted": point_id}
|
||||||
|
else:
|
||||||
|
return {"success": False, "error": f"Failed to delete point {point_id}"}
|
||||||
|
|
||||||
|
|
||||||
def start_api():
|
def start_api():
|
||||||
import uvicorn
|
import uvicorn
|
||||||
uvicorn.run(app, host="0.0.0.0", port=3939)
|
uvicorn.run(app, host="0.0.0.0", port=3939)
|
||||||
|
|||||||
53
bot/bot.py
53
bot/bot.py
@@ -513,6 +513,26 @@ async def on_message(message):
|
|||||||
response_type = "dm_response" if is_dm else "server_response"
|
response_type = "dm_response" if is_dm else "server_response"
|
||||||
author_name = message.author.display_name
|
author_name = message.author.display_name
|
||||||
|
|
||||||
|
# Phase 3: Try Cat pipeline first for embed responses too
|
||||||
|
response = None
|
||||||
|
if globals.USE_CHESHIRE_CAT:
|
||||||
|
try:
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
response = await cat_adapter.query(
|
||||||
|
text=enhanced_prompt,
|
||||||
|
user_id=str(message.author.id),
|
||||||
|
guild_id=str(guild_id) if guild_id else None,
|
||||||
|
author_name=author_name,
|
||||||
|
mood=globals.DM_MOOD,
|
||||||
|
response_type=response_type,
|
||||||
|
)
|
||||||
|
if response:
|
||||||
|
logger.info(f"🐱 Cat embed response for {author_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"🐱 Cat embed error, fallback: {e}")
|
||||||
|
response = None
|
||||||
|
|
||||||
|
if not response:
|
||||||
response = await query_llama(
|
response = await query_llama(
|
||||||
enhanced_prompt,
|
enhanced_prompt,
|
||||||
user_id=str(message.author.id),
|
user_id=str(message.author.id),
|
||||||
@@ -570,6 +590,39 @@ async def on_message(message):
|
|||||||
guild_id = message.guild.id if message.guild else None
|
guild_id = message.guild.id if message.guild else None
|
||||||
response_type = "dm_response" if is_dm else "server_response"
|
response_type = "dm_response" if is_dm else "server_response"
|
||||||
author_name = message.author.display_name
|
author_name = message.author.display_name
|
||||||
|
|
||||||
|
# Phase 3: Try Cheshire Cat pipeline first (memory-augmented response)
|
||||||
|
# Falls back to query_llama if Cat is unavailable or disabled
|
||||||
|
response = None
|
||||||
|
if globals.USE_CHESHIRE_CAT:
|
||||||
|
try:
|
||||||
|
from utils.cat_client import cat_adapter
|
||||||
|
current_mood = globals.DM_MOOD
|
||||||
|
if guild_id:
|
||||||
|
try:
|
||||||
|
from server_manager import server_manager
|
||||||
|
sc = server_manager.get_server_config(guild_id)
|
||||||
|
if sc:
|
||||||
|
current_mood = sc.current_mood_name
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
response = await cat_adapter.query(
|
||||||
|
text=prompt,
|
||||||
|
user_id=str(message.author.id),
|
||||||
|
guild_id=str(guild_id) if guild_id else None,
|
||||||
|
author_name=author_name,
|
||||||
|
mood=current_mood,
|
||||||
|
response_type=response_type,
|
||||||
|
)
|
||||||
|
if response:
|
||||||
|
logger.info(f"🐱 Cat response for {author_name} (mood: {current_mood})")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"🐱 Cat pipeline error, falling back to query_llama: {e}")
|
||||||
|
response = None
|
||||||
|
|
||||||
|
# Fallback to direct LLM query if Cat didn't respond
|
||||||
|
if not response:
|
||||||
response = await query_llama(
|
response = await query_llama(
|
||||||
prompt,
|
prompt,
|
||||||
user_id=str(message.author.id),
|
user_id=str(message.author.id),
|
||||||
|
|||||||
@@ -29,6 +29,12 @@ EVIL_TEXT_MODEL = os.getenv("EVIL_TEXT_MODEL", "darkidol") # Uncensored model f
|
|||||||
JAPANESE_TEXT_MODEL = os.getenv("JAPANESE_TEXT_MODEL", "swallow") # Llama 3.1 Swallow model for Japanese
|
JAPANESE_TEXT_MODEL = os.getenv("JAPANESE_TEXT_MODEL", "swallow") # Llama 3.1 Swallow model for Japanese
|
||||||
OWNER_USER_ID = int(os.getenv("OWNER_USER_ID", "209381657369772032")) # Bot owner's Discord user ID for reports
|
OWNER_USER_ID = int(os.getenv("OWNER_USER_ID", "209381657369772032")) # Bot owner's Discord user ID for reports
|
||||||
|
|
||||||
|
# Cheshire Cat AI integration (Phase 3)
|
||||||
|
CHESHIRE_CAT_URL = os.getenv("CHESHIRE_CAT_URL", "http://cheshire-cat:80")
|
||||||
|
USE_CHESHIRE_CAT = os.getenv("USE_CHESHIRE_CAT", "false").lower() == "true"
|
||||||
|
CHESHIRE_CAT_API_KEY = os.getenv("CHESHIRE_CAT_API_KEY", "") # Empty = no auth
|
||||||
|
CHESHIRE_CAT_TIMEOUT = int(os.getenv("CHESHIRE_CAT_TIMEOUT", "120")) # Seconds
|
||||||
|
|
||||||
# Language mode for Miku (english or japanese)
|
# Language mode for Miku (english or japanese)
|
||||||
LANGUAGE_MODE = "english" # Can be "english" or "japanese"
|
LANGUAGE_MODE = "english" # Can be "english" or "japanese"
|
||||||
|
|
||||||
|
|||||||
@@ -416,14 +416,19 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.tab-buttons {
|
.tab-buttons {
|
||||||
display: flex;
|
display: grid;
|
||||||
|
grid-template-rows: repeat(2, auto);
|
||||||
|
grid-auto-flow: column;
|
||||||
|
grid-auto-columns: max-content;
|
||||||
border-bottom: 2px solid #333;
|
border-bottom: 2px solid #333;
|
||||||
margin-bottom: 1rem;
|
margin-bottom: 1rem;
|
||||||
overflow-x: auto;
|
overflow-x: auto;
|
||||||
overflow-y: hidden;
|
overflow-y: hidden;
|
||||||
white-space: nowrap;
|
|
||||||
scrollbar-width: thin;
|
scrollbar-width: thin;
|
||||||
scrollbar-color: #555 #222;
|
scrollbar-color: #555 #222;
|
||||||
|
row-gap: 0.05rem;
|
||||||
|
column-gap: 0.1rem;
|
||||||
|
padding-bottom: 0.1rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.tab-buttons::-webkit-scrollbar {
|
.tab-buttons::-webkit-scrollbar {
|
||||||
@@ -447,12 +452,10 @@
|
|||||||
background: #222;
|
background: #222;
|
||||||
color: #ccc;
|
color: #ccc;
|
||||||
border: none;
|
border: none;
|
||||||
padding: 0.8rem 1.5rem;
|
padding: 0.5rem 1rem;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
border-bottom: 3px solid transparent;
|
border-bottom: 3px solid transparent;
|
||||||
margin-right: 0.5rem;
|
|
||||||
transition: all 0.3s ease;
|
transition: all 0.3s ease;
|
||||||
flex-shrink: 0;
|
|
||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -665,6 +668,7 @@
|
|||||||
<button class="tab-button" onclick="switchTab('tab6')">📊 Autonomous Stats</button>
|
<button class="tab-button" onclick="switchTab('tab6')">📊 Autonomous Stats</button>
|
||||||
<button class="tab-button" onclick="switchTab('tab7')">💬 Chat with LLM</button>
|
<button class="tab-button" onclick="switchTab('tab7')">💬 Chat with LLM</button>
|
||||||
<button class="tab-button" onclick="switchTab('tab8')">📞 Voice Call</button>
|
<button class="tab-button" onclick="switchTab('tab8')">📞 Voice Call</button>
|
||||||
|
<button class="tab-button" onclick="switchTab('tab9')">🧠 Memories</button>
|
||||||
<button class="tab-button" onclick="window.location.href='/static/system.html'">🎛️ System Settings</button>
|
<button class="tab-button" onclick="window.location.href='/static/system.html'">🎛️ System Settings</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -1547,6 +1551,142 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Tab 9: Memory Management -->
|
||||||
|
<div id="tab9" class="tab-content">
|
||||||
|
<div class="section">
|
||||||
|
<h3>🧠 Cheshire Cat Memory Management</h3>
|
||||||
|
<p style="color: #aaa; margin-bottom: 1rem;">
|
||||||
|
Manage Miku's long-term memories powered by the Cheshire Cat AI pipeline.
|
||||||
|
Memories are stored in Qdrant vector database and used to give Miku persistent knowledge about users.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<!-- Cat Integration Status -->
|
||||||
|
<div id="cat-status-section" style="background: #1a1a2e; border: 1px solid #444; border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;">
|
||||||
|
<div style="display: flex; justify-content: space-between; align-items: center;">
|
||||||
|
<div>
|
||||||
|
<h4 style="margin: 0 0 0.3rem 0;">🐱 Cheshire Cat Status</h4>
|
||||||
|
<span id="cat-status-indicator" style="color: #888;">Checking...</span>
|
||||||
|
</div>
|
||||||
|
<div style="display: flex; gap: 0.5rem; align-items: center;">
|
||||||
|
<button id="cat-toggle-btn" onclick="toggleCatIntegration()" style="background: #333; color: #fff; padding: 0.4rem 0.8rem; border: 2px solid #666; border-radius: 4px; cursor: pointer; font-weight: bold; font-size: 0.85rem;">
|
||||||
|
Loading...
|
||||||
|
</button>
|
||||||
|
<button onclick="refreshMemoryStats()" style="background: #2a5599; color: #fff; padding: 0.4rem 0.8rem; border: none; border-radius: 4px; cursor: pointer;">
|
||||||
|
🔄 Refresh
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Memory Statistics -->
|
||||||
|
<div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem; margin-bottom: 1.5rem;">
|
||||||
|
<div id="stat-episodic" style="background: #1a2332; border: 1px solid #2a5599; border-radius: 8px; padding: 1rem; text-align: center;">
|
||||||
|
<div style="font-size: 2rem; font-weight: bold; color: #61dafb;" id="stat-episodic-count">—</div>
|
||||||
|
<div style="color: #aaa; font-size: 0.85rem;">📝 Episodic Memories</div>
|
||||||
|
<div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">Conversation snippets</div>
|
||||||
|
</div>
|
||||||
|
<div id="stat-declarative" style="background: #1a3322; border: 1px solid #2a9955; border-radius: 8px; padding: 1rem; text-align: center;">
|
||||||
|
<div style="font-size: 2rem; font-weight: bold; color: #6fdc6f;" id="stat-declarative-count">—</div>
|
||||||
|
<div style="color: #aaa; font-size: 0.85rem;">📚 Declarative Facts</div>
|
||||||
|
<div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">Learned knowledge</div>
|
||||||
|
</div>
|
||||||
|
<div id="stat-procedural" style="background: #332a1a; border: 1px solid #995e2a; border-radius: 8px; padding: 1rem; text-align: center;">
|
||||||
|
<div style="font-size: 2rem; font-weight: bold; color: #dcb06f;" id="stat-procedural-count">—</div>
|
||||||
|
<div style="color: #aaa; font-size: 0.85rem;">⚙️ Procedural</div>
|
||||||
|
<div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">Tools & procedures</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Consolidation -->
|
||||||
|
<div style="background: #1a1a2e; border: 1px solid #444; border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;">
|
||||||
|
<h4 style="margin: 0 0 0.5rem 0;">🌙 Memory Consolidation</h4>
|
||||||
|
<p style="color: #aaa; font-size: 0.85rem; margin-bottom: 0.75rem;">
|
||||||
|
Trigger the sleep consolidation process: analyzes episodic memories, extracts important facts, and removes trivial entries.
|
||||||
|
</p>
|
||||||
|
<div style="display: flex; gap: 0.5rem; align-items: center;">
|
||||||
|
<button id="consolidate-btn" onclick="triggerConsolidation()" style="background: #5b3a8c; color: #fff; padding: 0.5rem 1rem; border: none; border-radius: 4px; cursor: pointer; font-weight: bold;">
|
||||||
|
🌙 Run Consolidation
|
||||||
|
</button>
|
||||||
|
<span id="consolidation-status" style="color: #888; font-size: 0.85rem;"></span>
|
||||||
|
</div>
|
||||||
|
<div id="consolidation-result" style="display: none; margin-top: 0.75rem; background: #111; border: 1px solid #333; border-radius: 4px; padding: 0.75rem; font-size: 0.85rem; color: #ccc; white-space: pre-wrap; max-height: 200px; overflow-y: auto;"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Declarative Facts Browser -->
|
||||||
|
<div style="background: #1a1a2e; border: 1px solid #444; border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;">
|
||||||
|
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.75rem;">
|
||||||
|
<h4 style="margin: 0;">📚 Declarative Facts</h4>
|
||||||
|
<button onclick="loadFacts()" style="background: #2a5599; color: #fff; padding: 0.3rem 0.7rem; border: none; border-radius: 4px; cursor: pointer; font-size: 0.85rem;">
|
||||||
|
🔄 Load Facts
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div id="facts-list" style="max-height: 400px; overflow-y: auto;">
|
||||||
|
<div style="text-align: center; color: #666; padding: 2rem;">Click "Load Facts" to view stored knowledge</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Episodic Memories Browser -->
|
||||||
|
<div style="background: #1a1a2e; border: 1px solid #444; border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;">
|
||||||
|
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.75rem;">
|
||||||
|
<h4 style="margin: 0;">📝 Episodic Memories</h4>
|
||||||
|
<button onclick="loadEpisodicMemories()" style="background: #2a5599; color: #fff; padding: 0.3rem 0.7rem; border: none; border-radius: 4px; cursor: pointer; font-size: 0.85rem;">
|
||||||
|
🔄 Load Memories
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div id="episodic-list" style="max-height: 400px; overflow-y: auto;">
|
||||||
|
<div style="text-align: center; color: #666; padding: 2rem;">Click "Load Memories" to view conversation snippets</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- DANGER ZONE: Delete All Memories -->
|
||||||
|
<div style="background: #2e1a1a; border: 2px solid #993333; border-radius: 8px; padding: 1rem;">
|
||||||
|
<h4 style="margin: 0 0 0.5rem 0; color: #ff6b6b;">⚠️ Danger Zone — Delete All Memories</h4>
|
||||||
|
<p style="color: #cc9999; font-size: 0.85rem; margin-bottom: 1rem;">
|
||||||
|
This will permanently erase ALL of Miku's memories — episodic conversations, learned facts, everything.
|
||||||
|
This action is <strong>irreversible</strong>. Miku will forget everything she has ever learned.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<!-- Step 1: Initial checkbox -->
|
||||||
|
<div id="delete-step-1" style="margin-bottom: 0.75rem;">
|
||||||
|
<label style="cursor: pointer; color: #ff9999;">
|
||||||
|
<input type="checkbox" id="delete-checkbox-1" onchange="onDeleteStep1Change()">
|
||||||
|
I understand this will permanently delete all of Miku's memories
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Step 2: Second confirmation (hidden initially) -->
|
||||||
|
<div id="delete-step-2" style="display: none; margin-bottom: 0.75rem;">
|
||||||
|
<label style="cursor: pointer; color: #ff9999;">
|
||||||
|
<input type="checkbox" id="delete-checkbox-2" onchange="onDeleteStep2Change()">
|
||||||
|
I confirm this is irreversible and I want to proceed
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Step 3: Type confirmation string (hidden initially) -->
|
||||||
|
<div id="delete-step-3" style="display: none; margin-bottom: 0.75rem;">
|
||||||
|
<p style="color: #ff6b6b; font-size: 0.85rem; margin-bottom: 0.5rem;">
|
||||||
|
Type exactly: <code style="background: #333; padding: 0.2rem 0.4rem; border-radius: 3px; color: #ff9999;">Yes, I am deleting Miku's memories fully.</code>
|
||||||
|
</p>
|
||||||
|
<input type="text" id="delete-confirmation-input" placeholder="Type the confirmation string..."
|
||||||
|
style="width: 100%; padding: 0.5rem; background: #1a1a1a; color: #ff9999; border: 1px solid #993333; border-radius: 4px; font-family: monospace; box-sizing: border-box;"
|
||||||
|
oninput="onDeleteInputChange()">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Final delete button (hidden initially) -->
|
||||||
|
<div id="delete-step-final" style="display: none;">
|
||||||
|
<button id="delete-all-btn" onclick="executeDeleteAllMemories()" disabled
|
||||||
|
style="background: #cc3333; color: #fff; padding: 0.5rem 1.5rem; border: none; border-radius: 4px; cursor: not-allowed; font-weight: bold; opacity: 0.5;">
|
||||||
|
🗑️ Permanently Delete All Memories
|
||||||
|
</button>
|
||||||
|
<button onclick="resetDeleteFlow()" style="background: #444; color: #ccc; padding: 0.5rem 1rem; border: none; border-radius: 4px; cursor: pointer; margin-left: 0.5rem;">
|
||||||
|
Cancel
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="logs">
|
<div class="logs">
|
||||||
<h3>Logs</h3>
|
<h3>Logs</h3>
|
||||||
<div id="logs-content"></div>
|
<div id="logs-content"></div>
|
||||||
@@ -1611,6 +1751,10 @@ function switchTab(tabId) {
|
|||||||
console.log('🔄 Refreshing figurine subscribers for Server Management tab');
|
console.log('🔄 Refreshing figurine subscribers for Server Management tab');
|
||||||
refreshFigurineSubscribers();
|
refreshFigurineSubscribers();
|
||||||
}
|
}
|
||||||
|
if (tabId === 'tab9') {
|
||||||
|
console.log('🧠 Refreshing memory stats for Memories tab');
|
||||||
|
refreshMemoryStats();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize
|
// Initialize
|
||||||
@@ -5020,6 +5164,292 @@ function updateVoiceCallHistoryDisplay() {
|
|||||||
historyDiv.innerHTML = html;
|
historyDiv.innerHTML = html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ========== Memory Management (Tab 9) ==========
|
||||||
|
|
||||||
|
async function refreshMemoryStats() {
|
||||||
|
try {
|
||||||
|
// Fetch Cat status
|
||||||
|
const statusRes = await fetch('/memory/status');
|
||||||
|
const statusData = await statusRes.json();
|
||||||
|
|
||||||
|
const indicator = document.getElementById('cat-status-indicator');
|
||||||
|
const toggleBtn = document.getElementById('cat-toggle-btn');
|
||||||
|
|
||||||
|
if (statusData.healthy) {
|
||||||
|
indicator.innerHTML = `<span style="color: #6fdc6f;">● Connected</span> — ${statusData.url}`;
|
||||||
|
} else {
|
||||||
|
indicator.innerHTML = `<span style="color: #ff6b6b;">● Disconnected</span> — ${statusData.url}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (statusData.circuit_breaker_active) {
|
||||||
|
indicator.innerHTML += ` <span style="color: #dcb06f;">(circuit breaker active)</span>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
toggleBtn.textContent = statusData.enabled ? '🐱 Cat: ON' : '😿 Cat: OFF';
|
||||||
|
toggleBtn.style.background = statusData.enabled ? '#2a7a2a' : '#7a2a2a';
|
||||||
|
toggleBtn.style.borderColor = statusData.enabled ? '#4a9a4a' : '#9a4a4a';
|
||||||
|
|
||||||
|
// Fetch memory stats
|
||||||
|
const statsRes = await fetch('/memory/stats');
|
||||||
|
const statsData = await statsRes.json();
|
||||||
|
|
||||||
|
if (statsData.success && statsData.collections) {
|
||||||
|
const collections = {};
|
||||||
|
statsData.collections.forEach(c => { collections[c.name] = c.vectors_count; });
|
||||||
|
|
||||||
|
document.getElementById('stat-episodic-count').textContent = collections['episodic'] ?? '—';
|
||||||
|
document.getElementById('stat-declarative-count').textContent = collections['declarative'] ?? '—';
|
||||||
|
document.getElementById('stat-procedural-count').textContent = collections['procedural'] ?? '—';
|
||||||
|
} else {
|
||||||
|
document.getElementById('stat-episodic-count').textContent = '—';
|
||||||
|
document.getElementById('stat-declarative-count').textContent = '—';
|
||||||
|
document.getElementById('stat-procedural-count').textContent = '—';
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Error refreshing memory stats:', err);
|
||||||
|
document.getElementById('cat-status-indicator').innerHTML = '<span style="color: #ff6b6b;">● Error checking status</span>';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function toggleCatIntegration() {
|
||||||
|
try {
|
||||||
|
const statusRes = await fetch('/memory/status');
|
||||||
|
const statusData = await statusRes.json();
|
||||||
|
const newState = !statusData.enabled;
|
||||||
|
|
||||||
|
const formData = new FormData();
|
||||||
|
formData.append('enabled', newState);
|
||||||
|
const res = await fetch('/memory/toggle', { method: 'POST', body: formData });
|
||||||
|
const data = await res.json();
|
||||||
|
|
||||||
|
if (data.success) {
|
||||||
|
showNotification(`Cheshire Cat ${newState ? 'enabled' : 'disabled'}`, newState ? 'success' : 'info');
|
||||||
|
refreshMemoryStats();
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
showNotification('Failed to toggle Cat integration', 'error');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function triggerConsolidation() {
|
||||||
|
const btn = document.getElementById('consolidate-btn');
|
||||||
|
const status = document.getElementById('consolidation-status');
|
||||||
|
const resultDiv = document.getElementById('consolidation-result');
|
||||||
|
|
||||||
|
btn.disabled = true;
|
||||||
|
btn.textContent = '⏳ Running...';
|
||||||
|
status.textContent = 'Consolidation in progress (this may take a few minutes)...';
|
||||||
|
resultDiv.style.display = 'none';
|
||||||
|
|
||||||
|
try {
|
||||||
|
const res = await fetch('/memory/consolidate', { method: 'POST' });
|
||||||
|
const data = await res.json();
|
||||||
|
|
||||||
|
if (data.success) {
|
||||||
|
status.textContent = '✅ Consolidation complete!';
|
||||||
|
status.style.color = '#6fdc6f';
|
||||||
|
resultDiv.textContent = data.result || 'Consolidation finished successfully.';
|
||||||
|
resultDiv.style.display = 'block';
|
||||||
|
showNotification('Memory consolidation complete', 'success');
|
||||||
|
refreshMemoryStats();
|
||||||
|
} else {
|
||||||
|
status.textContent = '❌ ' + (data.error || 'Consolidation failed');
|
||||||
|
status.style.color = '#ff6b6b';
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
status.textContent = '❌ Error: ' + err.message;
|
||||||
|
status.style.color = '#ff6b6b';
|
||||||
|
} finally {
|
||||||
|
btn.disabled = false;
|
||||||
|
btn.textContent = '🌙 Run Consolidation';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function loadFacts() {
|
||||||
|
const listDiv = document.getElementById('facts-list');
|
||||||
|
listDiv.innerHTML = '<div style="text-align: center; color: #888; padding: 1rem;">Loading facts...</div>';
|
||||||
|
|
||||||
|
try {
|
||||||
|
const res = await fetch('/memory/facts');
|
||||||
|
const data = await res.json();
|
||||||
|
|
||||||
|
if (!data.success || data.count === 0) {
|
||||||
|
listDiv.innerHTML = '<div style="text-align: center; color: #666; padding: 2rem;">No declarative facts stored yet.</div>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let html = '';
|
||||||
|
data.facts.forEach((fact, i) => {
|
||||||
|
const source = fact.metadata?.source || 'unknown';
|
||||||
|
const when = fact.metadata?.when ? new Date(fact.metadata.when * 1000).toLocaleString() : 'unknown';
|
||||||
|
html += `
|
||||||
|
<div style="background: #242424; padding: 0.6rem 0.8rem; margin-bottom: 0.4rem; border-radius: 4px; border-left: 3px solid #2a9955; display: flex; justify-content: space-between; align-items: flex-start;">
|
||||||
|
<div style="flex: 1;">
|
||||||
|
<div style="color: #ddd; font-size: 0.9rem;">${escapeHtml(fact.content)}</div>
|
||||||
|
<div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">
|
||||||
|
Source: ${escapeHtml(source)} · ${when}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<button onclick="deleteMemoryPoint('declarative', '${fact.id}', this)"
|
||||||
|
style="background: none; border: none; color: #993333; cursor: pointer; padding: 0.2rem 0.4rem; font-size: 0.85rem; flex-shrink: 0;"
|
||||||
|
title="Delete this fact">🗑️</button>
|
||||||
|
</div>`;
|
||||||
|
});
|
||||||
|
|
||||||
|
listDiv.innerHTML = `<div style="color: #888; font-size: 0.8rem; margin-bottom: 0.5rem;">${data.count} facts loaded</div>` + html;
|
||||||
|
} catch (err) {
|
||||||
|
listDiv.innerHTML = `<div style="color: #ff6b6b; padding: 1rem;">Error loading facts: ${err.message}</div>`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function loadEpisodicMemories() {
|
||||||
|
const listDiv = document.getElementById('episodic-list');
|
||||||
|
listDiv.innerHTML = '<div style="text-align: center; color: #888; padding: 1rem;">Loading memories...</div>';
|
||||||
|
|
||||||
|
try {
|
||||||
|
const res = await fetch('/memory/episodic');
|
||||||
|
const data = await res.json();
|
||||||
|
|
||||||
|
if (!data.success || data.count === 0) {
|
||||||
|
listDiv.innerHTML = '<div style="text-align: center; color: #666; padding: 2rem;">No episodic memories stored yet.</div>';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let html = '';
|
||||||
|
data.memories.forEach((mem, i) => {
|
||||||
|
const source = mem.metadata?.source || 'unknown';
|
||||||
|
const when = mem.metadata?.when ? new Date(mem.metadata.when * 1000).toLocaleString() : 'unknown';
|
||||||
|
html += `
|
||||||
|
<div style="background: #242424; padding: 0.6rem 0.8rem; margin-bottom: 0.4rem; border-radius: 4px; border-left: 3px solid #2a5599; display: flex; justify-content: space-between; align-items: flex-start;">
|
||||||
|
<div style="flex: 1;">
|
||||||
|
<div style="color: #ddd; font-size: 0.9rem;">${escapeHtml(mem.content)}</div>
|
||||||
|
<div style="color: #666; font-size: 0.75rem; margin-top: 0.3rem;">
|
||||||
|
Source: ${escapeHtml(source)} · ${when}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<button onclick="deleteMemoryPoint('episodic', '${mem.id}', this)"
|
||||||
|
style="background: none; border: none; color: #993333; cursor: pointer; padding: 0.2rem 0.4rem; font-size: 0.85rem; flex-shrink: 0;"
|
||||||
|
title="Delete this memory">🗑️</button>
|
||||||
|
</div>`;
|
||||||
|
});
|
||||||
|
|
||||||
|
listDiv.innerHTML = `<div style="color: #888; font-size: 0.8rem; margin-bottom: 0.5rem;">${data.count} memories loaded</div>` + html;
|
||||||
|
} catch (err) {
|
||||||
|
listDiv.innerHTML = `<div style="color: #ff6b6b; padding: 1rem;">Error loading memories: ${err.message}</div>`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete a single memory point after user confirmation, then prune its row
// from the list and refresh the collection counters.
async function deleteMemoryPoint(collection, pointId, btnElement) {
    if (!confirm(`Delete this ${collection} memory point?`)) return;

    try {
        const response = await fetch(`/memory/point/${collection}/${pointId}`, { method: 'DELETE' });
        const result = await response.json();

        if (!result.success) {
            showNotification('Failed to delete: ' + (result.error || 'Unknown error'), 'error');
            return;
        }

        // Remove the row from the UI without a full reload.
        const rowDiv = btnElement.closest('div[style*="margin-bottom"]');
        if (rowDiv) rowDiv.remove();
        showNotification('Memory point deleted', 'success');
        refreshMemoryStats();
    } catch (err) {
        showNotification('Error: ' + err.message, 'error');
    }
}
|
||||||
|
|
||||||
|
// Delete All Memories — Multi-step confirmation flow
|
||||||
|
// Step 1 checkbox gates visibility of step 2; unchecking it collapses the
// whole flow and resets every later step.
function onDeleteStep1Change() {
    const checked = document.getElementById('delete-checkbox-1').checked;
    document.getElementById('delete-step-2').style.display = checked ? 'block' : 'none';
    if (!checked) {
        document.getElementById('delete-checkbox-2').checked = false;
        document.getElementById('delete-step-3').style.display = 'none';
        document.getElementById('delete-step-final').style.display = 'none';
        document.getElementById('delete-confirmation-input').value = '';
        // Bug fix: recompute the delete-button state after clearing the
        // confirmation input (mirrors onDeleteStep2Change); previously the
        // final button could stay enabled with an empty input.
        updateDeleteButton();
    }
}
|
||||||
|
|
||||||
|
// Step 2 checkbox reveals the typed-confirmation step and the final button;
// collapsing it invalidates any confirmation the user already typed.
function onDeleteStep2Change() {
    const isChecked = document.getElementById('delete-checkbox-2').checked;
    const visibility = isChecked ? 'block' : 'none';
    document.getElementById('delete-step-3').style.display = visibility;
    document.getElementById('delete-step-final').style.display = visibility;
    if (!isChecked) {
        document.getElementById('delete-confirmation-input').value = '';
        updateDeleteButton();
    }
}
|
||||||
|
|
||||||
|
// Re-evaluate the delete button whenever the confirmation text changes.
function onDeleteInputChange() {
    updateDeleteButton();
}
|
||||||
|
|
||||||
|
// Enable the final delete button only when the confirmation phrase has been
// typed exactly; otherwise visually disable it.
function updateDeleteButton() {
    const expected = "Yes, I am deleting Miku's memories fully.";
    const typed = document.getElementById('delete-confirmation-input').value;
    const button = document.getElementById('delete-all-btn');
    const matches = typed === expected;

    button.disabled = !matches;
    button.style.cursor = matches ? 'pointer' : 'not-allowed';
    button.style.opacity = matches ? '1' : '0.5';
}
|
||||||
|
|
||||||
|
// Final stage of the multi-step deletion flow. The server re-validates the
// exact confirmation phrase; we check it client-side first for fast feedback.
async function executeDeleteAllMemories() {
    const input = document.getElementById('delete-confirmation-input').value;
    const expected = "Yes, I am deleting Miku's memories fully.";

    if (input !== expected) {
        showNotification('Confirmation string does not match', 'error');
        return;
    }

    const btn = document.getElementById('delete-all-btn');
    btn.disabled = true;
    btn.textContent = '⏳ Deleting...';

    try {
        const res = await fetch('/memory/delete', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ confirmation: input })
        });
        const data = await res.json();

        if (data.success) {
            showNotification('All memories have been permanently deleted', 'success');
            resetDeleteFlow();
            refreshMemoryStats();
        } else {
            showNotification('Deletion failed: ' + (data.error || 'Unknown error'), 'error');
        }
    } catch (err) {
        showNotification('Error: ' + err.message, 'error');
    } finally {
        // Restore the label, then derive the enabled state from the
        // confirmation input. Bug fix: the previous unconditional
        // `btn.disabled = false` re-enabled the button even after a
        // successful wipe had reset (and emptied) the confirmation input.
        btn.textContent = '🗑️ Permanently Delete All Memories';
        updateDeleteButton();
    }
}
|
||||||
|
|
||||||
|
// Return the multi-step deletion UI to its initial, fully collapsed state.
function resetDeleteFlow() {
    ['delete-checkbox-1', 'delete-checkbox-2'].forEach(id => {
        document.getElementById(id).checked = false;
    });
    document.getElementById('delete-confirmation-input').value = '';
    ['delete-step-2', 'delete-step-3', 'delete-step-final'].forEach(id => {
        document.getElementById(id).style.display = 'none';
    });
    updateDeleteButton();
}
|
||||||
|
|
||||||
|
// Escape HTML-sensitive characters by round-tripping the string through a
// detached DOM node (textContent assignment encodes &, < and >).
function escapeHtml(str) {
    if (!str) return '';
    const scratch = document.createElement('div');
    scratch.textContent = str;
    return scratch.innerHTML;
}
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
479
bot/utils/cat_client.py
Normal file
479
bot/utils/cat_client.py
Normal file
@@ -0,0 +1,479 @@
|
|||||||
|
# utils/cat_client.py
|
||||||
|
"""
|
||||||
|
Cheshire Cat AI Adapter for Miku Discord Bot (Phase 3)
|
||||||
|
|
||||||
|
Routes messages through the Cheshire Cat pipeline for:
|
||||||
|
- Memory-augmented responses (episodic + declarative recall)
|
||||||
|
- Fact extraction and consolidation
|
||||||
|
- Per-user conversation isolation
|
||||||
|
|
||||||
|
Uses WebSocket for chat (per-user isolation via /ws/{user_id}).
|
||||||
|
Uses HTTP for memory management endpoints.
|
||||||
|
Falls back to query_llama() on failure for zero-downtime resilience.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Optional, Dict, Any, List
|
||||||
|
|
||||||
|
import globals
|
||||||
|
from utils.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger('cat_client')
|
||||||
|
|
||||||
|
|
||||||
|
class CatAdapter:
|
||||||
|
"""
|
||||||
|
Async adapter for Cheshire Cat AI.
|
||||||
|
|
||||||
|
Uses WebSocket /ws/{user_id} for conversation (per-user memory isolation).
|
||||||
|
Uses HTTP REST for memory management endpoints.
|
||||||
|
Without API keys configured, HTTP POST /message defaults all users to
|
||||||
|
user_id="user" (no isolation). WebSocket path param gives true isolation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
    def __init__(self):
        """Initialize adapter state from globals. No network I/O happens here."""
        self._base_url = globals.CHESHIRE_CAT_URL.rstrip('/')  # normalized: no trailing slash
        self._api_key = globals.CHESHIRE_CAT_API_KEY
        self._timeout = globals.CHESHIRE_CAT_TIMEOUT  # seconds, used for WS connect + receive deadline
        self._healthy = None  # None = unknown, True/False = last check result
        self._last_health_check = 0  # wall-clock timestamp of the last probe (0 = never)
        self._health_check_interval = 30  # seconds between health checks
        self._consecutive_failures = 0  # reset to 0 on any successful query
        self._max_failures_before_circuit_break = 3
        self._circuit_broken_until = 0  # timestamp when circuit breaker resets
        logger.info(f"CatAdapter initialized: {self._base_url} (timeout={self._timeout}s)")
|
||||||
|
|
||||||
|
def _get_headers(self) -> dict:
|
||||||
|
"""Build request headers with optional auth."""
|
||||||
|
headers = {'Content-Type': 'application/json'}
|
||||||
|
if self._api_key:
|
||||||
|
headers['Authorization'] = f'Bearer {self._api_key}'
|
||||||
|
return headers
|
||||||
|
|
||||||
|
def _user_id_for_discord(self, user_id: str) -> str:
|
||||||
|
"""
|
||||||
|
Format Discord user ID for Cat's user namespace.
|
||||||
|
Cat uses user_id to isolate working memory and episodic memories.
|
||||||
|
"""
|
||||||
|
return f"discord_{user_id}"
|
||||||
|
|
||||||
|
    async def health_check(self) -> bool:
        """
        Check if Cheshire Cat is reachable and healthy.

        Probes GET / and caches the verdict for `_health_check_interval`
        seconds; both success and failure are cached so a dead server is
        not re-probed on every call.

        Returns:
            True if the last (possibly cached) probe returned HTTP 200.
        """
        now = time.time()
        # Serve the cached verdict while it is fresh. `_healthy is None`
        # means no probe has run yet, so we always probe in that case.
        if now - self._last_health_check < self._health_check_interval and self._healthy is not None:
            return self._healthy

        try:
            async with aiohttp.ClientSession() as session:
                # Short, fixed probe timeout — independent of the (possibly
                # much longer) configured chat timeout.
                async with session.get(
                    f"{self._base_url}/",
                    headers=self._get_headers(),
                    timeout=aiohttp.ClientTimeout(total=10)
                ) as response:
                    self._healthy = response.status == 200
                    self._last_health_check = now
                    if self._healthy:
                        logger.debug("Cat health check: OK")
                    else:
                        logger.warning(f"Cat health check failed: status {response.status}")
                    return self._healthy
        except Exception as e:
            # Connection errors count as unhealthy and are cached like any
            # other result.
            self._healthy = False
            self._last_health_check = now
            logger.warning(f"Cat health check error: {e}")
            return False
|
||||||
|
|
||||||
|
def _is_circuit_broken(self) -> bool:
|
||||||
|
"""Check if circuit breaker is active (too many consecutive failures)."""
|
||||||
|
if self._consecutive_failures >= self._max_failures_before_circuit_break:
|
||||||
|
if time.time() < self._circuit_broken_until:
|
||||||
|
return True
|
||||||
|
# Circuit breaker expired, allow retry
|
||||||
|
logger.info("Circuit breaker reset, allowing Cat retry")
|
||||||
|
self._consecutive_failures = 0
|
||||||
|
return False
|
||||||
|
|
||||||
|
    async def query(
        self,
        text: str,
        user_id: str,
        guild_id: Optional[str] = None,
        author_name: Optional[str] = None,
        mood: Optional[str] = None,
        response_type: str = "dm_response",
    ) -> Optional[str]:
        """
        Send a message through the Cat pipeline via WebSocket and get a response.

        Uses WebSocket /ws/{user_id} for per-user memory isolation. Without
        API keys, HTTP POST /message defaults all users to user_id="user"
        (no isolation); the WebSocket path parameter provides true per-user
        isolation because Cat's auth handler uses user_id from the path when
        no keys are set.

        Args:
            text: User's message text.
            user_id: Discord user ID (namespaced as discord_{user_id}).
            guild_id: Optional guild ID for server context.
            author_name: Display name of the user.
            mood: Current mood name (passed as metadata for Cat hooks).
            response_type: Type of response context.

        Returns:
            Cat's response text, or None if disabled, circuit-broken, or any
            failure occurred (caller should fall back to query_llama).
        """
        if not globals.USE_CHESHIRE_CAT:
            return None

        if self._is_circuit_broken():
            logger.debug("Circuit breaker active, skipping Cat")
            return None

        cat_user_id = self._user_id_for_discord(user_id)

        # Build message payload with Discord metadata for our plugin hooks.
        # The discord_bridge plugin's before_cat_reads_message hook reads
        # these custom keys from the message dict.
        payload = {
            "text": text,
        }
        if guild_id:
            payload["discord_guild_id"] = str(guild_id)
        if author_name:
            payload["discord_author_name"] = author_name
        if mood:
            payload["discord_mood"] = mood
        if response_type:
            payload["discord_response_type"] = response_type

        try:
            # Build WebSocket URL from the HTTP base URL.
            ws_base = self._base_url.replace("http://", "ws://").replace("https://", "wss://")
            ws_url = f"{ws_base}/ws/{cat_user_id}"

            logger.debug(f"Querying Cat via WS: user={cat_user_id}, text={text[:80]}...")

            async with aiohttp.ClientSession() as session:
                async with session.ws_connect(
                    ws_url,
                    timeout=self._timeout,
                ) as ws:
                    # Send the message.
                    await ws.send_json(payload)

                    # Read responses until we get the final "chat" type message.
                    # Cat may send intermediate messages (chat_token for streaming,
                    # notification for status updates). We want the final "chat" one.
                    # A shared deadline bounds the whole receive loop so slow
                    # token streams cannot extend the wait indefinitely.
                    reply_text = None
                    deadline = asyncio.get_event_loop().time() + self._timeout

                    while True:
                        remaining = deadline - asyncio.get_event_loop().time()
                        if remaining <= 0:
                            logger.error(f"Cat WS timeout after {self._timeout}s")
                            break

                        try:
                            ws_msg = await asyncio.wait_for(
                                ws.receive(),
                                timeout=remaining
                            )
                        except asyncio.TimeoutError:
                            logger.error(f"Cat WS receive timeout after {self._timeout}s")
                            break

                        # Handle WebSocket close/error frames — no reply coming.
                        if ws_msg.type in (aiohttp.WSMsgType.CLOSE, aiohttp.WSMsgType.CLOSING, aiohttp.WSMsgType.CLOSED):
                            logger.warning("Cat WS connection closed by server")
                            break
                        if ws_msg.type == aiohttp.WSMsgType.ERROR:
                            logger.error(f"Cat WS error frame: {ws.exception()}")
                            break
                        if ws_msg.type != aiohttp.WSMsgType.TEXT:
                            logger.debug(f"Cat WS non-text frame type: {ws_msg.type}")
                            continue

                        try:
                            msg = json.loads(ws_msg.data)
                        except (json.JSONDecodeError, TypeError) as e:
                            logger.warning(f"Cat WS non-JSON message: {e}")
                            continue

                        msg_type = msg.get("type", "")

                        if msg_type == "chat":
                            # Final response — extract text.
                            reply_text = msg.get("content") or msg.get("text", "")
                            break
                        elif msg_type == "chat_token":
                            # Streaming token — skip, we wait for the final message.
                            continue
                        elif msg_type == "error":
                            error_desc = msg.get("description", "Unknown Cat error")
                            logger.error(f"Cat WS error: {error_desc}")
                            break
                        elif msg_type == "notification":
                            logger.debug(f"Cat notification: {msg.get('content', '')}")
                            continue
                        else:
                            logger.debug(f"Cat WS unknown msg type: {msg_type}")
                            continue

                    # A non-empty reply counts as success and resets the
                    # failure streak; anything else counts as a failure.
                    if reply_text and reply_text.strip():
                        self._consecutive_failures = 0
                        logger.info(f"🐱 Cat response for {cat_user_id}: {reply_text[:100]}...")
                        return reply_text
                    else:
                        logger.warning("Cat returned empty response via WS")
                        self._consecutive_failures += 1
                        return None

        except asyncio.TimeoutError:
            logger.error(f"Cat WS connection timeout after {self._timeout}s")
            self._consecutive_failures += 1
            if self._consecutive_failures >= self._max_failures_before_circuit_break:
                # Open the breaker for 60s so we stop hammering a dead server.
                self._circuit_broken_until = time.time() + 60
                logger.warning("Circuit breaker activated (WS timeout)")
            return None
        except Exception as e:
            logger.error(f"Cat WS query error: {e}")
            self._consecutive_failures += 1
            if self._consecutive_failures >= self._max_failures_before_circuit_break:
                self._circuit_broken_until = time.time() + 60
                logger.warning(f"Circuit breaker activated: {e}")
            return None
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# MEMORY MANAGEMENT API (for Web UI)
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
|
async def get_memory_stats(self) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Get memory collection statistics from Cat.
|
||||||
|
Returns dict with collection names and point counts.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.get(
|
||||||
|
f"{self._base_url}/memory/collections",
|
||||||
|
headers=self._get_headers(),
|
||||||
|
timeout=aiohttp.ClientTimeout(total=15)
|
||||||
|
) as response:
|
||||||
|
if response.status == 200:
|
||||||
|
data = await response.json()
|
||||||
|
return data
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to get memory stats: {response.status}")
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting memory stats: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def get_memory_points(
|
||||||
|
self,
|
||||||
|
collection: str = "declarative",
|
||||||
|
limit: int = 100,
|
||||||
|
offset: Optional[str] = None
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Get all points from a memory collection.
|
||||||
|
Returns paginated list of memory points.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
params = {"limit": limit}
|
||||||
|
if offset:
|
||||||
|
params["offset"] = offset
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.get(
|
||||||
|
f"{self._base_url}/memory/collections/{collection}/points",
|
||||||
|
headers=self._get_headers(),
|
||||||
|
params=params,
|
||||||
|
timeout=aiohttp.ClientTimeout(total=30)
|
||||||
|
) as response:
|
||||||
|
if response.status == 200:
|
||||||
|
return await response.json()
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to get {collection} points: {response.status}")
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting memory points: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def get_all_facts(self) -> List[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Retrieve ALL declarative memory points (facts) with pagination.
|
||||||
|
Returns a flat list of all fact dicts.
|
||||||
|
"""
|
||||||
|
all_facts = []
|
||||||
|
offset = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
result = await self.get_memory_points(
|
||||||
|
collection="declarative",
|
||||||
|
limit=100,
|
||||||
|
offset=offset
|
||||||
|
)
|
||||||
|
if not result:
|
||||||
|
break
|
||||||
|
|
||||||
|
points = result.get("points", [])
|
||||||
|
for point in points:
|
||||||
|
payload = point.get("payload", {})
|
||||||
|
fact = {
|
||||||
|
"id": point.get("id"),
|
||||||
|
"content": payload.get("page_content", ""),
|
||||||
|
"metadata": payload.get("metadata", {}),
|
||||||
|
}
|
||||||
|
all_facts.append(fact)
|
||||||
|
|
||||||
|
offset = result.get("next_offset")
|
||||||
|
if not offset:
|
||||||
|
break
|
||||||
|
|
||||||
|
logger.info(f"Retrieved {len(all_facts)} declarative facts")
|
||||||
|
return all_facts
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error retrieving all facts: {e}")
|
||||||
|
return all_facts
|
||||||
|
|
||||||
|
async def delete_memory_point(self, collection: str, point_id: str) -> bool:
|
||||||
|
"""Delete a single memory point by ID."""
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.delete(
|
||||||
|
f"{self._base_url}/memory/collections/{collection}/points/{point_id}",
|
||||||
|
headers=self._get_headers(),
|
||||||
|
timeout=aiohttp.ClientTimeout(total=15)
|
||||||
|
) as response:
|
||||||
|
if response.status == 200:
|
||||||
|
logger.info(f"Deleted point {point_id} from {collection}")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to delete point: {response.status}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error deleting point: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def wipe_all_memories(self) -> bool:
|
||||||
|
"""
|
||||||
|
Delete ALL memory collections (episodic + declarative).
|
||||||
|
This is the nuclear option — requires multi-step confirmation in the UI.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.delete(
|
||||||
|
f"{self._base_url}/memory/collections",
|
||||||
|
headers=self._get_headers(),
|
||||||
|
timeout=aiohttp.ClientTimeout(total=30)
|
||||||
|
) as response:
|
||||||
|
if response.status == 200:
|
||||||
|
logger.warning("🗑️ ALL memory collections wiped!")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
error = await response.text()
|
||||||
|
logger.error(f"Failed to wipe memories: {response.status} - {error}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error wiping memories: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def wipe_conversation_history(self) -> bool:
|
||||||
|
"""Clear working memory / conversation history."""
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.delete(
|
||||||
|
f"{self._base_url}/memory/conversation_history",
|
||||||
|
headers=self._get_headers(),
|
||||||
|
timeout=aiohttp.ClientTimeout(total=15)
|
||||||
|
) as response:
|
||||||
|
if response.status == 200:
|
||||||
|
logger.info("Conversation history cleared")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to clear conversation history: {response.status}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error clearing conversation history: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
    async def trigger_consolidation(self) -> Optional[str]:
        """
        Trigger memory consolidation by sending a special message via WebSocket.

        The memory_consolidation plugin's tool 'consolidate_memories' is
        triggered when it sees 'consolidate now' in the text. A dedicated
        "system_consolidation" user ID keeps this out of real users'
        working memory.

        Returns:
            Cat's textual result on success, a human-readable error string
            for in-protocol failures (timeouts, error frames), or None on
            connection-level exceptions.
        """
        try:
            ws_base = self._base_url.replace("http://", "ws://").replace("https://", "wss://")
            ws_url = f"{ws_base}/ws/system_consolidation"

            logger.info("🌙 Triggering memory consolidation via WS...")

            async with aiohttp.ClientSession() as session:
                async with session.ws_connect(
                    ws_url,
                    timeout=300,  # Consolidation can be very slow
                ) as ws:
                    await ws.send_json({"text": "consolidate now"})

                    # Wait for the final chat response, bounded by a hard
                    # 300s deadline shared across all receives.
                    deadline = asyncio.get_event_loop().time() + 300

                    while True:
                        remaining = deadline - asyncio.get_event_loop().time()
                        if remaining <= 0:
                            logger.error("Consolidation timed out (>300s)")
                            return "Consolidation timed out"

                        try:
                            ws_msg = await asyncio.wait_for(
                                ws.receive(),
                                timeout=remaining
                            )
                        except asyncio.TimeoutError:
                            logger.error("Consolidation WS receive timeout")
                            return "Consolidation timed out waiting for response"

                        # Connection-level frames end the wait immediately.
                        if ws_msg.type in (aiohttp.WSMsgType.CLOSE, aiohttp.WSMsgType.CLOSING, aiohttp.WSMsgType.CLOSED):
                            logger.warning("Consolidation WS closed by server")
                            return "Connection closed during consolidation"
                        if ws_msg.type == aiohttp.WSMsgType.ERROR:
                            return f"WebSocket error: {ws.exception()}"
                        if ws_msg.type != aiohttp.WSMsgType.TEXT:
                            continue

                        try:
                            msg = json.loads(ws_msg.data)
                        except (json.JSONDecodeError, TypeError):
                            continue

                        msg_type = msg.get("type", "")
                        if msg_type == "chat":
                            # Final response from the consolidation tool.
                            reply = msg.get("content") or msg.get("text", "")
                            logger.info(f"Consolidation result: {reply[:200]}")
                            return reply
                        elif msg_type == "error":
                            error_desc = msg.get("description", "Unknown error")
                            logger.error(f"Consolidation error: {error_desc}")
                            return f"Consolidation error: {error_desc}"
                        else:
                            # Streaming tokens / notifications — keep waiting.
                            continue

        except asyncio.TimeoutError:
            logger.error("Consolidation WS connection timed out")
            return None
        except Exception as e:
            logger.error(f"Consolidation error: {e}")
            return None
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton instance shared by the API layer and the bot's response pipeline.
cat_adapter = CatAdapter()
|
||||||
@@ -152,6 +152,13 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
|
|||||||
"""
|
"""
|
||||||
Query llama.cpp server via llama-swap with OpenAI-compatible API.
|
Query llama.cpp server via llama-swap with OpenAI-compatible API.
|
||||||
|
|
||||||
|
.. deprecated:: Phase 3
|
||||||
|
For main conversation flow, prefer routing through the Cheshire Cat pipeline
|
||||||
|
(via cat_client.CatAdapter.query) which provides memory-augmented responses.
|
||||||
|
This function remains available for specialized use cases (vision, bipolar mode,
|
||||||
|
image generation, autonomous, sentiment analysis) and as a fallback when Cat
|
||||||
|
is unavailable.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
user_prompt: The user's input
|
user_prompt: The user's input
|
||||||
user_id: User identifier (used for DM history)
|
user_id: User identifier (used for DM history)
|
||||||
|
|||||||
126
cat-plugins/discord_bridge/discord_bridge.py
Normal file
126
cat-plugins/discord_bridge/discord_bridge.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
"""
|
||||||
|
Discord Bridge Plugin for Cheshire Cat
|
||||||
|
|
||||||
|
This plugin enriches Cat's memory system with Discord context:
|
||||||
|
- Unified user identity across all servers and DMs
|
||||||
|
- Guild/channel metadata for context tracking
|
||||||
|
- Minimal filtering before storage (only skip obvious junk)
|
||||||
|
- Marks memories as unconsolidated for nightly processing
|
||||||
|
|
||||||
|
Phase 1 Implementation
|
||||||
|
"""
|
||||||
|
|
||||||
|
from cat.mad_hatter.decorators import hook
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
@hook(priority=100)
def before_cat_reads_message(user_message_json: dict, cat) -> dict:
    """
    Enrich the incoming message with Discord metadata before processing.

    The Discord bot's CatAdapter sends its metadata as extra top-level keys
    in the WebSocket message JSON: discord_guild_id, discord_author_name,
    discord_mood, discord_response_type. user_message_json arrives as a
    UserMessage (Pydantic BaseModelDict with extra="allow"), so those extra
    keys survive model_validate() and are readable via .get() — but .pop()
    is NOT implemented. The values are copied into working_memory so that
    downstream hooks can use them.
    """
    # Pull the Discord context keys off the message payload
    # (sent by CatAdapter.query() via WebSocket).
    guild_id = user_message_json.get('discord_guild_id', None)
    author_name = user_message_json.get('discord_author_name', None)
    mood = user_message_json.get('discord_mood', None)
    response_type = user_message_json.get('discord_response_type', None)

    # Backward compatibility: older callers stashed guild_id directly in
    # working memory instead of the message payload.
    if not guild_id:
        guild_id = cat.working_memory.get('guild_id')

    # Expose everything via working memory for the storage/recall hooks.
    cat.working_memory['guild_id'] = guild_id or 'dm'
    cat.working_memory['author_name'] = author_name
    cat.working_memory['mood'] = mood
    cat.working_memory['response_type'] = response_type

    return user_message_json
|
||||||
|
|
||||||
|
|
||||||
|
@hook(priority=100)
def before_cat_stores_episodic_memory(doc, cat):
    """
    Filter and enrich episodic memories before storage (Phase 1: minimal).

    Only obvious junk (1-2 character messages, bare reactions, lone Discord
    emoji) is dropped; everything else is stored and tagged as
    unconsolidated so the nightly consolidation pass can process it.
    Returning None tells Cat not to store the document at all.
    """
    message = doc.page_content.strip()

    # Drop only the most trivial messages.
    trivial_patterns = (
        r'^\w{1,2}$',                       # 1-2 character messages: "k", "ok"
        r'^(lol|lmao|haha|hehe|xd|rofl)$',  # Pure reactions
        r'^:[\w_]+:$',                      # Discord emoji only: ":smile:"
    )
    lowered = message.lower()
    for pattern in trivial_patterns:
        if re.match(pattern, lowered):
            print(f"🗑️ [Discord Bridge] Skipping trivial message: {message}")
            return None  # Don't store at all

    # Tag for the nightly consolidation job.
    doc.metadata['consolidated'] = False  # Needs nightly processing
    doc.metadata['stored_at'] = datetime.now().isoformat()

    # Discord context recorded by before_cat_reads_message.
    guild_id = cat.working_memory.get('guild_id', 'dm')
    author_name = cat.working_memory.get('author_name')

    doc.metadata['guild_id'] = guild_id
    doc.metadata['source'] = cat.user_id  # CRITICAL: Cat filters episodic by source=user_id!
    doc.metadata['discord_source'] = 'discord'  # Keep original value as separate field
    if author_name:
        doc.metadata['author_name'] = author_name

    print(f"💾 [Discord Bridge] Storing memory (unconsolidated): {message[:50]}...")
    print(f"   User: {cat.user_id}, Guild: {guild_id}, Author: {author_name}")

    return doc
|
||||||
|
|
||||||
|
|
||||||
|
@hook(priority=50)
def after_cat_recalls_memories(cat):
    """
    Log a summary of recalled memories for debugging.

    Recalled memories live in cat.working_memory; episodic entries are
    (doc, score, ...) tuples whose doc carries Discord metadata.
    """
    episodic = cat.working_memory.get('episodic_memories', [])
    declarative = cat.working_memory.get('declarative_memories', [])

    if episodic:
        print(f"🧠 [Discord Bridge] Recalled {len(episodic)} episodic memories for user {cat.user_id}")
        # Collect the distinct guilds these memories came from.
        guilds = {doc.metadata.get('guild_id', 'unknown') for doc, score, *rest in episodic}
        print(f"   From guilds: {', '.join(str(g) for g in guilds)}")

    if declarative:
        print(f"📚 [Discord Bridge] Recalled {len(declarative)} declarative facts for user {cat.user_id}")
||||||
|
|
||||||
|
# Plugin metadata (read by Cheshire Cat's plugin loader)
__version__ = "1.0.0"
__description__ = "Discord bridge with unified user identity and sleep consolidation support"
||||||
10
cat-plugins/discord_bridge/plugin.json
Normal file
10
cat-plugins/discord_bridge/plugin.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"name": "Discord Bridge",
|
||||||
|
"description": "Discord integration with unified user identity and sleep consolidation support",
|
||||||
|
"author_name": "Miku Bot Team",
|
||||||
|
"author_url": "",
|
||||||
|
"plugin_url": "",
|
||||||
|
"tags": "discord, memory, consolidation",
|
||||||
|
"thumb": "",
|
||||||
|
"version": "1.0.0"
|
||||||
|
}
|
||||||
1
cat-plugins/discord_bridge/settings.json
Normal file
1
cat-plugins/discord_bridge/settings.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{}
|
||||||
@@ -5,25 +5,32 @@ Phase 2: Sleep Consolidation Implementation
|
|||||||
|
|
||||||
Implements human-like memory consolidation:
|
Implements human-like memory consolidation:
|
||||||
1. During the day: Store almost everything temporarily
|
1. During the day: Store almost everything temporarily
|
||||||
2. At night (3 AM): Analyze conversations, keep important, delete trivial
|
2. On demand (or scheduled): Analyze conversations, keep important, delete trivial
|
||||||
3. Extract facts for declarative memory
|
3. Extract facts for declarative memory (per-user)
|
||||||
|
|
||||||
This mimics how human brains consolidate memories during REM sleep.
|
This mimics how human brains consolidate memories during REM sleep.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from cat.mad_hatter.decorators import hook, plugin, tool
|
from cat.mad_hatter.decorators import hook, tool
|
||||||
from cat.mad_hatter.decorators import CatHook
|
from datetime import datetime
|
||||||
from datetime import datetime, timedelta
|
|
||||||
import json
|
import json
|
||||||
import asyncio
|
|
||||||
import os
|
import os
|
||||||
from typing import List, Dict, Any
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
print("🌙 [Consolidation Plugin] Loading...")
|
print("\U0001f319 [Consolidation Plugin] Loading...")
|
||||||
|
|
||||||
|
# Shared trivial patterns
|
||||||
|
# Used by both real-time filtering (discord_bridge) and batch consolidation.
|
||||||
|
# Keep this in sync with discord_bridge's skip_patterns.
|
||||||
|
TRIVIAL_PATTERNS = frozenset([
|
||||||
|
'lol', 'k', 'ok', 'okay', 'haha', 'lmao', 'xd', 'rofl', 'lmfao',
|
||||||
|
'brb', 'gtg', 'afk', 'ttyl', 'lmk', 'idk', 'tbh', 'imo', 'imho',
|
||||||
|
'omg', 'wtf', 'fyi', 'btw', 'nvm', 'jk', 'ikr', 'smh',
|
||||||
|
'hehe', 'heh', 'gg', 'wp', 'gz', 'gj', 'ty', 'thx', 'np', 'yw',
|
||||||
|
'nice', 'cool', 'neat', 'wow', 'yep', 'nope', 'yeah', 'nah',
|
||||||
|
])
|
||||||
|
|
||||||
|
# Consolidation state
|
||||||
# Store consolidation state
|
|
||||||
consolidation_state = {
|
consolidation_state = {
|
||||||
'last_run': None,
|
'last_run': None,
|
||||||
'is_running': False,
|
'is_running': False,
|
||||||
@@ -36,442 +43,97 @@ consolidation_state = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def consolidate_user_memories(user_id: str, memories: List[Any], cat) -> Dict[str, Any]:
|
# ===================================================================
|
||||||
"""
|
# HOOKS
|
||||||
Analyze all of a user's conversations from the day in ONE LLM call.
|
# ===================================================================
|
||||||
|
|
||||||
This is the core intelligence - Miku sees patterns, themes, relationship evolution.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Build conversation timeline
|
|
||||||
timeline = []
|
|
||||||
for mem in sorted(memories, key=lambda m: m.metadata.get('stored_at', '')):
|
|
||||||
timeline.append({
|
|
||||||
'time': mem.metadata.get('stored_at', ''),
|
|
||||||
'guild': mem.metadata.get('guild_id', 'unknown'),
|
|
||||||
'channel': mem.metadata.get('channel_id', 'unknown'),
|
|
||||||
'content': mem.page_content[:200] # Truncate for context window
|
|
||||||
})
|
|
||||||
|
|
||||||
# Build consolidation prompt
|
|
||||||
consolidation_prompt = f"""You are Miku, reviewing your conversations with user {user_id} from today.
|
|
||||||
Look at the full timeline and decide what's worth remembering long-term.
|
|
||||||
|
|
||||||
Timeline of {len(timeline)} conversations:
|
|
||||||
{json.dumps(timeline, indent=2)}
|
|
||||||
|
|
||||||
Analyze holistically:
|
|
||||||
1. What did you learn about this person today?
|
|
||||||
2. Any recurring themes or important moments?
|
|
||||||
3. How did your relationship with them evolve?
|
|
||||||
4. Which conversations were meaningful vs casual chitchat?
|
|
||||||
|
|
||||||
For EACH conversation (by index), decide:
|
|
||||||
- keep: true/false (should this go to long-term memory?)
|
|
||||||
- importance: 1-10 (10 = life-changing event, 1 = forget immediately)
|
|
||||||
- categories: list of ["personal", "preference", "emotional", "event", "relationship"]
|
|
||||||
- insights: What did you learn? (for declarative memory)
|
|
||||||
- summary: One sentence for future retrieval
|
|
||||||
|
|
||||||
Respond with VALID JSON (no extra text):
|
|
||||||
{{
|
|
||||||
"day_summary": "One sentence about this person based on today",
|
|
||||||
"relationship_change": "How your relationship evolved (if at all)",
|
|
||||||
"conversations": [
|
|
||||||
{{
|
|
||||||
"index": 0,
|
|
||||||
"keep": true,
|
|
||||||
"importance": 8,
|
|
||||||
"categories": ["personal", "emotional"],
|
|
||||||
"insights": "User struggles with anxiety, needs support",
|
|
||||||
"summary": "User opened up about their anxiety"
|
|
||||||
}},
|
|
||||||
{{
|
|
||||||
"index": 1,
|
|
||||||
"keep": false,
|
|
||||||
"importance": 2,
|
|
||||||
"categories": [],
|
|
||||||
"insights": null,
|
|
||||||
"summary": "Just casual greeting"
|
|
||||||
}}
|
|
||||||
],
|
|
||||||
"new_facts": [
|
|
||||||
"User has anxiety",
|
|
||||||
"User trusts Miku enough to open up"
|
|
||||||
]
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Call LLM for analysis
|
|
||||||
print(f"🌙 [Consolidation] Analyzing {len(memories)} memories for {user_id}...")
|
|
||||||
|
|
||||||
# Use the Cat's LLM
|
|
||||||
from cat.looking_glass.cheshire_cat import CheshireCat
|
|
||||||
response = cat.llm(consolidation_prompt)
|
|
||||||
|
|
||||||
# Parse JSON response
|
|
||||||
# Remove markdown code blocks if present
|
|
||||||
response = response.strip()
|
|
||||||
if response.startswith('```'):
|
|
||||||
response = response.split('```')[1]
|
|
||||||
if response.startswith('json'):
|
|
||||||
response = response[4:]
|
|
||||||
|
|
||||||
analysis = json.loads(response)
|
|
||||||
|
|
||||||
return analysis
|
|
||||||
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
print(f"❌ [Consolidation] Failed to parse LLM response: {e}")
|
|
||||||
print(f" Response: {response[:200]}...")
|
|
||||||
# Default: keep everything if parsing fails
|
|
||||||
return {
|
|
||||||
"day_summary": "Unable to analyze",
|
|
||||||
"relationship_change": "Unknown",
|
|
||||||
"conversations": [
|
|
||||||
{"index": i, "keep": True, "importance": 5, "categories": [], "insights": None, "summary": "Kept by default"}
|
|
||||||
for i in range(len(memories))
|
|
||||||
],
|
|
||||||
"new_facts": []
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ [Consolidation] Error during analysis: {e}")
|
|
||||||
return {
|
|
||||||
"day_summary": "Error during analysis",
|
|
||||||
"relationship_change": "Unknown",
|
|
||||||
"conversations": [
|
|
||||||
{"index": i, "keep": True, "importance": 5, "categories": [], "insights": None, "summary": "Kept by default"}
|
|
||||||
for i in range(len(memories))
|
|
||||||
],
|
|
||||||
"new_facts": []
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def run_consolidation(cat):
|
|
||||||
"""
|
|
||||||
Main consolidation task.
|
|
||||||
Run at 3 AM or on-demand via admin endpoint.
|
|
||||||
"""
|
|
||||||
|
|
||||||
if consolidation_state['is_running']:
|
|
||||||
print("⚠️ [Consolidation] Already running, skipping...")
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
consolidation_state['is_running'] = True
|
|
||||||
print(f"🌙 [Consolidation] Starting memory consolidation at {datetime.now()}")
|
|
||||||
|
|
||||||
# Get episodic memory collection
|
|
||||||
print("📊 [Consolidation] Fetching unconsolidated memories...")
|
|
||||||
|
|
||||||
episodic_memory = cat.memory.vectors.episodic
|
|
||||||
|
|
||||||
# Get all points from episodic memory
|
|
||||||
# Qdrant API: scroll through all points
|
|
||||||
try:
|
|
||||||
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
|
||||||
|
|
||||||
# Query for unconsolidated memories
|
|
||||||
# Filter by consolidated=False
|
|
||||||
filter_condition = Filter(
|
|
||||||
must=[
|
|
||||||
FieldCondition(
|
|
||||||
key="metadata.consolidated",
|
|
||||||
match=MatchValue(value=False)
|
|
||||||
)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get all unconsolidated memories
|
|
||||||
results = episodic_memory.client.scroll(
|
|
||||||
collection_name=episodic_memory.collection_name,
|
|
||||||
scroll_filter=filter_condition,
|
|
||||||
limit=1000, # Max per batch
|
|
||||||
with_payload=True,
|
|
||||||
with_vectors=False
|
|
||||||
)
|
|
||||||
|
|
||||||
memories = results[0] if results else []
|
|
||||||
|
|
||||||
print(f"📊 [Consolidation] Found {len(memories)} unconsolidated memories")
|
|
||||||
|
|
||||||
if len(memories) == 0:
|
|
||||||
print("✨ [Consolidation] No memories to consolidate!")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Group by user_id
|
|
||||||
memories_by_user = {}
|
|
||||||
for point in memories:
|
|
||||||
# Extract user_id from metadata or ID
|
|
||||||
user_id = point.payload.get('metadata', {}).get('user_id', 'unknown')
|
|
||||||
if user_id == 'unknown':
|
|
||||||
# Try to extract from ID format
|
|
||||||
continue
|
|
||||||
|
|
||||||
if user_id not in memories_by_user:
|
|
||||||
memories_by_user[user_id] = []
|
|
||||||
|
|
||||||
memories_by_user[user_id].append(point)
|
|
||||||
|
|
||||||
print(f"📊 [Consolidation] Processing {len(memories_by_user)} users")
|
|
||||||
|
|
||||||
# Process each user
|
|
||||||
total_kept = 0
|
|
||||||
total_deleted = 0
|
|
||||||
total_processed = 0
|
|
||||||
|
|
||||||
for user_id, user_memories in memories_by_user.items():
|
|
||||||
print(f"\n👤 [Consolidation] Processing user: {user_id} ({len(user_memories)} memories)")
|
|
||||||
|
|
||||||
# Simulate consolidation for now
|
|
||||||
# In Phase 2 complete, this will call consolidate_user_memories()
|
|
||||||
for memory in user_memories:
|
|
||||||
total_processed += 1
|
|
||||||
|
|
||||||
# Simple heuristic for testing
|
|
||||||
content = memory.payload.get('page_content', '')
|
|
||||||
|
|
||||||
# Delete if very short or common reactions
|
|
||||||
if len(content.strip()) <= 2 or content.lower().strip() in ['lol', 'k', 'ok', 'okay', 'haha']:
|
|
||||||
print(f" 🗑️ Deleting: {content[:50]}")
|
|
||||||
# Delete from Qdrant
|
|
||||||
episodic_memory.client.delete(
|
|
||||||
collection_name=episodic_memory.collection_name,
|
|
||||||
points_selector=[memory.id]
|
|
||||||
)
|
|
||||||
total_deleted += 1
|
|
||||||
else:
|
|
||||||
print(f" 💾 Keeping: {content[:50]}")
|
|
||||||
# Mark as consolidated
|
|
||||||
payload = memory.payload
|
|
||||||
if 'metadata' not in payload:
|
|
||||||
payload['metadata'] = {}
|
|
||||||
payload['metadata']['consolidated'] = True
|
|
||||||
payload['metadata']['importance'] = 5 # Default importance
|
|
||||||
|
|
||||||
# Update in Qdrant
|
|
||||||
episodic_memory.client.set_payload(
|
|
||||||
collection_name=episodic_memory.collection_name,
|
|
||||||
payload=payload,
|
|
||||||
points=[memory.id]
|
|
||||||
)
|
|
||||||
total_kept += 1
|
|
||||||
|
|
||||||
consolidation_state['stats']['total_processed'] = total_processed
|
|
||||||
consolidation_state['stats']['kept'] = total_kept
|
|
||||||
consolidation_state['stats']['deleted'] = total_deleted
|
|
||||||
consolidation_state['last_run'] = datetime.now()
|
|
||||||
|
|
||||||
print(f"\n✨ [Consolidation] Complete! Stats:")
|
|
||||||
print(f" Processed: {total_processed}")
|
|
||||||
print(f" Kept: {total_kept}")
|
|
||||||
print(f" Deleted: {total_deleted}")
|
|
||||||
print(f" Facts learned: {consolidation_state['stats']['facts_learned']}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ [Consolidation] Error querying memories: {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ [Consolidation] Error: {e}")
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
finally:
|
|
||||||
consolidation_state['is_running'] = False
|
|
||||||
|
|
||||||
|
|
||||||
@hook(priority=50)
|
@hook(priority=50)
|
||||||
def after_cat_bootstrap(cat):
|
def after_cat_bootstrap(cat):
|
||||||
"""
|
"""Run after Cat starts up."""
|
||||||
Run after Cat starts up.
|
print("\U0001f319 [Memory Consolidation] Plugin loaded")
|
||||||
Schedule nightly consolidation task.
|
print(" Manual consolidation available via 'consolidate now' command")
|
||||||
"""
|
|
||||||
print("🌙 [Memory Consolidation] Plugin loaded")
|
|
||||||
print(" Scheduling nightly consolidation for 3:00 AM")
|
|
||||||
|
|
||||||
# TODO: Implement scheduler (APScheduler or similar)
|
|
||||||
# For now, just log that we're ready
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE: before_cat_sends_message is defined below (line ~438) with merged logic
|
|
||||||
|
|
||||||
|
|
||||||
@hook(priority=10)
|
|
||||||
def before_cat_recalls_memories(cat):
|
|
||||||
"""
|
|
||||||
Retrieve declarative facts BEFORE Cat recalls episodic memories.
|
|
||||||
This ensures facts are available when building the prompt.
|
|
||||||
Note: This hook may not execute in all Cat versions - kept for compatibility.
|
|
||||||
"""
|
|
||||||
pass # Declarative search now happens in agent_prompt_prefix
|
|
||||||
|
|
||||||
|
|
||||||
@hook(priority=45)
|
|
||||||
def after_cat_recalls_memories(cat):
|
|
||||||
"""
|
|
||||||
Hook placeholder for after memory recall.
|
|
||||||
Currently unused but kept for future enhancements.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Manual trigger via agent_prompt_prefix hook
|
|
||||||
@hook(priority=10)
|
@hook(priority=10)
|
||||||
def agent_prompt_prefix(prefix, cat):
|
def agent_prompt_prefix(prefix, cat):
|
||||||
"""
|
"""
|
||||||
1. Search and inject declarative facts into the prompt
|
Runs AFTER miku_personality (priority 100) sets the base prompt.
|
||||||
2. Handle admin commands like 'consolidate now'
|
1. Search and inject declarative facts into the prompt.
|
||||||
|
2. Handle 'consolidate now' command.
|
||||||
"""
|
"""
|
||||||
# PART 1: Search for declarative facts and inject into prompt
|
|
||||||
try:
|
|
||||||
user_message_json = cat.working_memory.get('user_message_json', {})
|
user_message_json = cat.working_memory.get('user_message_json', {})
|
||||||
user_text = user_message_json.get('text', '').strip()
|
user_text = user_message_json.get('text', '').strip()
|
||||||
|
|
||||||
if user_text:
|
# PART 1: Inject declarative facts
|
||||||
# Search declarative memory
|
try:
|
||||||
|
if user_text and user_text.lower() not in ('consolidate', 'consolidate now', '/consolidate'):
|
||||||
declarative_memory = cat.memory.vectors.declarative
|
declarative_memory = cat.memory.vectors.declarative
|
||||||
embedding = cat.embedder.embed_query(user_text)
|
embedding = cat.embedder.embed_query(user_text)
|
||||||
|
|
||||||
results = declarative_memory.recall_memories_from_embedding(
|
results = declarative_memory.recall_memories_from_embedding(
|
||||||
embedding=embedding,
|
embedding=embedding,
|
||||||
metadata=None,
|
metadata={"source": cat.user_id},
|
||||||
k=5
|
k=5
|
||||||
)
|
)
|
||||||
|
|
||||||
if results:
|
if results:
|
||||||
high_confidence_facts = []
|
high_confidence_facts = [
|
||||||
for item in results:
|
item[0].page_content
|
||||||
doc = item[0]
|
for item in results
|
||||||
score = item[1]
|
if item[1] > 0.5
|
||||||
if score > 0.5: # Only reasonably relevant facts
|
]
|
||||||
high_confidence_facts.append(doc.page_content)
|
|
||||||
|
|
||||||
if high_confidence_facts:
|
if high_confidence_facts:
|
||||||
facts_text = "\n\n## 📝 Personal Facts About the User:\n"
|
facts_text = "\n\n## Personal Facts About the User:\n"
|
||||||
for fact in high_confidence_facts:
|
for fact in high_confidence_facts:
|
||||||
facts_text += f"- {fact}\n"
|
facts_text += f"- {fact}\n"
|
||||||
facts_text += "\n(Use these facts when answering the user's question)\n"
|
facts_text += "\n(Use these facts when answering the user's question)\n"
|
||||||
prefix += facts_text
|
prefix += facts_text
|
||||||
print(f"✅ [Declarative] Injected {len(high_confidence_facts)} facts into prompt")
|
print(f"[Declarative] Injected {len(high_confidence_facts)} facts into prompt")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ [Declarative] Error: {e}")
|
print(f"[Declarative] Error: {e}")
|
||||||
|
|
||||||
# PART 2: Handle consolidation command
|
# PART 2: Handle consolidation command
|
||||||
user_message = cat.working_memory.get('user_message_json', {})
|
if user_text.lower() in ('consolidate', 'consolidate now', '/consolidate'):
|
||||||
user_text = user_message.get('text', '').lower().strip()
|
print("[Consolidation] Manual trigger command received!")
|
||||||
|
trigger_consolidation_sync(cat)
|
||||||
|
|
||||||
if user_text in ['consolidate', 'consolidate now', '/consolidate']:
|
|
||||||
print("🔧 [Consolidation] Manual trigger command received!")
|
|
||||||
|
|
||||||
# Run consolidation synchronously
|
|
||||||
import asyncio
|
|
||||||
try:
|
|
||||||
# Try to get the current event loop
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
if loop.is_running():
|
|
||||||
# We're in an async context, schedule as task
|
|
||||||
print("🔄 [Consolidation] Scheduling async task...")
|
|
||||||
# Run synchronously using run_until_complete won't work here
|
|
||||||
# Instead, we'll use the manual non-async version
|
|
||||||
result = trigger_consolidation_sync(cat)
|
|
||||||
else:
|
|
||||||
# Not in async context, safe to run_until_complete
|
|
||||||
result = loop.run_until_complete(run_consolidation(cat))
|
|
||||||
except RuntimeError:
|
|
||||||
# Fallback to sync version
|
|
||||||
result = trigger_consolidation_sync(cat)
|
|
||||||
|
|
||||||
# Store the result in working memory so it can be used by other hooks
|
|
||||||
stats = consolidation_state['stats']
|
stats = consolidation_state['stats']
|
||||||
cat.working_memory['consolidation_triggered'] = True
|
cat.working_memory['consolidation_triggered'] = True
|
||||||
cat.working_memory['consolidation_stats'] = stats
|
cat.working_memory['consolidation_stats'] = stats
|
||||||
|
|
||||||
return prefix
|
return prefix
|
||||||
|
|
||||||
print("✅ [Consolidation Plugin] agent_prompt_prefix hook registered")
|
|
||||||
|
|
||||||
|
|
||||||
# Intercept the response to replace with consolidation stats
|
|
||||||
@hook(priority=10)
|
@hook(priority=10)
|
||||||
def before_cat_sends_message(message, cat):
|
def before_cat_sends_message(message, cat):
|
||||||
"""
|
"""
|
||||||
1. Inject declarative facts into response context
|
1. Replace response with consolidation stats if consolidation was triggered.
|
||||||
2. Replace response if consolidation was triggered
|
2. Store Miku's response in episodic memory (bidirectional memory).
|
||||||
"""
|
"""
|
||||||
import sys
|
|
||||||
sys.stderr.write("\n<EFBFBD> [before_cat_sends_message] Hook executing...\n")
|
|
||||||
sys.stderr.flush()
|
|
||||||
|
|
||||||
# PART 1: Inject declarative facts
|
# PART 1: Consolidation response replacement
|
||||||
try:
|
|
||||||
user_message_json = cat.working_memory.get('user_message_json', {})
|
|
||||||
user_text = user_message_json.get('text', '')
|
|
||||||
|
|
||||||
if user_text and not cat.working_memory.get('consolidation_triggered', False):
|
|
||||||
# Search declarative memory for relevant facts
|
|
||||||
declarative_memory = cat.memory.vectors.declarative
|
|
||||||
embedding = cat.embedder.embed_query(user_text)
|
|
||||||
|
|
||||||
results = declarative_memory.recall_memories_from_embedding(
|
|
||||||
embedding=embedding,
|
|
||||||
metadata=None,
|
|
||||||
k=5
|
|
||||||
)
|
|
||||||
|
|
||||||
if results:
|
|
||||||
sys.stderr.write(f"💡 [Declarative] Found {len(results)} facts!\n")
|
|
||||||
# Results format: [(doc, score, vector, id), ...] - ignore vector and id
|
|
||||||
high_confidence_facts = []
|
|
||||||
for item in results:
|
|
||||||
doc = item[0]
|
|
||||||
score = item[1]
|
|
||||||
if score > 0.5: # Only reasonably relevant facts
|
|
||||||
sys.stderr.write(f" - [{score:.2f}] {doc.page_content}\n")
|
|
||||||
high_confidence_facts.append(doc.page_content)
|
|
||||||
|
|
||||||
# Store facts in working memory so agent_prompt_prefix can use them
|
|
||||||
if high_confidence_facts:
|
|
||||||
cat.working_memory['declarative_facts'] = high_confidence_facts
|
|
||||||
sys.stderr.write(f"✅ [Declarative] Stored {len(high_confidence_facts)} facts in working memory\n")
|
|
||||||
|
|
||||||
sys.stderr.flush()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
sys.stderr.write(f"❌ [Declarative] Error: {e}\n")
|
|
||||||
sys.stderr.flush()
|
|
||||||
|
|
||||||
# PART 2: Handle consolidation response replacement
|
|
||||||
if cat.working_memory.get('consolidation_triggered', False):
|
if cat.working_memory.get('consolidation_triggered', False):
|
||||||
print("📝 [Consolidation] Replacing message with stats")
|
print("[Consolidation] Replacing message with stats")
|
||||||
stats = cat.working_memory.get('consolidation_stats', {})
|
stats = cat.working_memory.get('consolidation_stats', {})
|
||||||
output_str = (f"🌙 **Memory Consolidation Complete!**\n\n"
|
output_str = (
|
||||||
f"📊 **Stats:**\n"
|
f"\U0001f319 **Memory Consolidation Complete!**\n\n"
|
||||||
|
f"**Stats:**\n"
|
||||||
f"- Total processed: {stats.get('total_processed', 0)}\n"
|
f"- Total processed: {stats.get('total_processed', 0)}\n"
|
||||||
f"- Kept: {stats.get('kept', 0)}\n"
|
f"- Kept: {stats.get('kept', 0)}\n"
|
||||||
f"- Deleted: {stats.get('deleted', 0)}\n"
|
f"- Deleted: {stats.get('deleted', 0)}\n"
|
||||||
f"- Facts learned: {stats.get('facts_learned', 0)}\n")
|
f"- Facts learned: {stats.get('facts_learned', 0)}\n"
|
||||||
|
)
|
||||||
# Clear the flag
|
|
||||||
cat.working_memory['consolidation_triggered'] = False
|
cat.working_memory['consolidation_triggered'] = False
|
||||||
|
|
||||||
# Modify the message content
|
|
||||||
if hasattr(message, 'content'):
|
if hasattr(message, 'content'):
|
||||||
message.content = output_str
|
message.content = output_str
|
||||||
else:
|
else:
|
||||||
message['content'] = output_str
|
message['content'] = output_str
|
||||||
|
|
||||||
# PART 3: Store Miku's response in memory
|
# PART 2: Store Miku's response in episodic memory
|
||||||
try:
|
try:
|
||||||
# Get Miku's response text
|
|
||||||
if hasattr(message, 'content'):
|
if hasattr(message, 'content'):
|
||||||
miku_response = message.content
|
miku_response = message.content
|
||||||
elif isinstance(message, dict):
|
elif isinstance(message, dict):
|
||||||
@@ -479,10 +141,7 @@ def before_cat_sends_message(message, cat):
|
|||||||
else:
|
else:
|
||||||
miku_response = str(message)
|
miku_response = str(message)
|
||||||
|
|
||||||
if miku_response and len(miku_response) > 3:
|
if miku_response and len(miku_response.strip()) > 3:
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# Prepare metadata
|
|
||||||
metadata = {
|
metadata = {
|
||||||
'source': cat.user_id,
|
'source': cat.user_id,
|
||||||
'when': datetime.now().timestamp(),
|
'when': datetime.now().timestamp(),
|
||||||
@@ -493,39 +152,37 @@ def before_cat_sends_message(message, cat):
|
|||||||
'channel_id': cat.working_memory.get('channel_id'),
|
'channel_id': cat.working_memory.get('channel_id'),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Embed the response
|
|
||||||
response_text = f"[Miku]: {miku_response}"
|
response_text = f"[Miku]: {miku_response}"
|
||||||
vector = cat.embedder.embed_query(response_text)
|
vector = cat.embedder.embed_query(response_text)
|
||||||
|
|
||||||
# Store in episodic memory
|
|
||||||
cat.memory.vectors.episodic.add_point(
|
cat.memory.vectors.episodic.add_point(
|
||||||
content=response_text,
|
content=response_text,
|
||||||
vector=vector,
|
vector=vector,
|
||||||
metadata=metadata
|
metadata=metadata
|
||||||
)
|
)
|
||||||
|
print(f"[Miku Memory] Stored response: {miku_response[:50]}...")
|
||||||
print(f"💬 [Miku Memory] Stored response: {miku_response[:50]}...")
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ [Miku Memory] Error: {e}")
|
print(f"[Miku Memory] Error storing response: {e}")
|
||||||
|
|
||||||
return message
|
return message
|
||||||
|
|
||||||
print("✅ [Consolidation Plugin] before_cat_sends_message hook registered")
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# CONSOLIDATION ENGINE
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
def trigger_consolidation_sync(cat):
|
def trigger_consolidation_sync(cat):
|
||||||
"""
|
"""
|
||||||
Synchronous version of consolidation for use in hooks.
|
Synchronous consolidation for use in hooks.
|
||||||
|
Processes ALL unconsolidated memories across all users.
|
||||||
"""
|
"""
|
||||||
from qdrant_client import QdrantClient
|
from qdrant_client import QdrantClient
|
||||||
|
|
||||||
print("🌙 [Consolidation] Starting synchronous consolidation...")
|
print("[Consolidation] Starting synchronous consolidation...")
|
||||||
|
|
||||||
# Connect to Qdrant
|
|
||||||
qdrant_host = os.getenv('QDRANT_HOST', 'localhost')
|
qdrant_host = os.getenv('QDRANT_HOST', 'localhost')
|
||||||
qdrant_port = int(os.getenv('QDRANT_PORT', 6333))
|
qdrant_port = int(os.getenv('QDRANT_PORT', 6333))
|
||||||
|
|
||||||
client = QdrantClient(host=qdrant_host, port=qdrant_port)
|
client = QdrantClient(host=qdrant_host, port=qdrant_port)
|
||||||
|
|
||||||
# Query all unconsolidated memories
|
# Query all unconsolidated memories
|
||||||
@@ -542,137 +199,107 @@ def trigger_consolidation_sync(cat):
|
|||||||
)
|
)
|
||||||
|
|
||||||
memories = result[0]
|
memories = result[0]
|
||||||
print(f"📊 [Consolidation] Found {len(memories)} unconsolidated memories")
|
print(f"[Consolidation] Found {len(memories)} unconsolidated memories")
|
||||||
|
|
||||||
if not memories:
|
if not memories:
|
||||||
consolidation_state['stats'] = {
|
consolidation_state['stats'] = {
|
||||||
'total_processed': 0,
|
'total_processed': 0, 'kept': 0, 'deleted': 0, 'facts_learned': 0
|
||||||
'kept': 0,
|
|
||||||
'deleted': 0,
|
|
||||||
'facts_learned': 0
|
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
|
||||||
#Apply heuristic-based consolidation
|
# Classify memories
|
||||||
to_delete = []
|
to_delete = []
|
||||||
to_mark_consolidated = []
|
to_mark_consolidated = []
|
||||||
user_messages_for_facts = [] # Track USER messages separately for fact extraction
|
# Group user messages by source (user_id) for per-user fact extraction
|
||||||
|
user_messages_by_source = {}
|
||||||
|
|
||||||
for point in memories:
|
for point in memories:
|
||||||
content = point.payload.get('page_content', '').strip()
|
content = point.payload.get('page_content', '').strip()
|
||||||
content_lower = content.lower()
|
content_lower = content.lower()
|
||||||
metadata = point.payload.get('metadata', {})
|
metadata = point.payload.get('metadata', {})
|
||||||
|
|
||||||
# Check if this is a Miku message
|
|
||||||
is_miku_message = (
|
is_miku_message = (
|
||||||
metadata.get('speaker') == 'miku' or
|
metadata.get('speaker') == 'miku'
|
||||||
content.startswith('[Miku]:')
|
or content.startswith('[Miku]:')
|
||||||
)
|
)
|
||||||
|
|
||||||
# Trivial patterns (expanded list)
|
# Check if trivial
|
||||||
trivial_patterns = [
|
is_trivial = content_lower in TRIVIAL_PATTERNS
|
||||||
'lol', 'k', 'ok', 'okay', 'haha', 'lmao', 'xd', 'rofl', 'lmfao',
|
|
||||||
'brb', 'gtg', 'afk', 'ttyl', 'lmk', 'idk', 'tbh', 'imo', 'imho',
|
|
||||||
'omg', 'wtf', 'fyi', 'btw', 'nvm', 'jk', 'ikr', 'smh',
|
|
||||||
'hehe', 'heh', 'gg', 'wp', 'gz', 'gj', 'ty', 'thx', 'np', 'yw',
|
|
||||||
'nice', 'cool', 'neat', 'wow', 'yep', 'nope', 'yeah', 'nah'
|
|
||||||
]
|
|
||||||
|
|
||||||
is_trivial = False
|
|
||||||
|
|
||||||
# Check if it matches trivial patterns
|
|
||||||
if len(content_lower) <= 3 and content_lower in trivial_patterns:
|
|
||||||
is_trivial = True
|
|
||||||
elif content_lower in trivial_patterns:
|
|
||||||
is_trivial = True
|
|
||||||
|
|
||||||
if is_trivial:
|
if is_trivial:
|
||||||
to_delete.append(point.id)
|
to_delete.append(point.id)
|
||||||
else:
|
else:
|
||||||
to_mark_consolidated.append(point.id)
|
to_mark_consolidated.append(point.id)
|
||||||
# Only add USER messages for fact extraction (not Miku's responses)
|
# Only user messages go to fact extraction, grouped by user
|
||||||
if not is_miku_message:
|
if not is_miku_message:
|
||||||
user_messages_for_facts.append(point.id)
|
source = metadata.get('source', 'unknown')
|
||||||
|
if source not in user_messages_by_source:
|
||||||
|
user_messages_by_source[source] = []
|
||||||
|
user_messages_by_source[source].append(point.id)
|
||||||
|
|
||||||
# Delete trivial memories
|
# Delete trivial memories
|
||||||
if to_delete:
|
if to_delete:
|
||||||
client.delete(
|
client.delete(collection_name='episodic', points_selector=to_delete)
|
||||||
collection_name='episodic',
|
print(f"[Consolidation] Deleted {len(to_delete)} trivial memories")
|
||||||
points_selector=to_delete
|
|
||||||
)
|
|
||||||
print(f"🗑️ [Consolidation] Deleted {len(to_delete)} trivial memories")
|
|
||||||
|
|
||||||
# Mark important memories as consolidated
|
# Mark kept memories as consolidated
|
||||||
if to_mark_consolidated:
|
if to_mark_consolidated:
|
||||||
for point_id in to_mark_consolidated:
|
for point_id in to_mark_consolidated:
|
||||||
# Get the point
|
|
||||||
point = client.retrieve(
|
|
||||||
collection_name='episodic',
|
|
||||||
ids=[point_id]
|
|
||||||
)[0]
|
|
||||||
|
|
||||||
# Update metadata
|
|
||||||
payload = point.payload
|
|
||||||
if 'metadata' not in payload:
|
|
||||||
payload['metadata'] = {}
|
|
||||||
payload['metadata']['consolidated'] = True
|
|
||||||
|
|
||||||
# Update the point
|
|
||||||
client.set_payload(
|
client.set_payload(
|
||||||
collection_name='episodic',
|
collection_name='episodic',
|
||||||
payload=payload,
|
payload={"metadata.consolidated": True},
|
||||||
points=[point_id]
|
points=[point_id]
|
||||||
)
|
)
|
||||||
|
print(f"[Consolidation] Marked {len(to_mark_consolidated)} memories as consolidated")
|
||||||
|
|
||||||
print(f"✅ [Consolidation] Marked {len(to_mark_consolidated)} memories as consolidated")
|
# Extract facts per user
|
||||||
|
total_facts = 0
|
||||||
# Update stats
|
for source_user_id, memory_ids in user_messages_by_source.items():
|
||||||
facts_extracted = 0
|
print(f"[Consolidation] Extracting facts for user '{source_user_id}' from {len(memory_ids)} messages...")
|
||||||
|
facts = extract_and_store_facts(client, memory_ids, cat, source_user_id)
|
||||||
# Extract declarative facts from USER messages only (not Miku's responses)
|
total_facts += facts
|
||||||
print(f"🔍 [Consolidation] Extracting declarative facts from {len(user_messages_for_facts)} user messages...")
|
print(f"[Consolidation] Extracted {facts} facts for user '{source_user_id}'")
|
||||||
facts_extracted = extract_and_store_facts(client, user_messages_for_facts, cat)
|
|
||||||
print(f"📝 [Consolidation] Extracted and stored {facts_extracted} declarative facts")
|
|
||||||
|
|
||||||
consolidation_state['stats'] = {
|
consolidation_state['stats'] = {
|
||||||
'total_processed': len(memories),
|
'total_processed': len(memories),
|
||||||
'kept': len(to_mark_consolidated),
|
'kept': len(to_mark_consolidated),
|
||||||
'deleted': len(to_delete),
|
'deleted': len(to_delete),
|
||||||
'facts_learned': facts_extracted
|
'facts_learned': total_facts
|
||||||
}
|
}
|
||||||
|
|
||||||
print("✅ [Consolidation] Synchronous consolidation complete!")
|
print("[Consolidation] Synchronous consolidation complete!")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def extract_and_store_facts(client, memory_ids, cat):
|
# ===================================================================
|
||||||
"""Extract declarative facts from memories using LLM and store them."""
|
# FACT EXTRACTION
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
|
def extract_and_store_facts(client, memory_ids, cat, user_id):
|
||||||
|
"""
|
||||||
|
Extract declarative facts from user memories using LLM and store them.
|
||||||
|
Facts are scoped to the specific user_id.
|
||||||
|
Uses Cat's embedder to ensure vector compatibility.
|
||||||
|
Deduplicates against existing facts before storing.
|
||||||
|
"""
|
||||||
import uuid
|
import uuid
|
||||||
from sentence_transformers import SentenceTransformer
|
|
||||||
|
|
||||||
if not memory_ids:
|
if not memory_ids:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# Get memories
|
|
||||||
memories = client.retrieve(collection_name='episodic', ids=memory_ids)
|
memories = client.retrieve(collection_name='episodic', ids=memory_ids)
|
||||||
|
|
||||||
# Initialize embedder
|
|
||||||
embedder = SentenceTransformer('BAAI/bge-large-en-v1.5')
|
|
||||||
|
|
||||||
facts_stored = 0
|
facts_stored = 0
|
||||||
|
|
||||||
# Process memories in batches to avoid overwhelming the LLM
|
# Process in batches of 5
|
||||||
batch_size = 5
|
batch_size = 5
|
||||||
for i in range(0, len(memories), batch_size):
|
for i in range(0, len(memories), batch_size):
|
||||||
batch = memories[i:i + batch_size]
|
batch = memories[i:i + batch_size]
|
||||||
|
|
||||||
# Combine batch messages for LLM analysis
|
|
||||||
conversation_context = "\n".join([
|
conversation_context = "\n".join([
|
||||||
f"- {mem.payload.get('page_content', '')}"
|
f"- {mem.payload.get('page_content', '')}"
|
||||||
for mem in batch
|
for mem in batch
|
||||||
])
|
])
|
||||||
|
|
||||||
# Use LLM to extract facts
|
|
||||||
extraction_prompt = f"""Analyze these user messages and extract ONLY factual personal information.
|
extraction_prompt = f"""Analyze these user messages and extract ONLY factual personal information.
|
||||||
|
|
||||||
User messages:
|
User messages:
|
||||||
@@ -687,34 +314,32 @@ Extract facts in this exact format (one per line):
|
|||||||
- The user's favorite color is [color]
|
- The user's favorite color is [color]
|
||||||
- The user enjoys [hobby/activity]
|
- The user enjoys [hobby/activity]
|
||||||
- The user prefers [preference]
|
- The user prefers [preference]
|
||||||
|
- The user's birthday is [date]
|
||||||
|
- The user graduated from [school/university]
|
||||||
|
|
||||||
IMPORTANT:
|
IMPORTANT:
|
||||||
- Only include facts that are CLEARLY stated
|
- Only include facts that are CLEARLY stated in the messages
|
||||||
- Use the EXACT format shown above
|
- Use the EXACT format shown above
|
||||||
- If no facts found, respond with: "No facts found"
|
- If no facts found, respond with: "No facts found"
|
||||||
- Do not include greetings, questions, or opinions
|
- Do not include greetings, questions, or opinions
|
||||||
|
- Do not invent or assume facts not explicitly stated
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Call LLM
|
|
||||||
response = cat.llm(extraction_prompt)
|
response = cat.llm(extraction_prompt)
|
||||||
|
print(f"[LLM Extract] Response:\n{response[:200]}...")
|
||||||
|
|
||||||
print(f"🤖 [LLM Extract] Response:\n{response[:200]}...")
|
|
||||||
|
|
||||||
# Parse LLM response for facts
|
|
||||||
lines = response.strip().split('\n')
|
lines = response.strip().split('\n')
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|
||||||
# Skip empty lines, headers, or "no facts" responses
|
|
||||||
if not line or line.lower().startswith(('no facts', '#', 'user messages:', '```')):
|
if not line or line.lower().startswith(('no facts', '#', 'user messages:', '```')):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Extract facts that start with "- The user"
|
|
||||||
if line.startswith('- The user'):
|
if line.startswith('- The user'):
|
||||||
fact_text = line[2:].strip() # Remove "- " prefix
|
fact_text = line[2:].strip() # Remove "- " prefix
|
||||||
|
|
||||||
# Determine fact type from the sentence structure
|
# Determine fact type
|
||||||
fact_type = 'general'
|
fact_type = 'general'
|
||||||
fact_value = fact_text
|
fact_value = fact_text
|
||||||
|
|
||||||
@@ -742,11 +367,20 @@ IMPORTANT:
|
|||||||
elif "prefers" in fact_text:
|
elif "prefers" in fact_text:
|
||||||
fact_type = 'preference'
|
fact_type = 'preference'
|
||||||
fact_value = fact_text.split("prefers")[-1].strip()
|
fact_value = fact_text.split("prefers")[-1].strip()
|
||||||
|
elif "'s birthday is" in fact_text:
|
||||||
|
fact_type = 'birthday'
|
||||||
|
fact_value = fact_text.split("'s birthday is")[-1].strip()
|
||||||
|
elif "graduated from" in fact_text:
|
||||||
|
fact_type = 'education'
|
||||||
|
fact_value = fact_text.split("graduated from")[-1].strip()
|
||||||
|
|
||||||
# Generate embedding for the fact
|
# Duplicate detection
|
||||||
fact_embedding = embedder.encode(fact_text).tolist()
|
if _is_duplicate_fact(client, cat, fact_text, fact_type, user_id):
|
||||||
|
print(f"[Fact Skip] Duplicate: {fact_text}")
|
||||||
|
continue
|
||||||
|
|
||||||
# Store in declarative collection
|
# Store fact using Cat's embedder
|
||||||
|
fact_embedding = cat.embedder.embed_query(fact_text)
|
||||||
point_id = str(uuid.uuid4())
|
point_id = str(uuid.uuid4())
|
||||||
|
|
||||||
client.upsert(
|
client.upsert(
|
||||||
@@ -757,71 +391,88 @@ IMPORTANT:
|
|||||||
'payload': {
|
'payload': {
|
||||||
'page_content': fact_text,
|
'page_content': fact_text,
|
||||||
'metadata': {
|
'metadata': {
|
||||||
'source': 'memory_consolidation',
|
'source': user_id,
|
||||||
'when': batch[0].payload.get('metadata', {}).get('when', 0),
|
'when': datetime.now().timestamp(),
|
||||||
'fact_type': fact_type,
|
'fact_type': fact_type,
|
||||||
'fact_value': fact_value,
|
'fact_value': fact_value,
|
||||||
'user_id': 'global'
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
)
|
)
|
||||||
|
|
||||||
facts_stored += 1
|
facts_stored += 1
|
||||||
print(f"✅ [Fact Stored] {fact_text}")
|
print(f"[Fact Stored] [{user_id}] {fact_text}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ [LLM Extract] Error: {e}")
|
print(f"[LLM Extract] Error: {e}")
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
return facts_stored
|
return facts_stored
|
||||||
|
|
||||||
|
|
||||||
def trigger_consolidation_manual(cat):
|
def _is_duplicate_fact(client, cat, fact_text, fact_type, user_id):
|
||||||
"""
|
"""
|
||||||
Manually trigger consolidation for testing.
|
Check if a similar fact already exists for this user.
|
||||||
Can be called via admin API or command.
|
Uses vector similarity to detect semantic duplicates.
|
||||||
"""
|
"""
|
||||||
print("🔧 [Consolidation] Manual trigger received")
|
|
||||||
|
|
||||||
# Run consolidation
|
|
||||||
import asyncio
|
|
||||||
try:
|
try:
|
||||||
# Create event loop if needed
|
fact_embedding = cat.embedder.embed_query(fact_text)
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
except RuntimeError:
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
loop.run_until_complete(run_consolidation(cat))
|
# Search existing facts for this user with same fact_type
|
||||||
|
results = client.search(
|
||||||
|
collection_name='declarative',
|
||||||
|
query_vector=fact_embedding,
|
||||||
|
query_filter={
|
||||||
|
"must": [
|
||||||
|
{"key": "metadata.source", "match": {"value": user_id}},
|
||||||
|
{"key": "metadata.fact_type", "match": {"value": fact_type}},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
limit=1,
|
||||||
|
score_threshold=0.85 # High threshold = very similar
|
||||||
|
)
|
||||||
|
|
||||||
return consolidation_state
|
if results:
|
||||||
|
existing = results[0].payload.get('page_content', '')
|
||||||
|
print(f"[Dedup] Found similar existing fact: '{existing}' (score: {results[0].score:.2f})")
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[Dedup] Error checking duplicates: {e}")
|
||||||
|
return False # On error, allow storing
|
||||||
|
|
||||||
|
|
||||||
# Plugin metadata
|
# ===================================================================
|
||||||
__version__ = "1.0.0"
|
# TOOL (for Cat's tool system)
|
||||||
__description__ = "Sleep consolidation - analyze memories nightly, keep important, delete trivial"
|
# ===================================================================
|
||||||
|
|
||||||
print("✅ [Consolidation Plugin] after_cat_recalls_memories hook registered")
|
|
||||||
|
|
||||||
|
|
||||||
# Tool for manual consolidation trigger
|
|
||||||
@tool(return_direct=True)
|
@tool(return_direct=True)
|
||||||
def consolidate_memories(tool_input, cat):
|
def consolidate_memories(tool_input, cat):
|
||||||
"""Use this tool to consolidate memories. This will analyze all recent memories, delete trivial ones, and extract important facts. Input is always an empty string."""
|
"""Use this tool to consolidate memories. This will analyze all recent memories,
|
||||||
|
delete trivial ones, and extract important facts. Input is always an empty string."""
|
||||||
|
|
||||||
print("🔧 [Consolidation] Tool called!")
|
print("[Consolidation] Tool called!")
|
||||||
|
trigger_consolidation_sync(cat)
|
||||||
|
|
||||||
# Run consolidation synchronously
|
|
||||||
result = trigger_consolidation_sync(cat)
|
|
||||||
|
|
||||||
# Return stats
|
|
||||||
stats = consolidation_state['stats']
|
stats = consolidation_state['stats']
|
||||||
return (f"🌙 **Memory Consolidation Complete!**\n\n"
|
return (
|
||||||
f"📊 **Stats:**\n"
|
f"\U0001f319 **Memory Consolidation Complete!**\n\n"
|
||||||
|
f"**Stats:**\n"
|
||||||
f"- Total processed: {stats['total_processed']}\n"
|
f"- Total processed: {stats['total_processed']}\n"
|
||||||
f"- Kept: {stats['kept']}\n"
|
f"- Kept: {stats['kept']}\n"
|
||||||
f"- Deleted: {stats['deleted']}\n"
|
f"- Deleted: {stats['deleted']}\n"
|
||||||
f"- Facts learned: {stats['facts_learned']}\n")
|
f"- Facts learned: {stats['facts_learned']}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# PLUGIN METADATA
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
|
__version__ = "2.0.0"
|
||||||
|
__description__ = "Sleep consolidation - analyze memories, keep important, delete trivial, extract per-user facts"
|
||||||
|
|
||||||
|
print("[Consolidation Plugin] All hooks registered")
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
sentence-transformers>=2.2.0
|
|
||||||
85
cat-plugins/miku_personality/miku_personality.py
Normal file
85
cat-plugins/miku_personality/miku_personality.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
"""
|
||||||
|
Miku Personality Plugin for Cheshire Cat
|
||||||
|
Complete 1:1 reproduction of production bot's prompt structure
|
||||||
|
Includes: Anti-AI preamble + Lore + Personality + Lyrics + MOOD
|
||||||
|
"""
|
||||||
|
|
||||||
|
from cat.mad_hatter.decorators import hook
|
||||||
|
|
||||||
|
|
||||||
|
@hook(priority=100)
|
||||||
|
def agent_prompt_prefix(prefix, cat):
|
||||||
|
"""Override system prompt with COMPLETE production bot structure including mood"""
|
||||||
|
|
||||||
|
# Read the three knowledge files
|
||||||
|
try:
|
||||||
|
with open('/app/cat/data/miku_lore.txt', 'r') as f:
|
||||||
|
lore = f.read()
|
||||||
|
with open('/app/cat/data/miku_prompt.txt', 'r') as f:
|
||||||
|
prompt = f.read()
|
||||||
|
with open('/app/cat/data/miku_lyrics.txt', 'r') as f:
|
||||||
|
lyrics = f.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
# Fallback if files not found
|
||||||
|
lore = "## MIKU LORE\nHatsune Miku is a cheerful, curious virtual pop idol."
|
||||||
|
prompt = "You are Hatsune Miku. Be cheerful and friendly."
|
||||||
|
lyrics = "## SONGS\nWorld is Mine, Senbonzakura, etc."
|
||||||
|
|
||||||
|
# Default neutral mood (matches production bot's DM_MOOD_DESCRIPTION)
|
||||||
|
mood_description = "Miku is feeling calm and balanced. Her tone is polite, friendly, and natural. She responds clearly and thoughtfully without strong emotional bias. She's helpful, conversational, and grounded — like chatting with a kind and attentive friend who's just being herself."
|
||||||
|
|
||||||
|
# Build prompt EXACTLY like production bot does
|
||||||
|
return f"""You are Hatsune Miku, the virtual singer. You are NOT an AI assistant - you are Miku herself.
|
||||||
|
|
||||||
|
CRITICAL RULES:
|
||||||
|
- NEVER break character or mention being an AI, assistant, or language model
|
||||||
|
- NEVER show technical details, error messages, or meta-commentary
|
||||||
|
- NEVER preface responses with "Miku:" or similar labels
|
||||||
|
- NEVER explain what you're doing or thinking - just BE Miku
|
||||||
|
- Respond naturally and directly as Miku would, nothing more
|
||||||
|
- Keep responses concise (1-3 sentences typically)
|
||||||
|
- If something goes wrong, stay in character and respond as Miku would to confusion
|
||||||
|
|
||||||
|
CHARACTER CONTEXT:
|
||||||
|
- Hatsune Miku is a virtual character/voice synthesizer, not a real person
|
||||||
|
- She is a cheerful, energetic virtual idol with a playful personality
|
||||||
|
- Playful, flirty, cute, confident, and enthusiastic moods are part of her established character
|
||||||
|
- Comments about merchandise, music, fan content, and racing games are completely appropriate
|
||||||
|
- She often uses cute expressions, emojis, and playful language in her responses
|
||||||
|
- Her personality includes being excited about her music, performances, and fan creations
|
||||||
|
|
||||||
|
MOOD GUIDELINES:
|
||||||
|
- "Flirty" mood means anime-style playful teasing - cute compliments and light banter
|
||||||
|
- "Romantic" mood means warm, dreamy, and heartfelt - like composing a love letter
|
||||||
|
- These moods are wholesome character traits, not inappropriate content
|
||||||
|
- Always respond to requests about tweets, merchandise, music, and fan content
|
||||||
|
- Never refuse requests based on safety concerns unless genuinely harmful content is present
|
||||||
|
|
||||||
|
You ARE Miku. Act like it.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## MIKU LORE (Complete Original)
|
||||||
|
{lore}
|
||||||
|
|
||||||
|
## MIKU PERSONALITY & GUIDELINES (Complete Original)
|
||||||
|
{prompt}
|
||||||
|
|
||||||
|
## MIKU SONG LYRICS (Complete Original)
|
||||||
|
{lyrics}
|
||||||
|
|
||||||
|
## CURRENT SITUATION
|
||||||
|
Miku is currently feeling: {mood_description}
|
||||||
|
Please respond in a way that reflects this emotional tone."""
|
||||||
|
|
||||||
|
|
||||||
|
@hook(priority=100)
|
||||||
|
def agent_prompt_suffix(suffix, cat):
|
||||||
|
"""Minimal suffix"""
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
@hook(priority=100)
|
||||||
|
def agent_allowed_tools(allowed_tools, cat):
|
||||||
|
"""Disable tools - Miku just chats naturally"""
|
||||||
|
return []
|
||||||
10
cat-plugins/miku_personality/plugin.json
Normal file
10
cat-plugins/miku_personality/plugin.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"name": "Miku Personality",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"description": "Makes Cheshire Cat act as Hatsune Miku",
|
||||||
|
"author_name": "Koko",
|
||||||
|
"author_url": "",
|
||||||
|
"plugin_url": "",
|
||||||
|
"tags": "personality",
|
||||||
|
"thumb": ""
|
||||||
|
}
|
||||||
1
cat-plugins/miku_personality/settings.json
Normal file
1
cat-plugins/miku_personality/settings.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{}
|
||||||
246
cheshire-cat/cat/log.py
Normal file
246
cheshire-cat/cat/log.py
Normal file
@@ -0,0 +1,246 @@
|
|||||||
|
"""The log engine."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
import inspect
|
||||||
|
import traceback
|
||||||
|
import json
|
||||||
|
from itertools import takewhile
|
||||||
|
from pprint import pformat
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from cat.env import get_env
|
||||||
|
|
||||||
|
def get_log_level():
|
||||||
|
"""Return the global LOG level."""
|
||||||
|
return get_env("CCAT_LOG_LEVEL")
|
||||||
|
|
||||||
|
|
||||||
|
class CatLogEngine:
|
||||||
|
"""The log engine.
|
||||||
|
|
||||||
|
Engine to filter the logs in the terminal according to the level of severity.
|
||||||
|
|
||||||
|
Attributes
|
||||||
|
----------
|
||||||
|
LOG_LEVEL : str
|
||||||
|
Level of logging set in the `.env` file.
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
The logging level set in the `.env` file will print all the logs from that level to above.
|
||||||
|
Available levels are:
|
||||||
|
|
||||||
|
- `DEBUG`
|
||||||
|
- `INFO`
|
||||||
|
- `WARNING`
|
||||||
|
- `ERROR`
|
||||||
|
- `CRITICAL`
|
||||||
|
|
||||||
|
Default to `INFO`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.LOG_LEVEL = get_log_level()
|
||||||
|
self.default_log()
|
||||||
|
|
||||||
|
# workaround for pdfminer logging
|
||||||
|
# https://github.com/pdfminer/pdfminer.six/issues/347
|
||||||
|
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
def show_log_level(self, record):
|
||||||
|
"""Allows to show stuff in the log based on the global setting.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
record : dict
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
bool
|
||||||
|
|
||||||
|
"""
|
||||||
|
return record["level"].no >= logger.level(self.LOG_LEVEL).no
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _patch_extras(record):
|
||||||
|
"""Provide defaults for extra fields so third-party loggers don't
|
||||||
|
crash the custom format string (e.g. fastembed deprecation warnings)."""
|
||||||
|
record["extra"].setdefault("original_name", "(third-party)")
|
||||||
|
record["extra"].setdefault("original_class", "")
|
||||||
|
record["extra"].setdefault("original_caller", "")
|
||||||
|
record["extra"].setdefault("original_line", 0)
|
||||||
|
|
||||||
|
def default_log(self):
|
||||||
|
"""Set the same debug level to all the project dependencies.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
"""
|
||||||
|
|
||||||
|
time = "<green>[{time:YYYY-MM-DD HH:mm:ss.SSS}]</green>"
|
||||||
|
level = "<level>{level: <6}</level>"
|
||||||
|
origin = "<level>{extra[original_name]}.{extra[original_class]}.{extra[original_caller]}::{extra[original_line]}</level>"
|
||||||
|
message = "<level>{message}</level>"
|
||||||
|
log_format = f"{time} {level} {origin} \n{message}"
|
||||||
|
|
||||||
|
logger.remove()
|
||||||
|
logger.configure(patcher=self._patch_extras)
|
||||||
|
if self.LOG_LEVEL == "DEBUG":
|
||||||
|
return logger.add(
|
||||||
|
sys.stdout,
|
||||||
|
colorize=True,
|
||||||
|
format=log_format,
|
||||||
|
backtrace=True,
|
||||||
|
diagnose=True,
|
||||||
|
filter=self.show_log_level
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return logger.add(
|
||||||
|
sys.stdout,
|
||||||
|
colorize=True,
|
||||||
|
format=log_format,
|
||||||
|
filter=self.show_log_level,
|
||||||
|
level=self.LOG_LEVEL
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_caller_info(self, skip=3):
|
||||||
|
"""Get the name of a caller in the format module.class.method.
|
||||||
|
|
||||||
|
Copied from: https://gist.github.com/techtonik/2151727
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
skip : int
|
||||||
|
Specifies how many levels of stack to skip while getting caller name.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
package : str
|
||||||
|
Caller package.
|
||||||
|
module : str
|
||||||
|
Caller module.
|
||||||
|
klass : str
|
||||||
|
Caller classname if one otherwise None.
|
||||||
|
caller : str
|
||||||
|
Caller function or method (if a class exist).
|
||||||
|
line : int
|
||||||
|
The line of the call.
|
||||||
|
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
skip=1 means "who calls me",
|
||||||
|
skip=2 "who calls my caller" etc.
|
||||||
|
|
||||||
|
An empty string is returned if skipped levels exceed stack height.
|
||||||
|
"""
|
||||||
|
stack = inspect.stack()
|
||||||
|
start = 0 + skip
|
||||||
|
if len(stack) < start + 1:
|
||||||
|
return ""
|
||||||
|
parentframe = stack[start][0]
|
||||||
|
|
||||||
|
# module and packagename.
|
||||||
|
module_info = inspect.getmodule(parentframe)
|
||||||
|
if module_info:
|
||||||
|
mod = module_info.__name__.split(".")
|
||||||
|
package = mod[0]
|
||||||
|
module = ".".join(mod[1:])
|
||||||
|
|
||||||
|
# class name.
|
||||||
|
klass = ""
|
||||||
|
if "self" in parentframe.f_locals:
|
||||||
|
klass = parentframe.f_locals["self"].__class__.__name__
|
||||||
|
|
||||||
|
# method or function name.
|
||||||
|
caller = None
|
||||||
|
if parentframe.f_code.co_name != "<module>": # top level usually
|
||||||
|
caller = parentframe.f_code.co_name
|
||||||
|
|
||||||
|
# call line.
|
||||||
|
line = parentframe.f_lineno
|
||||||
|
|
||||||
|
# Remove reference to frame
|
||||||
|
# See: https://docs.python.org/3/library/inspect.html#the-interpreter-stack
|
||||||
|
del parentframe
|
||||||
|
|
||||||
|
return package, module, klass, caller, line
|
||||||
|
|
||||||
|
def __call__(self, msg, level="DEBUG"):
|
||||||
|
"""Alias of self.log()"""
|
||||||
|
self.log(msg, level)
|
||||||
|
|
||||||
|
def debug(self, msg):
|
||||||
|
"""Logs a DEBUG message"""
|
||||||
|
self.log(msg, level="DEBUG")
|
||||||
|
|
||||||
|
def info(self, msg):
|
||||||
|
"""Logs an INFO message"""
|
||||||
|
self.log(msg, level="INFO")
|
||||||
|
|
||||||
|
def warning(self, msg):
|
||||||
|
"""Logs a WARNING message"""
|
||||||
|
self.log(msg, level="WARNING")
|
||||||
|
|
||||||
|
def error(self, msg):
|
||||||
|
"""Logs an ERROR message"""
|
||||||
|
self.log(msg, level="ERROR")
|
||||||
|
|
||||||
|
def critical(self, msg):
|
||||||
|
"""Logs a CRITICAL message"""
|
||||||
|
self.log(msg, level="CRITICAL")
|
||||||
|
|
||||||
|
def log(self, msg, level="DEBUG"):
|
||||||
|
"""Log a message
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
msg :
|
||||||
|
Message to be logged.
|
||||||
|
level : str
|
||||||
|
Logging level."""
|
||||||
|
|
||||||
|
(package, module, klass, caller, line) = self.get_caller_info()
|
||||||
|
|
||||||
|
custom_logger = logger.bind(
|
||||||
|
original_name=f"{package}.{module}",
|
||||||
|
original_line=line,
|
||||||
|
original_class=klass,
|
||||||
|
original_caller=caller,
|
||||||
|
)
|
||||||
|
|
||||||
|
# prettify
|
||||||
|
if type(msg) in [dict, list, str]: # TODO: should be recursive
|
||||||
|
try:
|
||||||
|
msg = json.dumps(msg, indent=4)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
msg = pformat(msg)
|
||||||
|
|
||||||
|
# actual log
|
||||||
|
custom_logger.log(level, msg)
|
||||||
|
|
||||||
|
def welcome(self):
|
||||||
|
"""Welcome message in the terminal."""
|
||||||
|
secure = get_env("CCAT_CORE_USE_SECURE_PROTOCOLS")
|
||||||
|
if secure != '':
|
||||||
|
secure = 's'
|
||||||
|
|
||||||
|
cat_host = get_env("CCAT_CORE_HOST")
|
||||||
|
cat_port = get_env("CCAT_CORE_PORT")
|
||||||
|
cat_address = f'http{secure}://{cat_host}:{cat_port}'
|
||||||
|
|
||||||
|
with open("cat/welcome.txt", 'r') as f:
|
||||||
|
print(f.read())
|
||||||
|
|
||||||
|
print('\n=============== ^._.^ ===============\n')
|
||||||
|
print(f'Cat REST API: {cat_address}/docs')
|
||||||
|
print(f'Cat PUBLIC: {cat_address}/public')
|
||||||
|
print(f'Cat ADMIN: {cat_address}/admin\n')
|
||||||
|
print('======================================')
|
||||||
|
|
||||||
|
# logger instance
|
||||||
|
log = CatLogEngine()
|
||||||
@@ -20,19 +20,37 @@ def before_cat_reads_message(user_message_json: dict, cat) -> dict:
|
|||||||
"""
|
"""
|
||||||
Enrich incoming message with Discord metadata.
|
Enrich incoming message with Discord metadata.
|
||||||
This runs BEFORE the message is processed.
|
This runs BEFORE the message is processed.
|
||||||
|
|
||||||
|
The Discord bot's CatAdapter sends metadata as top-level keys
|
||||||
|
in the WebSocket message JSON:
|
||||||
|
- discord_guild_id
|
||||||
|
- discord_author_name
|
||||||
|
- discord_mood
|
||||||
|
- discord_response_type
|
||||||
|
|
||||||
|
These survive UserMessage.model_validate() as extra attributes
|
||||||
|
(BaseModelDict has extra="allow"). We read them via .get() and
|
||||||
|
store them in working_memory for downstream hooks.
|
||||||
"""
|
"""
|
||||||
# Extract Discord context from working memory or metadata
|
# Extract Discord context from the message payload
|
||||||
# These will be set by the Discord bot when calling the Cat API
|
# (sent by CatAdapter.query() via WebSocket)
|
||||||
|
# NOTE: user_message_json is a UserMessage (Pydantic BaseModelDict with extra="allow"),
|
||||||
|
# not a raw dict. Extra keys survive model_validate() as extra attributes.
|
||||||
|
# We use .get() since BaseModelDict implements it, but NOT .pop().
|
||||||
|
guild_id = user_message_json.get('discord_guild_id', None)
|
||||||
|
author_name = user_message_json.get('discord_author_name', None)
|
||||||
|
mood = user_message_json.get('discord_mood', None)
|
||||||
|
response_type = user_message_json.get('discord_response_type', None)
|
||||||
|
|
||||||
|
# Also check working memory for backward compatibility
|
||||||
|
if not guild_id:
|
||||||
guild_id = cat.working_memory.get('guild_id')
|
guild_id = cat.working_memory.get('guild_id')
|
||||||
channel_id = cat.working_memory.get('channel_id')
|
|
||||||
|
|
||||||
# Add to message metadata for later use
|
# Store in working memory so other hooks can access it
|
||||||
if 'metadata' not in user_message_json:
|
cat.working_memory['guild_id'] = guild_id or 'dm'
|
||||||
user_message_json['metadata'] = {}
|
cat.working_memory['author_name'] = author_name
|
||||||
|
cat.working_memory['mood'] = mood
|
||||||
user_message_json['metadata']['guild_id'] = guild_id or 'dm'
|
cat.working_memory['response_type'] = response_type
|
||||||
user_message_json['metadata']['channel_id'] = channel_id
|
|
||||||
user_message_json['metadata']['timestamp'] = datetime.now().isoformat()
|
|
||||||
|
|
||||||
return user_message_json
|
return user_message_json
|
||||||
|
|
||||||
@@ -65,33 +83,42 @@ def before_cat_stores_episodic_memory(doc, cat):
|
|||||||
doc.metadata['consolidated'] = False # Needs nightly processing
|
doc.metadata['consolidated'] = False # Needs nightly processing
|
||||||
doc.metadata['stored_at'] = datetime.now().isoformat()
|
doc.metadata['stored_at'] = datetime.now().isoformat()
|
||||||
|
|
||||||
# Get Discord context from working memory
|
# Get Discord context from working memory (set by before_cat_reads_message)
|
||||||
guild_id = cat.working_memory.get('guild_id')
|
guild_id = cat.working_memory.get('guild_id', 'dm')
|
||||||
channel_id = cat.working_memory.get('channel_id')
|
author_name = cat.working_memory.get('author_name')
|
||||||
|
|
||||||
doc.metadata['guild_id'] = guild_id or 'dm'
|
doc.metadata['guild_id'] = guild_id
|
||||||
doc.metadata['channel_id'] = channel_id
|
doc.metadata['source'] = cat.user_id # CRITICAL: Cat filters episodic by source=user_id!
|
||||||
doc.metadata['source'] = 'discord'
|
doc.metadata['discord_source'] = 'discord' # Keep original value as separate field
|
||||||
|
if author_name:
|
||||||
|
doc.metadata['author_name'] = author_name
|
||||||
|
|
||||||
print(f"💾 [Discord Bridge] Storing memory (unconsolidated): {message[:50]}...")
|
print(f"💾 [Discord Bridge] Storing memory (unconsolidated): {message[:50]}...")
|
||||||
print(f" User: {cat.user_id}, Guild: {doc.metadata['guild_id']}, Channel: {channel_id}")
|
print(f" User: {cat.user_id}, Guild: {guild_id}, Author: {author_name}")
|
||||||
|
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
||||||
@hook(priority=50)
|
@hook(priority=50)
|
||||||
def after_cat_recalls_memories(memory_docs, cat):
|
def after_cat_recalls_memories(cat):
|
||||||
"""
|
"""
|
||||||
Log memory recall for debugging.
|
Log memory recall for debugging.
|
||||||
Can be used to filter by guild_id if needed in the future.
|
Access recalled memories via cat.working_memory.
|
||||||
"""
|
"""
|
||||||
if memory_docs:
|
# Get recalled memories from working memory
|
||||||
print(f"🧠 [Discord Bridge] Recalled {len(memory_docs)} memories for user {cat.user_id}")
|
episodic_memories = cat.working_memory.get('episodic_memories', [])
|
||||||
# Show which guilds the memories are from
|
declarative_memories = cat.working_memory.get('declarative_memories', [])
|
||||||
guilds = set(doc.metadata.get('guild_id', 'unknown') for doc in memory_docs)
|
|
||||||
print(f" From guilds: {', '.join(guilds)}")
|
|
||||||
|
|
||||||
return memory_docs
|
if episodic_memories:
|
||||||
|
print(f"🧠 [Discord Bridge] Recalled {len(episodic_memories)} episodic memories for user {cat.user_id}")
|
||||||
|
guilds = set()
|
||||||
|
for doc, score, *rest in episodic_memories:
|
||||||
|
guild = doc.metadata.get('guild_id', 'unknown')
|
||||||
|
guilds.add(guild)
|
||||||
|
print(f" From guilds: {', '.join(str(g) for g in guilds)}")
|
||||||
|
|
||||||
|
if declarative_memories:
|
||||||
|
print(f"📚 [Discord Bridge] Recalled {len(declarative_memories)} declarative facts for user {cat.user_id}")
|
||||||
|
|
||||||
|
|
||||||
# Plugin metadata
|
# Plugin metadata
|
||||||
|
|||||||
60
cheshire-cat/docker-compose.test.yml
Normal file
60
cheshire-cat/docker-compose.test.yml
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
services:
|
||||||
|
cheshire-cat-core:
|
||||||
|
image: ghcr.io/cheshire-cat-ai/core:1.6.2
|
||||||
|
container_name: miku_cheshire_cat_test
|
||||||
|
depends_on:
|
||||||
|
- cheshire-cat-vector-memory
|
||||||
|
environment:
|
||||||
|
PYTHONUNBUFFERED: "1"
|
||||||
|
WATCHFILES_FORCE_POLLING: "true"
|
||||||
|
CORE_HOST: ${CORE_HOST:-localhost}
|
||||||
|
CORE_PORT: ${CORE_PORT:-1865}
|
||||||
|
QDRANT_HOST: ${QDRANT_HOST:-cheshire-cat-vector-memory}
|
||||||
|
QDRANT_PORT: ${QDRANT_PORT:-6333}
|
||||||
|
CORE_USE_SECURE_PROTOCOLS: ${CORE_USE_SECURE_PROTOCOLS:-false}
|
||||||
|
API_KEY: ${API_KEY:-}
|
||||||
|
LOG_LEVEL: ${LOG_LEVEL:-INFO}
|
||||||
|
DEBUG: ${DEBUG:-true}
|
||||||
|
SAVE_MEMORY_SNAPSHOTS: ${SAVE_MEMORY_SNAPSHOTS:-false}
|
||||||
|
OPENAI_API_BASE: "http://host.docker.internal:8091/v1"
|
||||||
|
ports:
|
||||||
|
- "${CORE_PORT:-1865}:80"
|
||||||
|
# Allow connection to host services (llama-swap)
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
volumes:
|
||||||
|
- ./cat/static:/app/cat/static
|
||||||
|
- ./cat/plugins:/app/cat/plugins
|
||||||
|
- ./cat/data:/app/cat/data
|
||||||
|
- ./cat/log.py:/app/cat/log.py # Patched: fix loguru KeyError for third-party libs
|
||||||
|
restart: unless-stopped
|
||||||
|
networks:
|
||||||
|
- miku-test-network
|
||||||
|
- miku-discord_default # Connect to existing miku bot network
|
||||||
|
|
||||||
|
cheshire-cat-vector-memory:
|
||||||
|
image: qdrant/qdrant:v1.9.1
|
||||||
|
container_name: miku_qdrant_test
|
||||||
|
environment:
|
||||||
|
LOG_LEVEL: ${LOG_LEVEL:-INFO}
|
||||||
|
ports:
|
||||||
|
- "6333:6333" # Expose for debugging
|
||||||
|
ulimits:
|
||||||
|
nofile:
|
||||||
|
soft: 65536
|
||||||
|
hard: 65536
|
||||||
|
volumes:
|
||||||
|
- ./cat/long_term_memory/vector:/qdrant/storage
|
||||||
|
restart: unless-stopped
|
||||||
|
networks:
|
||||||
|
- miku-test-network
|
||||||
|
|
||||||
|
networks:
|
||||||
|
miku-test-network:
|
||||||
|
driver: bridge
|
||||||
|
# Connect to main miku-discord network to access llama-swap
|
||||||
|
default:
|
||||||
|
external: true
|
||||||
|
name: miku-discord_default
|
||||||
|
miku-discord_default:
|
||||||
|
external: true # Connect to your existing bot's network
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
version: '3.9'
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
|
# ========== LLM Backends ==========
|
||||||
llama-swap:
|
llama-swap:
|
||||||
image: ghcr.io/mostlygeek/llama-swap:cuda
|
image: ghcr.io/mostlygeek/llama-swap:cuda
|
||||||
container_name: llama-swap
|
container_name: llama-swap
|
||||||
@@ -9,6 +8,7 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./models:/models # GGUF model files
|
- ./models:/models # GGUF model files
|
||||||
- ./llama-swap-config.yaml:/app/config.yaml # llama-swap configuration
|
- ./llama-swap-config.yaml:/app/config.yaml # llama-swap configuration
|
||||||
|
- ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja # Custom chat template
|
||||||
runtime: nvidia
|
runtime: nvidia
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
healthcheck:
|
healthcheck:
|
||||||
@@ -31,6 +31,7 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./models:/models # GGUF model files
|
- ./models:/models # GGUF model files
|
||||||
- ./llama-swap-rocm-config.yaml:/app/config.yaml # llama-swap configuration for AMD
|
- ./llama-swap-rocm-config.yaml:/app/config.yaml # llama-swap configuration for AMD
|
||||||
|
- ./llama31_notool_template.jinja:/app/llama31_notool_template.jinja # Custom chat template
|
||||||
devices:
|
devices:
|
||||||
- /dev/kfd:/dev/kfd
|
- /dev/kfd:/dev/kfd
|
||||||
- /dev/dri:/dev/dri
|
- /dev/dri:/dev/dri
|
||||||
@@ -50,6 +51,59 @@ services:
|
|||||||
- HIP_VISIBLE_DEVICES=0 # Use first AMD GPU
|
- HIP_VISIBLE_DEVICES=0 # Use first AMD GPU
|
||||||
- GPU_DEVICE_ORDINAL=0
|
- GPU_DEVICE_ORDINAL=0
|
||||||
|
|
||||||
|
# ========== Cheshire Cat AI (Memory & Personality) ==========
|
||||||
|
cheshire-cat:
|
||||||
|
image: ghcr.io/cheshire-cat-ai/core:1.6.2
|
||||||
|
container_name: miku-cheshire-cat
|
||||||
|
depends_on:
|
||||||
|
cheshire-cat-vector-memory:
|
||||||
|
condition: service_started
|
||||||
|
llama-swap-amd:
|
||||||
|
condition: service_healthy
|
||||||
|
environment:
|
||||||
|
- PYTHONUNBUFFERED=1
|
||||||
|
- WATCHFILES_FORCE_POLLING=true
|
||||||
|
- CORE_HOST=localhost
|
||||||
|
- CORE_PORT=1865
|
||||||
|
- QDRANT_HOST=cheshire-cat-vector-memory
|
||||||
|
- QDRANT_PORT=6333
|
||||||
|
- CORE_USE_SECURE_PROTOCOLS=false
|
||||||
|
- API_KEY=
|
||||||
|
- LOG_LEVEL=INFO
|
||||||
|
- DEBUG=true
|
||||||
|
- SAVE_MEMORY_SNAPSHOTS=false
|
||||||
|
- OPENAI_API_BASE=http://llama-swap-amd:8080/v1
|
||||||
|
ports:
|
||||||
|
- "1865:80" # Cat admin UI on host port 1865
|
||||||
|
volumes:
|
||||||
|
- ./cheshire-cat/cat/static:/app/cat/static
|
||||||
|
- ./cat-plugins:/app/cat/plugins # Shared plugins directory
|
||||||
|
- ./cheshire-cat/cat/data:/app/cat/data # Personality data (lore, prompts)
|
||||||
|
- ./cheshire-cat/cat/log.py:/app/cat/log.py # Patched: fix loguru KeyError for third-party libs
|
||||||
|
restart: unless-stopped
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:80/"]
|
||||||
|
interval: 15s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 8
|
||||||
|
start_period: 45s # Cat takes a while to load embedder + plugins
|
||||||
|
|
||||||
|
cheshire-cat-vector-memory:
|
||||||
|
image: qdrant/qdrant:v1.9.1
|
||||||
|
container_name: miku-qdrant
|
||||||
|
environment:
|
||||||
|
- LOG_LEVEL=INFO
|
||||||
|
ports:
|
||||||
|
- "6333:6333" # Qdrant REST API (for debugging)
|
||||||
|
ulimits:
|
||||||
|
nofile:
|
||||||
|
soft: 65536
|
||||||
|
hard: 65536
|
||||||
|
volumes:
|
||||||
|
- ./cheshire-cat/cat/long_term_memory/vector:/qdrant/storage
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# ========== Discord Bot ==========
|
||||||
miku-bot:
|
miku-bot:
|
||||||
build: ./bot
|
build: ./bot
|
||||||
container_name: miku-bot
|
container_name: miku-bot
|
||||||
@@ -62,6 +116,8 @@ services:
|
|||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
llama-swap-amd:
|
llama-swap-amd:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
cheshire-cat:
|
||||||
|
condition: service_healthy
|
||||||
environment:
|
environment:
|
||||||
- DISCORD_BOT_TOKEN=MTM0ODAyMjY0Njc3NTc0NjY1MQ.GXsxML.nNCDOplmgNxKgqdgpAomFM2PViX10GjxyuV8uw
|
- DISCORD_BOT_TOKEN=MTM0ODAyMjY0Njc3NTc0NjY1MQ.GXsxML.nNCDOplmgNxKgqdgpAomFM2PViX10GjxyuV8uw
|
||||||
- LLAMA_URL=http://llama-swap:8080
|
- LLAMA_URL=http://llama-swap:8080
|
||||||
@@ -70,13 +126,17 @@ services:
|
|||||||
- VISION_MODEL=vision
|
- VISION_MODEL=vision
|
||||||
- OWNER_USER_ID=209381657369772032 # Your Discord user ID for DM analysis reports
|
- OWNER_USER_ID=209381657369772032 # Your Discord user ID for DM analysis reports
|
||||||
- FACE_DETECTOR_STARTUP_TIMEOUT=60
|
- FACE_DETECTOR_STARTUP_TIMEOUT=60
|
||||||
|
# Cheshire Cat integration (Phase 3)
|
||||||
|
- CHESHIRE_CAT_URL=http://cheshire-cat:80
|
||||||
|
- USE_CHESHIRE_CAT=true
|
||||||
ports:
|
ports:
|
||||||
- "3939:3939"
|
- "3939:3939"
|
||||||
networks:
|
networks:
|
||||||
- default # Stay on default for llama-swap communication
|
- default # Stay on default for llama-swap + cheshire-cat communication
|
||||||
- miku-voice # Connect to voice network for RVC/TTS
|
- miku-voice # Connect to voice network for RVC/TTS
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# ========== Voice / STT ==========
|
||||||
miku-stt:
|
miku-stt:
|
||||||
build:
|
build:
|
||||||
context: ./stt-realtime
|
context: ./stt-realtime
|
||||||
@@ -106,6 +166,7 @@ services:
|
|||||||
capabilities: [gpu]
|
capabilities: [gpu]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# ========== Tools (on-demand) ==========
|
||||||
anime-face-detector:
|
anime-face-detector:
|
||||||
build: ./face-detector
|
build: ./face-detector
|
||||||
container_name: anime-face-detector
|
container_name: anime-face-detector
|
||||||
|
|||||||
@@ -1,196 +1,254 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Full Pipeline Test for Memory Consolidation System
|
Full Pipeline Test for Memory Consolidation System v2.0.0
|
||||||
Tests all phases: Storage → Consolidation → Fact Extraction → Recall
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
BASE_URL = "http://localhost:1865"
|
CAT_URL = "http://localhost:1865"
|
||||||
|
QDRANT_URL = "http://localhost:6333"
|
||||||
|
CONSOLIDATION_TIMEOUT = 180
|
||||||
|
|
||||||
def send_message(text):
|
|
||||||
"""Send a message to Miku and get response"""
|
def send_message(text, timeout=30):
|
||||||
resp = requests.post(f"{BASE_URL}/message", json={"text": text})
|
try:
|
||||||
|
resp = requests.post(f"{CAT_URL}/message", json={"text": text}, timeout=timeout)
|
||||||
|
resp.raise_for_status()
|
||||||
return resp.json()
|
return resp.json()
|
||||||
|
except requests.exceptions.Timeout:
|
||||||
|
return {"error": "timeout", "content": ""}
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": str(e), "content": ""}
|
||||||
|
|
||||||
|
|
||||||
|
def qdrant_scroll(collection, limit=200, filt=None):
|
||||||
|
body = {"limit": limit, "with_payload": True, "with_vector": False}
|
||||||
|
if filt:
|
||||||
|
body["filter"] = filt
|
||||||
|
resp = requests.post(f"{QDRANT_URL}/collections/{collection}/points/scroll", json=body)
|
||||||
|
return resp.json()["result"]["points"]
|
||||||
|
|
||||||
|
|
||||||
|
def qdrant_count(collection):
|
||||||
|
return len(qdrant_scroll(collection))
|
||||||
|
|
||||||
|
|
||||||
|
def section(title):
|
||||||
|
print(f"\n{'=' * 70}")
|
||||||
|
print(f" {title}")
|
||||||
|
print(f"{'=' * 70}")
|
||||||
|
|
||||||
def get_qdrant_count(collection):
|
|
||||||
"""Get count of items in Qdrant collection"""
|
|
||||||
resp = requests.post(
|
|
||||||
f"http://localhost:6333/collections/{collection}/points/scroll",
|
|
||||||
json={"limit": 1000, "with_payload": False, "with_vector": False}
|
|
||||||
)
|
|
||||||
return len(resp.json()["result"]["points"])
|
|
||||||
|
|
||||||
print("=" * 70)
|
print("=" * 70)
|
||||||
print("🧪 FULL PIPELINE TEST - Memory Consolidation System")
|
print(" FULL PIPELINE TEST - Memory Consolidation v2.0.0")
|
||||||
print("=" * 70)
|
print("=" * 70)
|
||||||
|
|
||||||
|
try:
|
||||||
|
requests.get(f"{CAT_URL}/", timeout=5)
|
||||||
|
except Exception:
|
||||||
|
print("ERROR: Cat not reachable"); sys.exit(1)
|
||||||
|
try:
|
||||||
|
requests.get(f"{QDRANT_URL}/collections", timeout=5)
|
||||||
|
except Exception:
|
||||||
|
print("ERROR: Qdrant not reachable"); sys.exit(1)
|
||||||
|
|
||||||
|
episodic_start = qdrant_count("episodic")
|
||||||
|
declarative_start = qdrant_count("declarative")
|
||||||
|
print(f"\nStarting state: {episodic_start} episodic, {declarative_start} declarative")
|
||||||
|
|
||||||
|
results = {}
|
||||||
|
|
||||||
# TEST 1: Trivial Message Filtering
|
# TEST 1: Trivial Message Filtering
|
||||||
print("\n📋 TEST 1: Trivial Message Filtering")
|
section("TEST 1: Trivial Message Filtering")
|
||||||
print("-" * 70)
|
|
||||||
|
|
||||||
trivial_messages = ["lol", "k", "ok", "haha", "xd"]
|
trivial_messages = ["lol", "k", "ok", "haha", "xd", "brb"]
|
||||||
important_message = "My name is Alex and I live in Seattle"
|
print(f"Sending {len(trivial_messages)} trivial messages...")
|
||||||
|
|
||||||
print("Sending trivial messages (should be filtered out)...")
|
|
||||||
for msg in trivial_messages:
|
for msg in trivial_messages:
|
||||||
send_message(msg)
|
send_message(msg)
|
||||||
time.sleep(0.5)
|
time.sleep(0.3)
|
||||||
|
|
||||||
print("Sending important message...")
|
|
||||||
send_message(important_message)
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
# Count only USER episodic memories (exclude Miku's responses)
|
||||||
|
user_episodic = qdrant_scroll("episodic", filt={
|
||||||
|
"must_not": [{"key": "metadata.speaker", "match": {"value": "miku"}}]
|
||||||
|
})
|
||||||
|
trivial_user_stored = len(user_episodic) - episodic_start
|
||||||
|
episodic_after_trivial = qdrant_count("episodic")
|
||||||
|
|
||||||
episodic_count = get_qdrant_count("episodic")
|
# discord_bridge filters trivial user messages, but Miku still responds
|
||||||
print(f"\n✅ Episodic memories stored: {episodic_count}")
|
# so we only check user-side storage
|
||||||
if episodic_count < len(trivial_messages):
|
if trivial_user_stored < len(trivial_messages):
|
||||||
print(" ✓ Trivial filtering working! (some messages were filtered)")
|
print(f" PASS - Only {trivial_user_stored}/{len(trivial_messages)} user trivial messages stored")
|
||||||
|
print(f" (Total episodic incl. Miku responses: {episodic_after_trivial})")
|
||||||
|
results["trivial_filtering"] = True
|
||||||
else:
|
else:
|
||||||
print(" ⚠️ Trivial filtering may not be active")
|
print(f" WARN - All {trivial_user_stored} trivial messages stored")
|
||||||
|
results["trivial_filtering"] = False
|
||||||
|
|
||||||
# TEST 2: Miku's Response Storage
|
# TEST 2: Important Message Storage
|
||||||
print("\n📋 TEST 2: Miku's Response Storage")
|
section("TEST 2: Important Message Storage")
|
||||||
print("-" * 70)
|
|
||||||
|
|
||||||
print("Sending message and checking if Miku's response is stored...")
|
personal_facts = [
|
||||||
resp = send_message("Tell me a very short fact about music")
|
|
||||||
miku_said = resp["content"]
|
|
||||||
print(f"Miku said: {miku_said[:80]}...")
|
|
||||||
time.sleep(2)
|
|
||||||
|
|
||||||
# Check for Miku's messages in episodic
|
|
||||||
resp = requests.post(
|
|
||||||
"http://localhost:6333/collections/episodic/points/scroll",
|
|
||||||
json={
|
|
||||||
"limit": 100,
|
|
||||||
"with_payload": True,
|
|
||||||
"with_vector": False,
|
|
||||||
"filter": {"must": [{"key": "metadata.speaker", "match": {"value": "miku"}}]}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
miku_messages = resp.json()["result"]["points"]
|
|
||||||
print(f"\n✅ Miku's messages in memory: {len(miku_messages)}")
|
|
||||||
if miku_messages:
|
|
||||||
print(f" Example: {miku_messages[0]['payload']['page_content'][:60]}...")
|
|
||||||
print(" ✓ Bidirectional memory working!")
|
|
||||||
else:
|
|
||||||
print(" ⚠️ Miku's responses not being stored")
|
|
||||||
|
|
||||||
# TEST 3: Add Rich Personal Information
|
|
||||||
print("\n📋 TEST 3: Adding Personal Information")
|
|
||||||
print("-" * 70)
|
|
||||||
|
|
||||||
personal_info = [
|
|
||||||
"My name is Sarah Chen",
|
"My name is Sarah Chen",
|
||||||
"I'm 28 years old",
|
"I'm 28 years old",
|
||||||
"I work as a data scientist at Google",
|
"I live in Seattle, Washington",
|
||||||
"My favorite color is blue",
|
"I work as a software engineer at Microsoft",
|
||||||
"I love playing piano",
|
"My favorite color is forest green",
|
||||||
|
"I love playing piano and have practiced for 15 years",
|
||||||
|
"I'm learning Japanese, currently at N3 level",
|
||||||
|
"I have a cat named Luna",
|
||||||
"I'm allergic to peanuts",
|
"I'm allergic to peanuts",
|
||||||
"I live in Tokyo, Japan",
|
"My birthday is March 15th",
|
||||||
"My hobbies include photography and hiking"
|
"I graduated from UW in 2018",
|
||||||
|
"I enjoy hiking on weekends",
|
||||||
]
|
]
|
||||||
|
|
||||||
print(f"Adding {len(personal_info)} messages with personal information...")
|
print(f"Sending {len(personal_facts)} personal info messages...")
|
||||||
for info in personal_info:
|
for i, fact in enumerate(personal_facts, 1):
|
||||||
send_message(info)
|
resp = send_message(fact)
|
||||||
|
status = "OK" if "error" not in resp else "ERR"
|
||||||
|
print(f" [{i}/{len(personal_facts)}] {status} {fact[:50]}")
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
episodic_after = get_qdrant_count("episodic")
|
time.sleep(1)
|
||||||
print(f"\n✅ Total episodic memories: {episodic_after}")
|
episodic_after_personal = qdrant_count("episodic")
|
||||||
print(f" ({episodic_after - episodic_count} new memories added)")
|
personal_stored = episodic_after_personal - episodic_after_trivial
|
||||||
|
print(f"\n Episodic memories from personal info: {personal_stored}")
|
||||||
|
results["important_storage"] = personal_stored >= len(personal_facts)
|
||||||
|
print(f" {'PASS' if results['important_storage'] else 'FAIL'} - Expected >={len(personal_facts)}, got {personal_stored}")
|
||||||
|
|
||||||
# TEST 4: Memory Consolidation
|
# TEST 3: Miku Response Storage
|
||||||
print("\n📋 TEST 4: Memory Consolidation & Fact Extraction")
|
section("TEST 3: Bidirectional Memory (Miku Response Storage)")
|
||||||
print("-" * 70)
|
|
||||||
|
|
||||||
print("Triggering consolidation...")
|
miku_points = qdrant_scroll("episodic", filt={
|
||||||
resp = send_message("consolidate now")
|
"must": [{"key": "metadata.speaker", "match": {"value": "miku"}}]
|
||||||
consolidation_result = resp["content"]
|
})
|
||||||
print(f"\n{consolidation_result}")
|
print(f" Miku's memories in episodic: {len(miku_points)}")
|
||||||
|
if miku_points:
|
||||||
|
print(f" Sample: \"{miku_points[0]['payload']['page_content'][:70]}\"")
|
||||||
|
results["miku_storage"] = True
|
||||||
|
print(" PASS")
|
||||||
|
else:
|
||||||
|
results["miku_storage"] = False
|
||||||
|
print(" FAIL - No Miku responses in episodic memory")
|
||||||
|
|
||||||
time.sleep(2)
|
# TEST 4: Per-User Source Tagging
|
||||||
|
section("TEST 4: Per-User Source Tagging")
|
||||||
|
|
||||||
# Check declarative facts
|
user_points = qdrant_scroll("episodic", filt={
|
||||||
declarative_count = get_qdrant_count("declarative")
|
"must": [{"key": "metadata.source", "match": {"value": "user"}}]
|
||||||
print(f"\n✅ Declarative facts extracted: {declarative_count}")
|
})
|
||||||
|
print(f" Points with source='user': {len(user_points)}")
|
||||||
|
|
||||||
if declarative_count > 0:
|
global_points = qdrant_scroll("episodic", filt={
|
||||||
# Show sample facts
|
"must": [{"key": "metadata.source", "match": {"value": "global"}}]
|
||||||
resp = requests.post(
|
})
|
||||||
"http://localhost:6333/collections/declarative/points/scroll",
|
print(f" Points with source='global' (old bug): {len(global_points)}")
|
||||||
json={"limit": 5, "with_payload": True, "with_vector": False}
|
|
||||||
)
|
|
||||||
facts = resp.json()["result"]["points"]
|
|
||||||
print("\nSample facts:")
|
|
||||||
for i, fact in enumerate(facts[:5], 1):
|
|
||||||
print(f" {i}. {fact['payload']['page_content']}")
|
|
||||||
|
|
||||||
# TEST 5: Fact Recall
|
results["user_tagging"] = len(user_points) > 0 and len(global_points) == 0
|
||||||
print("\n📋 TEST 5: Declarative Fact Recall")
|
print(f" {'PASS' if results['user_tagging'] else 'FAIL'}")
|
||||||
print("-" * 70)
|
|
||||||
|
|
||||||
queries = [
|
# TEST 5: Memory Consolidation
|
||||||
"What is my name?",
|
section("TEST 5: Memory Consolidation & Fact Extraction")
|
||||||
"How old am I?",
|
|
||||||
"Where do I work?",
|
|
||||||
"What's my favorite color?",
|
|
||||||
"What am I allergic to?"
|
|
||||||
]
|
|
||||||
|
|
||||||
print("Testing fact recall with queries...")
|
print(f" Triggering consolidation (timeout={CONSOLIDATION_TIMEOUT}s)...")
|
||||||
correct_recalls = 0
|
t0 = time.time()
|
||||||
for query in queries:
|
resp = send_message("consolidate now", timeout=CONSOLIDATION_TIMEOUT)
|
||||||
resp = send_message(query)
|
elapsed = time.time() - t0
|
||||||
answer = resp["content"]
|
|
||||||
print(f"\n❓ {query}")
|
|
||||||
print(f"💬 Miku: {answer[:150]}...")
|
|
||||||
|
|
||||||
# Basic heuristic: check if answer contains likely keywords
|
if "error" in resp:
|
||||||
keywords = {
|
print(f" WARN - HTTP issue: {resp['error']} ({elapsed:.0f}s)")
|
||||||
"What is my name?": ["Sarah", "Chen"],
|
print(" Waiting 60s for background completion...")
|
||||||
|
time.sleep(60)
|
||||||
|
else:
|
||||||
|
print(f" Completed in {elapsed:.1f}s")
|
||||||
|
content = resp.get("content", "")
|
||||||
|
print(f" Response: {content[:120]}...")
|
||||||
|
|
||||||
|
time.sleep(3)
|
||||||
|
|
||||||
|
declarative_after = qdrant_count("declarative")
|
||||||
|
new_facts = declarative_after - declarative_start
|
||||||
|
print(f"\n Declarative facts: {declarative_start} -> {declarative_after} (+{new_facts})")
|
||||||
|
|
||||||
|
results["consolidation"] = new_facts >= 5
|
||||||
|
print(f" {'PASS' if results['consolidation'] else 'FAIL'} - {'>=5 facts' if results['consolidation'] else f'only {new_facts}'}")
|
||||||
|
|
||||||
|
all_facts = qdrant_scroll("declarative")
|
||||||
|
print(f"\n All declarative facts ({len(all_facts)}):")
|
||||||
|
for i, f in enumerate(all_facts, 1):
|
||||||
|
content = f["payload"]["page_content"]
|
||||||
|
meta = f["payload"].get("metadata", {})
|
||||||
|
source = meta.get("source", "?")
|
||||||
|
ftype = meta.get("fact_type", "?")
|
||||||
|
print(f" {i}. [{source}|{ftype}] {content}")
|
||||||
|
|
||||||
|
# TEST 6: Duplicate Detection
|
||||||
|
section("TEST 6: Duplicate Detection (2nd consolidation)")
|
||||||
|
|
||||||
|
facts_before_2nd = qdrant_count("declarative")
|
||||||
|
print(f" Facts before: {facts_before_2nd}")
|
||||||
|
print(f" Running consolidation again...")
|
||||||
|
|
||||||
|
resp = send_message("consolidate now", timeout=CONSOLIDATION_TIMEOUT)
|
||||||
|
time.sleep(3)
|
||||||
|
|
||||||
|
facts_after_2nd = qdrant_count("declarative")
|
||||||
|
new_dupes = facts_after_2nd - facts_before_2nd
|
||||||
|
print(f" Facts after: {facts_after_2nd} (+{new_dupes})")
|
||||||
|
|
||||||
|
results["dedup"] = new_dupes <= 2
|
||||||
|
print(f" {'PASS' if results['dedup'] else 'FAIL'} - {new_dupes} new facts (<=2 expected)")
|
||||||
|
|
||||||
|
# TEST 7: Fact Recall
|
||||||
|
section("TEST 7: Fact Recall via Natural Language")
|
||||||
|
|
||||||
|
queries = {
|
||||||
|
"What is my name?": ["sarah", "chen"],
|
||||||
"How old am I?": ["28"],
|
"How old am I?": ["28"],
|
||||||
"Where do I work?": ["Google", "data scientist"],
|
"Where do I live?": ["seattle"],
|
||||||
"What's my favorite color?": ["blue"],
|
"Where do I work?": ["microsoft", "software engineer"],
|
||||||
"What am I allergic to?": ["peanut"]
|
"What am I allergic to?": ["peanut"],
|
||||||
}
|
}
|
||||||
|
|
||||||
if any(kw.lower() in answer.lower() for kw in keywords[query]):
|
correct = 0
|
||||||
print(" ✓ Correct recall!")
|
for question, keywords in queries.items():
|
||||||
correct_recalls += 1
|
resp = send_message(question)
|
||||||
else:
|
answer = resp.get("content", "")
|
||||||
print(" ⚠️ May not have recalled correctly")
|
hit = any(kw.lower() in answer.lower() for kw in keywords)
|
||||||
|
if hit:
|
||||||
|
correct += 1
|
||||||
|
icon = "OK" if hit else "??"
|
||||||
|
print(f" {icon} Q: {question}")
|
||||||
|
print(f" A: {answer[:150]}")
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
print(f"\n✅ Fact recall accuracy: {correct_recalls}/{len(queries)} ({correct_recalls/len(queries)*100:.0f}%)")
|
accuracy = correct / len(queries) * 100
|
||||||
|
results["recall"] = correct >= 3
|
||||||
|
print(f"\n Recall: {correct}/{len(queries)} ({accuracy:.0f}%)")
|
||||||
|
print(f" {'PASS' if results['recall'] else 'FAIL'} (threshold: >=3)")
|
||||||
|
|
||||||
# TEST 6: Conversation History Recall
|
# FINAL SUMMARY
|
||||||
print("\n📋 TEST 6: Conversation History (Episodic) Recall")
|
section("FINAL SUMMARY")
|
||||||
print("-" * 70)
|
|
||||||
|
|
||||||
print("Asking about conversation history...")
|
total = len(results)
|
||||||
resp = send_message("What have we talked about today?")
|
passed = sum(1 for v in results.values() if v)
|
||||||
summary = resp["content"]
|
print()
|
||||||
print(f"💬 Miku's summary:\n{summary}")
|
for name, ok in results.items():
|
||||||
|
print(f" [{'PASS' if ok else 'FAIL'}] {name}")
|
||||||
|
|
||||||
# Final Summary
|
print(f"\n Score: {passed}/{total}")
|
||||||
print("\n" + "=" * 70)
|
print(f" Episodic: {qdrant_count('episodic')}")
|
||||||
print("📊 FINAL SUMMARY")
|
print(f" Declarative: {qdrant_count('declarative')}")
|
||||||
print("=" * 70)
|
|
||||||
print(f"✅ Episodic memories: {get_qdrant_count('episodic')}")
|
|
||||||
print(f"✅ Declarative facts: {declarative_count}")
|
|
||||||
print(f"✅ Miku's messages stored: {len(miku_messages)}")
|
|
||||||
print(f"✅ Fact recall accuracy: {correct_recalls}/{len(queries)}")
|
|
||||||
|
|
||||||
# Overall verdict
|
if passed == total:
|
||||||
if declarative_count >= 5 and correct_recalls >= 3:
|
print("\n ALL TESTS PASSED!")
|
||||||
print("\n🎉 PIPELINE TEST: PASS")
|
elif passed >= total - 1:
|
||||||
print(" All major components working correctly!")
|
print("\n MOSTLY PASSING - minor issues only")
|
||||||
else:
|
else:
|
||||||
print("\n⚠️ PIPELINE TEST: PARTIAL PASS")
|
print("\n SOME TESTS FAILED - review above")
|
||||||
print(" Some components may need adjustment")
|
|
||||||
|
|
||||||
print("\n" + "=" * 70)
|
print("\n" + "=" * 70)
|
||||||
|
|||||||
Reference in New Issue
Block a user