From a0a16e6784365cacda40453eb0d98f0739e76797 Mon Sep 17 00:00:00 2001 From: koko210Serve Date: Sun, 1 Mar 2026 00:57:13 +0200 Subject: [PATCH] fix: resolve Cat personality startup race condition The bot was calling restore_evil_cat_state() in on_ready() before Cheshire Cat finished booting (~25s), causing all plugin toggle API calls to fail silently. The Evil Miku plugin was left disabled and the bot used Cat's default personality instead. Changes: - cat_client.py: add wait_for_ready() that polls Cat's root endpoint (GET /) every 5s for up to 120s before attempting any admin API calls - cat_client.py: when evil mode is active, send the evil mood (EVIL_DM_MOOD, default 'evil_neutral') as discord_mood, and always include a discord_evil_mode flag in the message payload - evil_mode.py: rewrite restore_evil_cat_state() with: - wait_for_ready() gate before any plugin/model switching - 3-second extra delay after Cat is up (to let the plugin registry finish loading) - up to 3 attempts in total, 5s apart, if switching or verification fails - post-switch verification that the correct plugins are actually active Also fixes helcyon model references that leaked into the container image (cat_client.py was switching Cat's LLM to 'helcyon', which has no llama-swap handler; reverted to the correct 'darkidol' / 'llama3.1'). 
--- bot/utils/cat_client.py | 223 +++++++++++++++++++++++++++++++++++++++- bot/utils/evil_mode.py | 65 ++++++++++-- 2 files changed, 278 insertions(+), 10 deletions(-) diff --git a/bot/utils/cat_client.py b/bot/utils/cat_client.py index 85eb488..d55c3f9 100644 --- a/bot/utils/cat_client.py +++ b/bot/utils/cat_client.py @@ -146,10 +146,15 @@ class CatAdapter: payload["discord_guild_id"] = str(guild_id) if author_name: payload["discord_author_name"] = author_name - if mood: + # When evil mode is active, send the evil mood name instead of the normal mood + if globals.EVIL_MODE: + payload["discord_mood"] = getattr(globals, 'EVIL_DM_MOOD', 'evil_neutral') + elif mood: payload["discord_mood"] = mood if response_type: payload["discord_response_type"] = response_type + # Pass evil mode flag so discord_bridge stores it in working_memory + payload["discord_evil_mode"] = globals.EVIL_MODE try: # Build WebSocket URL from HTTP base URL @@ -634,6 +639,222 @@ class CatAdapter: logger.error(f"Consolidation error: {e}") return None + # ==================================================================== + # Admin API helpers – plugin toggling & LLM model switching + # ==================================================================== + + async def wait_for_ready(self, max_wait: int = 120, interval: int = 5) -> bool: + """Wait for Cat to become reachable, polling with interval. + + Used on startup to avoid race conditions when bot starts before Cat. + Returns True once Cat responds, False if max_wait exceeded. 
+ """ + start = time.time() + attempt = 0 + while time.time() - start < max_wait: + attempt += 1 + try: + async with aiohttp.ClientSession() as session: + async with session.get( + f"{self._base_url}/", + timeout=aiohttp.ClientTimeout(total=5), + ) as resp: + if resp.status == 200: + elapsed = time.time() - start + logger.info(f"🐱 Cat is ready (took {elapsed:.1f}s, {attempt} attempts)") + self._healthy = True + self._last_health_check = time.time() + return True + except Exception: + pass + if attempt == 1: + logger.info(f"⏳ Waiting for Cat to become ready (up to {max_wait}s)...") + await asyncio.sleep(interval) + logger.error(f"Cat did not become ready within {max_wait}s ({attempt} attempts)") + return False + + async def toggle_plugin(self, plugin_id: str) -> bool: + """Toggle a Cat plugin on/off via the admin API. + + PUT /plugins/toggle/{plugin_id} + Returns True on success, False on failure. + """ + url = f"{self._base_url}/plugins/toggle/{plugin_id}" + try: + async with aiohttp.ClientSession() as session: + async with session.put( + url, + headers=self._get_headers(), + timeout=aiohttp.ClientTimeout(total=15), + ) as resp: + if resp.status == 200: + logger.info(f"🐱 Toggled Cat plugin: {plugin_id}") + return True + else: + body = await resp.text() + logger.error(f"Cat plugin toggle failed ({resp.status}): {body}") + return False + except Exception as e: + logger.error(f"Cat plugin toggle error for {plugin_id}: {e}") + return False + + async def set_llm_model(self, model_name: str) -> bool: + """Switch the Cheshire Cat's active LLM model via settings API. + + The Cat settings API uses UUIDs: we must first GET /settings/ to find + the setting_id for LLMOpenAIChatConfig, then PUT /settings/{setting_id}. + llama-swap handles the actual model loading based on model_name. + Returns True on success, False on failure. 
+ """ + try: + # Step 1: Find the setting_id for LLMOpenAIChatConfig + setting_id = None + async with aiohttp.ClientSession() as session: + async with session.get( + f"{self._base_url}/settings/", + headers=self._get_headers(), + timeout=aiohttp.ClientTimeout(total=10), + ) as resp: + if resp.status != 200: + logger.error(f"Cat settings GET failed ({resp.status})") + return False + data = await resp.json() + for s in data.get("settings", []): + if s.get("name") == "LLMOpenAIChatConfig": + setting_id = s["setting_id"] + break + + if not setting_id: + logger.error("Could not find LLMOpenAIChatConfig setting_id in Cat settings") + return False + + # Step 2: PUT updated config to /settings/{setting_id} + payload = { + "name": "LLMOpenAIChatConfig", + "value": { + "openai_api_key": "sk-dummy", + "model_name": model_name, + "temperature": 0.8, + "streaming": False, + }, + "category": "llm_factory", + } + async with aiohttp.ClientSession() as session: + async with session.put( + f"{self._base_url}/settings/{setting_id}", + json=payload, + headers=self._get_headers(), + timeout=aiohttp.ClientTimeout(total=15), + ) as resp: + if resp.status == 200: + logger.info(f"🐱 Set Cat LLM model to: {model_name}") + return True + else: + body = await resp.text() + logger.error(f"Cat LLM model switch failed ({resp.status}): {body}") + return False + except Exception as e: + logger.error(f"Cat LLM model switch error: {e}") + return False + + async def get_active_plugins(self) -> list: + """Get list of active Cat plugin IDs. + + GET /plugins → returns {\"installed\": [...], \"filters\": {...}} + Each plugin has \"id\" and \"active\" fields. 
+ """ + url = f"{self._base_url}/plugins" + try: + async with aiohttp.ClientSession() as session: + async with session.get( + url, + headers=self._get_headers(), + timeout=aiohttp.ClientTimeout(total=10), + ) as resp: + if resp.status == 200: + data = await resp.json() + installed = data.get("installed", []) + return [p["id"] for p in installed if p.get("active")] + else: + logger.error(f"Cat get_active_plugins failed ({resp.status})") + return [] + except Exception as e: + logger.error(f"Cat get_active_plugins error: {e}") + return [] + + async def switch_to_evil_personality(self) -> bool: + """Disable miku_personality, enable evil_miku_personality, switch LLM to darkidol. + + Checks current plugin state first to avoid double-toggling + (the Cat API is a toggle, not enable/disable). + Returns True if all operations succeed, False if any fail. + """ + logger.info("🐱 Switching Cat to Evil Miku personality...") + success = True + + # Check current plugin state + active = await self.get_active_plugins() + + # Step 1: Disable normal personality (only if currently active) + if "miku_personality" in active: + if not await self.toggle_plugin("miku_personality"): + logger.error("Failed to disable miku_personality plugin") + success = False + await asyncio.sleep(1) + else: + logger.debug("miku_personality already disabled, skipping toggle") + + # Step 2: Enable evil personality (only if currently inactive) + if "evil_miku_personality" not in active: + if not await self.toggle_plugin("evil_miku_personality"): + logger.error("Failed to enable evil_miku_personality plugin") + success = False + else: + logger.debug("evil_miku_personality already active, skipping toggle") + + # Step 3: Switch LLM model to darkidol (the uncensored evil model) + if not await self.set_llm_model("darkidol"): + logger.error("Failed to switch Cat LLM to darkidol") + success = False + + return success + + async def switch_to_normal_personality(self) -> bool: + """Disable evil_miku_personality, enable 
miku_personality, switch LLM to llama3.1. + + Checks current plugin state first to avoid double-toggling. + Returns True if all operations succeed, False if any fail. + """ + logger.info("🐱 Switching Cat to normal Miku personality...") + success = True + + # Check current plugin state + active = await self.get_active_plugins() + + # Step 1: Disable evil personality (only if currently active) + if "evil_miku_personality" in active: + if not await self.toggle_plugin("evil_miku_personality"): + logger.error("Failed to disable evil_miku_personality plugin") + success = False + await asyncio.sleep(1) + else: + logger.debug("evil_miku_personality already disabled, skipping toggle") + + # Step 2: Enable normal personality (only if currently inactive) + if "miku_personality" not in active: + if not await self.toggle_plugin("miku_personality"): + logger.error("Failed to enable miku_personality plugin") + success = False + else: + logger.debug("miku_personality already active, skipping toggle") + + # Step 3: Switch LLM model back to llama3.1 (normal model) + if not await self.set_llm_model("llama3.1"): + logger.error("Failed to switch Cat LLM to llama3.1") + success = False + + return success + # Singleton instance cat_adapter = CatAdapter() diff --git a/bot/utils/evil_mode.py b/bot/utils/evil_mode.py index 06a9604..51db133 100644 --- a/bot/utils/evil_mode.py +++ b/bot/utils/evil_mode.py @@ -109,21 +109,68 @@ async def restore_evil_cat_state(): """Switch Cat to the correct personality plugin + LLM model based on evil mode state. Must be called after the event loop is running (e.g., in on_ready). + Waits for Cat to become reachable, then retries plugin switching with + verification to handle the common race condition where bot starts before Cat. 
""" try: from utils.cat_client import cat_adapter if not globals.USE_CHESHIRE_CAT: return - if globals.EVIL_MODE: - logger.info("Restoring Cat evil personality state on startup...") - await cat_adapter.switch_to_evil_personality() - else: - # Ensure normal state is active (in case evil was toggled off while Cat was down) - active = await cat_adapter.get_active_plugins() - if "evil_miku_personality" in active: - logger.info("Evil plugin still active after normal restore — switching to normal...") - await cat_adapter.switch_to_normal_personality() + # Wait for Cat to actually be reachable before attempting any API calls + if not await cat_adapter.wait_for_ready(max_wait=120, interval=5): + logger.error("Cat never became ready — cannot restore personality state") + return + + # Small extra delay to let Cat fully initialize plugins after health endpoint is up + await asyncio.sleep(3) + + max_retries = 3 + retry_delay = 5 + + for attempt in range(1, max_retries + 1): + try: + if globals.EVIL_MODE: + if attempt == 1: + logger.info("Restoring Cat evil personality state on startup...") + else: + logger.info(f"Retry {attempt}/{max_retries}: restoring Cat evil personality...") + await cat_adapter.switch_to_evil_personality() + else: + active = await cat_adapter.get_active_plugins() + if "evil_miku_personality" in active: + logger.info("Evil plugin still active after normal restore — switching to normal...") + await cat_adapter.switch_to_normal_personality() + else: + # Normal mode, normal plugins — nothing to do + return + + # Verify the switch actually worked + await asyncio.sleep(2) + active = await cat_adapter.get_active_plugins() + + if globals.EVIL_MODE: + if "evil_miku_personality" in active and "miku_personality" not in active: + logger.info("✅ Cat evil personality verified active") + return + else: + logger.warning(f"Cat plugin verification failed (attempt {attempt}): " + f"evil_active={'evil_miku_personality' in active}, " + f"normal_active={'miku_personality' in 
active}") + else: + if "miku_personality" in active and "evil_miku_personality" not in active: + logger.info("✅ Cat normal personality verified active") + return + else: + logger.warning(f"Cat plugin verification failed (attempt {attempt})") + + except Exception as e: + logger.error(f"Cat personality restore attempt {attempt} error: {e}") + + if attempt < max_retries: + await asyncio.sleep(retry_delay) + + logger.error(f"Failed to restore Cat personality after {max_retries} attempts") except Exception as e: logger.error(f"Failed to restore Cat personality state on startup: {e}")