fix: resolve Cat personality startup race condition

The bot was calling restore_evil_cat_state() in on_ready() before Cheshire Cat finished booting (~25s), causing all plugin toggle API calls to fail silently. The Evil Miku plugin was left disabled and the bot used Cat's default personality instead.

Changes:
- cat_client.py: add wait_for_ready(), which polls the Cat health endpoint every 5s for up to 120s before attempting any admin API calls
- evil_mode.py: rewrite restore_evil_cat_state() with:
  - a wait_for_ready() gate before any plugin/model switching
  - a 3-second extra delay after Cat is up (so the plugin registry is fully loaded)
  - up to 3 attempts in total, retrying on failure
  - post-switch verification that the correct plugins are actually active

Also fixes helcyon model references that leaked into the container image (cat_client.py was switching Cat's LLM to 'helcyon', which has no llama-swap handler; reverted to the correct 'darkidol' / 'llama3.1').
2026-03-01 00:57:13 +02:00
parent f0b5d71097
commit a0a16e6784
2 changed files with 278 additions and 10 deletions
--- a/bot/utils/evil_mode.py
+++ b/bot/utils/evil_mode.py
@@ -109,21 +109,68 @@ async def restore_evil_cat_state():
    """Switch Cat to the correct personality plugin + LLM model based on evil mode state.
    
    Must be called after the event loop is running (e.g., in on_ready).
+    Waits for Cat to become reachable, then retries plugin switching with
+    verification to handle the common race condition where bot starts before Cat.
    """
    try:
        from utils.cat_client import cat_adapter
        if not globals.USE_CHESHIRE_CAT:
            return
        
-        if globals.EVIL_MODE:
-            logger.info("Restoring Cat evil personality state on startup...")
-            await cat_adapter.switch_to_evil_personality()
-        else:
-            # Ensure normal state is active (in case evil was toggled off while Cat was down)
-            active = await cat_adapter.get_active_plugins()
-            if "evil_miku_personality" in active:
-                logger.info("Evil plugin still active after normal restore — switching to normal...")
-                await cat_adapter.switch_to_normal_personality()
+        # Wait for Cat to actually be reachable before attempting any API calls
+        if not await cat_adapter.wait_for_ready(max_wait=120, interval=5):
+            logger.error("Cat never became ready — cannot restore personality state")
+            return
+        
+        # Small extra delay to let Cat fully initialize plugins after health endpoint is up
+        await asyncio.sleep(3)
+        
+        max_retries = 3
+        retry_delay = 5
+        
+        for attempt in range(1, max_retries + 1):
+            try:
+                if globals.EVIL_MODE:
+                    if attempt == 1:
+                        logger.info("Restoring Cat evil personality state on startup...")
+                    else:
+                        logger.info(f"Retry {attempt}/{max_retries}: restoring Cat evil personality...")
+                    await cat_adapter.switch_to_evil_personality()
+                else:
+                    active = await cat_adapter.get_active_plugins()
+                    if "evil_miku_personality" in active:
+                        logger.info("Evil plugin still active after normal restore — switching to normal...")
+                        await cat_adapter.switch_to_normal_personality()
+                    else:
+                        # Normal mode, normal plugins — nothing to do
+                        return
+                
+                # Verify the switch actually worked
+                await asyncio.sleep(2)
+                active = await cat_adapter.get_active_plugins()
+                
+                if globals.EVIL_MODE:
+                    if "evil_miku_personality" in active and "miku_personality" not in active:
+                        logger.info("✅ Cat evil personality verified active")
+                        return
+                    else:
+                        logger.warning(f"Cat plugin verification failed (attempt {attempt}): "
+                                       f"evil_active={'evil_miku_personality' in active}, "
+                                       f"normal_active={'miku_personality' in active}")
+                else:
+                    if "miku_personality" in active and "evil_miku_personality" not in active:
+                        logger.info("✅ Cat normal personality verified active")
+                        return
+                    else:
+                        logger.warning(f"Cat plugin verification failed (attempt {attempt})")
+                
+            except Exception as e:
+                logger.error(f"Cat personality restore attempt {attempt} error: {e}")
+            
+            if attempt < max_retries:
+                await asyncio.sleep(retry_delay)
+        
+        logger.error(f"Failed to restore Cat personality after {max_retries} attempts")
    except Exception as e:
        logger.error(f"Failed to restore Cat personality state on startup: {e}")