fix: resolve Cat personality startup race condition

The bot was calling restore_evil_cat_state() in on_ready() before
Cheshire Cat finished booting (~25s), causing all plugin toggle API calls
to fail silently. As a result, the Evil Miku plugin was left disabled and
the bot used Cat's default personality instead.

Changes:
- cat_client.py: add wait_for_ready() that polls Cat health endpoint
  every 5s for up to 120s before attempting any admin API calls
- evil_mode.py: rewrite restore_evil_cat_state() with:
  - wait_for_ready() gate before any plugin/model switching
  - 3-second extra delay after Cat is up (lets the plugin registry finish loading)
  - up to 3 attempts in total, 5s apart, when a switch fails or errors
  - post-switch verification that the correct plugins are actually active

Also fixes helcyon model references that leaked into the container image
(cat_client.py was switching Cat's LLM to 'helcyon' which has no
llama-swap handler; reverted to correct 'darkidol' / 'llama3.1').
This commit is contained in:
2026-03-01 00:57:13 +02:00
parent f0b5d71097
commit a0a16e6784
2 changed files with 278 additions and 10 deletions

View File

@@ -109,21 +109,68 @@ async def restore_evil_cat_state():
"""Switch Cat to the correct personality plugin + LLM model based on evil mode state.
Must be called after the event loop is running (e.g., in on_ready).
Waits for Cat to become reachable, then retries plugin switching with
verification to handle the common race condition where bot starts before Cat.
"""
try:
from utils.cat_client import cat_adapter
if not globals.USE_CHESHIRE_CAT:
return
if globals.EVIL_MODE:
logger.info("Restoring Cat evil personality state on startup...")
await cat_adapter.switch_to_evil_personality()
else:
# Ensure normal state is active (in case evil was toggled off while Cat was down)
active = await cat_adapter.get_active_plugins()
if "evil_miku_personality" in active:
logger.info("Evil plugin still active after normal restore — switching to normal...")
await cat_adapter.switch_to_normal_personality()
# Wait for Cat to actually be reachable before attempting any API calls
if not await cat_adapter.wait_for_ready(max_wait=120, interval=5):
logger.error("Cat never became ready — cannot restore personality state")
return
# Small extra delay to let Cat fully initialize plugins after health endpoint is up
await asyncio.sleep(3)
max_retries = 3
retry_delay = 5
for attempt in range(1, max_retries + 1):
try:
if globals.EVIL_MODE:
if attempt == 1:
logger.info("Restoring Cat evil personality state on startup...")
else:
logger.info(f"Retry {attempt}/{max_retries}: restoring Cat evil personality...")
await cat_adapter.switch_to_evil_personality()
else:
active = await cat_adapter.get_active_plugins()
if "evil_miku_personality" in active:
logger.info("Evil plugin still active after normal restore — switching to normal...")
await cat_adapter.switch_to_normal_personality()
else:
# Normal mode, normal plugins — nothing to do
return
# Verify the switch actually worked
await asyncio.sleep(2)
active = await cat_adapter.get_active_plugins()
if globals.EVIL_MODE:
if "evil_miku_personality" in active and "miku_personality" not in active:
logger.info("✅ Cat evil personality verified active")
return
else:
logger.warning(f"Cat plugin verification failed (attempt {attempt}): "
f"evil_active={'evil_miku_personality' in active}, "
f"normal_active={'miku_personality' in active}")
else:
if "miku_personality" in active and "evil_miku_personality" not in active:
logger.info("✅ Cat normal personality verified active")
return
else:
logger.warning(f"Cat plugin verification failed (attempt {attempt})")
except Exception as e:
logger.error(f"Cat personality restore attempt {attempt} error: {e}")
if attempt < max_retries:
await asyncio.sleep(retry_delay)
logger.error(f"Failed to restore Cat personality after {max_retries} attempts")
except Exception as e:
logger.error(f"Failed to restore Cat personality state on startup: {e}")