fix: resolve Cat personality startup race condition

Bot was calling restore_evil_cat_state() in on_ready() before Cheshire
Cat finished booting (~25s), causing all plugin toggle API calls to fail
silently. Evil Miku plugin was left disabled and the bot used Cat's
default personality instead.

Changes:
- cat_client.py: add wait_for_ready() that polls Cat health endpoint
  every 5s for up to 120s before attempting any admin API calls
- evil_mode.py: rewrite restore_evil_cat_state() with:
  - wait_for_ready() gate before any plugin/model switching
  - 3-second extra delay after Cat is up (plugin registry fully loaded)
  - up to 3 retries on failure
  - post-switch verification that the correct plugins are actually active

Also fixes helcyon model references that leaked into the container image
(cat_client.py was switching Cat's LLM to 'helcyon' which has no
llama-swap handler; reverted to correct 'darkidol' / 'llama3.1').
This commit is contained in:
2026-03-01 00:57:13 +02:00
parent f0b5d71097
commit a0a16e6784
2 changed files with 278 additions and 10 deletions

View File

@@ -146,10 +146,15 @@ class CatAdapter:
payload["discord_guild_id"] = str(guild_id)
if author_name:
payload["discord_author_name"] = author_name
if mood:
# When evil mode is active, send the evil mood name instead of the normal mood
if globals.EVIL_MODE:
payload["discord_mood"] = getattr(globals, 'EVIL_DM_MOOD', 'evil_neutral')
elif mood:
payload["discord_mood"] = mood
if response_type:
payload["discord_response_type"] = response_type
# Pass evil mode flag so discord_bridge stores it in working_memory
payload["discord_evil_mode"] = globals.EVIL_MODE
try:
# Build WebSocket URL from HTTP base URL
@@ -634,6 +639,222 @@ class CatAdapter:
logger.error(f"Consolidation error: {e}")
return None
# ====================================================================
# Admin API helpers — plugin toggling & LLM model switching
# ====================================================================
async def wait_for_ready(self, max_wait: int = 120, interval: int = 5) -> bool:
    """Poll the Cat root endpoint until it answers HTTP 200 or time runs out.

    Used on startup to avoid race conditions when the bot starts before Cat.

    Args:
        max_wait: Maximum number of seconds to keep polling.
        interval: Seconds to sleep between two polls.

    Returns:
        True once Cat responds with HTTP 200 (``_healthy`` and
        ``_last_health_check`` are updated in that case), False if
        ``max_wait`` elapsed first.
    """
    # Measure the timeout on the monotonic clock so that wall-clock
    # adjustments (NTP sync, manual changes) cannot stretch or cut the wait.
    started = time.monotonic()
    deadline = started + max_wait
    attempt = 0
    probe_timeout = aiohttp.ClientTimeout(total=5)

    # One session is reused for every probe instead of building one per poll.
    async with aiohttp.ClientSession() as session:
        while time.monotonic() < deadline:
            attempt += 1
            try:
                async with session.get(
                    f"{self._base_url}/",
                    timeout=probe_timeout,
                ) as resp:
                    if resp.status == 200:
                        elapsed = time.monotonic() - started
                        logger.info(f"🐱 Cat is ready (took {elapsed:.1f}s, {attempt} attempts)")
                        self._healthy = True
                        # Kept as wall-clock time, exactly as before.
                        self._last_health_check = time.time()
                        return True
                    logger.debug(f"Cat readiness probe returned HTTP {resp.status}")
            except Exception as e:
                # Failures are expected while Cat is still booting: keep
                # polling, but leave a trace for troubleshooting.
                logger.debug(f"Cat readiness probe failed: {e}")

            if attempt == 1:
                logger.info(f"⏳ Waiting for Cat to become ready (up to {max_wait}s)...")

            # Never sleep past the deadline.
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            await asyncio.sleep(min(interval, remaining))

    logger.error(f"Cat did not become ready within {max_wait}s ({attempt} attempts)")
    return False
async def toggle_plugin(self, plugin_id: str) -> bool:
    """Flip a Cat plugin between enabled and disabled through the admin API.

    Sends PUT /plugins/toggle/{plugin_id}. The endpoint is a toggle, so the
    resulting state depends on the plugin's state before the call.

    Returns:
        True when the Cat answers HTTP 200; False on any other status or
        when the request raises.
    """
    endpoint = f"{self._base_url}/plugins/toggle/{plugin_id}"
    try:
        async with aiohttp.ClientSession() as http:
            async with http.put(
                endpoint,
                headers=self._get_headers(),
                timeout=aiohttp.ClientTimeout(total=15),
            ) as response:
                if response.status != 200:
                    # Include the response body so the failure is diagnosable.
                    detail = await response.text()
                    logger.error(f"Cat plugin toggle failed ({response.status}): {detail}")
                    return False
                logger.info(f"🐱 Toggled Cat plugin: {plugin_id}")
                return True
    except Exception as err:
        logger.error(f"Cat plugin toggle error for {plugin_id}: {err}")
        return False
async def set_llm_model(self, model_name: str) -> bool:
    """Point the Cheshire Cat at a different LLM model via the settings API.

    Cat settings are addressed by UUID, so this is a two-request flow:

    1. GET /settings/ and pick the ``setting_id`` of the entry named
       ``LLMOpenAIChatConfig``.
    2. PUT /settings/{setting_id} with a config carrying ``model_name``.

    llama-swap handles the actual model loading based on model_name.

    Returns:
        True on success; False if either request fails, the setting cannot
        be found, or an exception is raised.
    """
    try:
        # Request 1: look up the UUID of the LLMOpenAIChatConfig setting.
        async with aiohttp.ClientSession() as lookup_session:
            async with lookup_session.get(
                f"{self._base_url}/settings/",
                headers=self._get_headers(),
                timeout=aiohttp.ClientTimeout(total=10),
            ) as lookup_resp:
                if lookup_resp.status != 200:
                    logger.error(f"Cat settings GET failed ({lookup_resp.status})")
                    return False
                listing = await lookup_resp.json()
                # First entry with the matching name wins.
                setting_id = next(
                    (
                        entry["setting_id"]
                        for entry in listing.get("settings", [])
                        if entry.get("name") == "LLMOpenAIChatConfig"
                    ),
                    None,
                )

        if not setting_id:
            logger.error("Could not find LLMOpenAIChatConfig setting_id in Cat settings")
            return False

        # Request 2: overwrite the setting with the new model name.
        llm_value = {
            "openai_api_key": "sk-dummy",
            "model_name": model_name,
            "temperature": 0.8,
            "streaming": False,
        }
        new_config = {
            "name": "LLMOpenAIChatConfig",
            "value": llm_value,
            "category": "llm_factory",
        }
        async with aiohttp.ClientSession() as update_session:
            async with update_session.put(
                f"{self._base_url}/settings/{setting_id}",
                json=new_config,
                headers=self._get_headers(),
                timeout=aiohttp.ClientTimeout(total=15),
            ) as update_resp:
                if update_resp.status != 200:
                    detail = await update_resp.text()
                    logger.error(f"Cat LLM model switch failed ({update_resp.status}): {detail}")
                    return False
                logger.info(f"🐱 Set Cat LLM model to: {model_name}")
                return True
    except Exception as err:
        logger.error(f"Cat LLM model switch error: {err}")
        return False
async def get_active_plugins(self) -> list:
    """Return the IDs of the Cat plugins currently flagged as active.

    Issues GET /plugins, whose response has the shape
    {"installed": [...], "filters": {...}}; each installed entry carries
    "id" and "active" fields.

    Returns:
        The list of active plugin IDs. A non-200 response or any exception
        is logged and yields an empty list, so callers cannot tell a failed
        lookup from "no active plugins".
    """
    endpoint = f"{self._base_url}/plugins"
    try:
        async with aiohttp.ClientSession() as http:
            async with http.get(
                endpoint,
                headers=self._get_headers(),
                timeout=aiohttp.ClientTimeout(total=10),
            ) as response:
                if response.status != 200:
                    logger.error(f"Cat get_active_plugins failed ({response.status})")
                    return []
                listing = await response.json()
                active_ids = []
                for plugin in listing.get("installed", []):
                    if plugin.get("active"):
                        active_ids.append(plugin["id"])
                return active_ids
    except Exception as err:
        logger.error(f"Cat get_active_plugins error: {err}")
        return []
async def switch_to_evil_personality(self) -> bool:
    """Put the Cat into Evil Miku mode.

    Turns ``miku_personality`` off, turns ``evil_miku_personality`` on and
    selects the ``darkidol`` LLM. The plugin endpoint is a toggle rather
    than an explicit enable/disable, so the current plugin state is read
    first and a plugin is only toggled when it is in the wrong state.

    NOTE(review): get_active_plugins() also returns [] when the lookup
    fails, in which case both plugins are treated as inactive here.

    Returns:
        True if every step succeeded, False if any step failed. Later steps
        are still attempted after an earlier one fails.
    """
    logger.info("🐱 Switching Cat to Evil Miku personality...")
    all_ok = True

    # Snapshot of plugin state taken once, before any toggling.
    enabled = await self.get_active_plugins()

    # Step 1: the normal personality must end up disabled.
    if "miku_personality" not in enabled:
        logger.debug("miku_personality already disabled, skipping toggle")
    else:
        disabled = await self.toggle_plugin("miku_personality")
        if not disabled:
            logger.error("Failed to disable miku_personality plugin")
            all_ok = False
        # Pause after the toggle attempt, whether or not it succeeded.
        await asyncio.sleep(1)

    # Step 2: the evil personality must end up enabled.
    if "evil_miku_personality" in enabled:
        logger.debug("evil_miku_personality already active, skipping toggle")
    else:
        activated = await self.toggle_plugin("evil_miku_personality")
        if not activated:
            logger.error("Failed to enable evil_miku_personality plugin")
            all_ok = False

    # Step 3: move the LLM over to darkidol (the uncensored evil model).
    model_switched = await self.set_llm_model("darkidol")
    if not model_switched:
        logger.error("Failed to switch Cat LLM to darkidol")
        all_ok = False

    return all_ok
async def switch_to_normal_personality(self) -> bool:
    """Put the Cat back into normal Miku mode.

    Turns ``evil_miku_personality`` off, turns ``miku_personality`` on and
    selects the ``llama3.1`` LLM. The current plugin state is read first so
    that a plugin already in the desired state is not toggled again.

    NOTE(review): get_active_plugins() also returns [] when the lookup
    fails, in which case both plugins are treated as inactive here.

    Returns:
        True if every step succeeded, False if any step failed. Later steps
        are still attempted after an earlier one fails.
    """
    logger.info("🐱 Switching Cat to normal Miku personality...")
    all_ok = True

    # Snapshot of plugin state taken once, before any toggling.
    enabled = await self.get_active_plugins()

    # Step 1: the evil personality must end up disabled.
    if "evil_miku_personality" not in enabled:
        logger.debug("evil_miku_personality already disabled, skipping toggle")
    else:
        disabled = await self.toggle_plugin("evil_miku_personality")
        if not disabled:
            logger.error("Failed to disable evil_miku_personality plugin")
            all_ok = False
        # Pause after the toggle attempt, whether or not it succeeded.
        await asyncio.sleep(1)

    # Step 2: the normal personality must end up enabled.
    if "miku_personality" in enabled:
        logger.debug("miku_personality already active, skipping toggle")
    else:
        activated = await self.toggle_plugin("miku_personality")
        if not activated:
            logger.error("Failed to enable miku_personality plugin")
            all_ok = False

    # Step 3: move the LLM back to llama3.1 (normal model).
    model_switched = await self.set_llm_model("llama3.1")
    if not model_switched:
        logger.error("Failed to switch Cat LLM to llama3.1")
        all_ok = False

    return all_ok
# Singleton instance — the shared adapter object that other modules import
# (e.g. `from utils.cat_client import cat_adapter`).
cat_adapter = CatAdapter()

View File

async def restore_evil_cat_state():
    """Switch Cat to the correct personality plugin + LLM model based on evil mode state.

    Must be called after the event loop is running (e.g., in on_ready).

    Waits for Cat to become reachable, then retries plugin switching with
    verification to handle the common race condition where the bot starts
    before Cat. Does nothing when ``globals.USE_CHESHIRE_CAT`` is false.

    Returns:
        None. Every failure is logged rather than raised.
    """
    try:
        # Imported inside the function, as in the original code.
        from utils.cat_client import cat_adapter

        if not globals.USE_CHESHIRE_CAT:
            return

        # Wait for Cat to actually be reachable before attempting any API calls.
        if not await cat_adapter.wait_for_ready(max_wait=120, interval=5):
            logger.error("Cat never became ready — cannot restore personality state")
            return

        # Small extra delay to let Cat fully initialize plugins after the
        # health endpoint is up.
        await asyncio.sleep(3)

        max_retries = 3
        retry_delay = 5

        for attempt in range(1, max_retries + 1):
            try:
                if globals.EVIL_MODE:
                    if attempt == 1:
                        logger.info("Restoring Cat evil personality state on startup...")
                    else:
                        logger.info(f"Retry {attempt}/{max_retries}: restoring Cat evil personality...")
                    await cat_adapter.switch_to_evil_personality()
                else:
                    # Ensure normal state is active (in case evil was toggled
                    # off while Cat was down).
                    active = await cat_adapter.get_active_plugins()
                    if "miku_personality" in active and "evil_miku_personality" not in active:
                        # Normal mode, normal plugins — nothing to do.
                        return
                    # Anything else must be repaired. That includes an empty
                    # list, which get_active_plugins() also returns when the
                    # lookup itself failed; returning here would skip the
                    # retries and leave Cat in an unknown state.
                    if "evil_miku_personality" in active:
                        logger.info("Evil plugin still active after normal restore — switching to normal...")
                    else:
                        logger.info("Normal personality plugin not reported active — switching to normal...")
                    await cat_adapter.switch_to_normal_personality()

                # Verify the switch actually worked.
                await asyncio.sleep(2)
                active = await cat_adapter.get_active_plugins()

                if globals.EVIL_MODE:
                    if "evil_miku_personality" in active and "miku_personality" not in active:
                        logger.info("✅ Cat evil personality verified active")
                        return
                    logger.warning(f"Cat plugin verification failed (attempt {attempt}): "
                                   f"evil_active={'evil_miku_personality' in active}, "
                                   f"normal_active={'miku_personality' in active}")
                else:
                    if "miku_personality" in active and "evil_miku_personality" not in active:
                        logger.info("✅ Cat normal personality verified active")
                        return
                    logger.warning(f"Cat plugin verification failed (attempt {attempt})")

            except Exception as e:
                logger.error(f"Cat personality restore attempt {attempt} error: {e}")

            # Back off before the next attempt; no sleep after the last one.
            if attempt < max_retries:
                await asyncio.sleep(retry_delay)

        logger.error(f"Failed to restore Cat personality after {max_retries} attempts")

    except Exception as e:
        logger.error(f"Failed to restore Cat personality state on startup: {e}")