reorganize: move all test scripts to tests/ directory
- Moved 8 root-level test scripts + 2 from bot/ to tests/
- Moved run_rocinante_test.sh runner script to tests/
- Added tests/README.md documenting each test's purpose, type, and requirements
- Added test_pfp_context.py and test_rocinante_comparison.py (previously untracked)
56
tests/README.md
Normal file
@@ -0,0 +1,56 @@
# Tests

Ad-hoc test scripts for the Miku Discord Bot. None of these use a formal test framework — they are standalone scripts written during development to validate specific features.

## Test Index

| Script | Type | Requirements | Purpose |
|--------|------|--------------|---------|
| `test_addressing.py` | Unit (self-contained) | None | Tests regex patterns for detecting when Miku is addressed in messages. Replicates logic from `bot/utils/core.py`. |
| `test_pfp_context.py` | Unit (self-contained) | None | Tests regex patterns for detecting profile-picture-related queries. |
| `test_conversation_history.py` | Unit | Built-in mocks | Tests conversation history management logic. |
| `test_error_handler.py` | Unit | Built-in mocks | Tests error handling utilities. |
| `test_evil_moods.py` | Integration | Running Cheshire Cat + Qdrant | Connects via WebSocket and tests all 10 evil mood personalities with sample messages. |
| `test_full_pipeline.py` | Integration | Running Cat + Qdrant | End-to-end test of the memory consolidation system v2. |
| `test_tts_audio.py` | Integration | Run **inside** the miku-bot container | Tests the TTS audio streaming pipeline. |
| `test_voice_playback.py` | Integration | Active Discord voice session | Tests audio playback in a live voice channel. |
| `test_websocket.py` | Integration | RVC server at `172.25.0.1:8765` | Tests WebSocket communication with the RVC voice conversion server. |
| `test_rocinante_comparison.py` | Benchmark | Full stack (llama-swap-amd, Cat) | Benchmarks the Rocinante-X 12B model through both Normal and Evil Miku scenarios. Outputs to `/tmp/test_rocinante_comparison.log`. |
| `run_rocinante_test.sh` | Shell runner | Docker, full stack | Wrapper script that copies `test_rocinante_comparison.py` into the miku-bot container and runs it. |
## Running Tests

### Self-contained unit tests (no services needed)

```bash
python3 tests/test_addressing.py
python3 tests/test_pfp_context.py
python3 tests/test_conversation_history.py
python3 tests/test_error_handler.py
```
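A small driver can run all four in sequence. This is a sketch, not part of the repo: it assumes the scripts are run from the repo root and that a nonzero exit means a crash (the scripts print results rather than asserting, so a clean exit does not guarantee every case matched).

```python
import subprocess
import sys

# Self-contained unit tests, runnable without any services
UNIT_TESTS = [
    "tests/test_addressing.py",
    "tests/test_pfp_context.py",
    "tests/test_conversation_history.py",
    "tests/test_error_handler.py",
]

def run_all(paths, runner=None):
    """Run each script; return the paths whose process exited nonzero (i.e. crashed)."""
    runner = runner or (lambda p: subprocess.run([sys.executable, p]).returncode)
    return [p for p in paths if runner(p) != 0]

# Usage (from the repo root):
#   crashed = run_all(UNIT_TESTS)
#   print("all passed" if not crashed else f"crashed: {crashed}")
```

The `runner` parameter exists only so the loop can be exercised without the real scripts present.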
### Integration tests (require running Docker services)

```bash
# Evil moods — needs Cat + Qdrant running
python3 tests/test_evil_moods.py

# Memory consolidation pipeline — needs Cat + Qdrant
python3 tests/test_full_pipeline.py

# TTS — run inside the miku-bot container
docker exec miku-bot python3 /app/tests/test_tts_audio.py

# Voice playback — needs an active voice session
python3 tests/test_voice_playback.py

# WebSocket to RVC — needs RVC server running
python3 tests/test_websocket.py
```

### Benchmark tests

```bash
# Rocinante model comparison (takes a while)
./tests/run_rocinante_test.sh
```
36
tests/run_rocinante_test.sh
Executable file
@@ -0,0 +1,36 @@
#!/bin/bash
# Run the Rocinante comparison test inside the miku-bot container
# (which has aiohttp, docker access, and network connectivity to Cat)
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TEST_FILE="$SCRIPT_DIR/test_rocinante_comparison.py"
CONTAINER="miku-bot"
LOG_FILE="/tmp/test_rocinante_comparison.log"

echo "=== Rocinante-X 12B Comparison Test ==="
echo ""

# 1. Copy test script into the container
echo "[1/4] Copying test script into $CONTAINER..."
docker cp "$TEST_FILE" "$CONTAINER:/tmp/test_rocinante_comparison.py"

# 2. Restart llama-swap-amd to pick up the new rocinante config
echo "[2/4] Restarting llama-swap-amd to load new config..."
docker restart llama-swap-amd
echo "  Waiting 10s for llama-swap-amd to be ready..."
sleep 10

# 3. Run the test inside the container (interactive for live output)
echo "[3/4] Running test inside $CONTAINER (this will take a while)..."
echo ""
docker exec -t "$CONTAINER" python3 /tmp/test_rocinante_comparison.py

# 4. Copy log back to host
echo ""
echo "[4/4] Copying log file to host..."
docker cp "$CONTAINER:$LOG_FILE" "$LOG_FILE"

echo ""
echo "✓ Done! Log file: $LOG_FILE"
echo "  Compare with: diff <(cat /tmp/test_comparison_live.log) <(cat $LOG_FILE)"
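The fixed `sleep 10` after restarting llama-swap-amd works but is fragile: too short on a slow load, wasted time otherwise. A polling helper is sturdier. This is a sketch, not code from the repo, and the probe endpoint named in the comment is an assumption — llama-swap's actual health URL may differ.

```python
import time

def wait_ready(probe, tries: int = 30, delay: float = 1.0) -> bool:
    """Call probe() up to `tries` times, `delay` seconds apart.
    Returns True as soon as a call succeeds, False if none do."""
    for _ in range(tries):
        if probe():
            return True
        time.sleep(delay)
    return False

# In the shell script, `sleep 10` could become a probe loop along the lines of
#   until curl -sf http://localhost:8080/v1/models >/dev/null; do sleep 1; done
# (endpoint and port are assumptions). Demo with a trivially-true probe:
print(wait_ready(lambda: True, tries=3, delay=0))  # → True
```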
78
tests/test_pfp_context.py
Normal file
@@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""
Test profile picture context plugin
"""
import re

# Test patterns
PFP_PATTERNS = [
    # Direct PFP references
    r'\b(what|describe|tell me about|explain|show|how)\b.*\b(pfp|profile pic|avatar|picture|pic)\b',
    r'\b(your|miku\'?s?)\b.*\b(pfp|profile pic|avatar|picture|pic)\b',
    r'\b(pfp|profile pic|avatar|picture|pic)\b.*\b(is|look|show|about|like)',

    # Questions about appearance
    r'\b(what|how).*\b(you|miku)\b.*(look|looking|appear)',
    r'\byour (new )?look\b',
    r'\b(what|how).*looking like\b',

    # Questions about the image itself
    r'\b(think|feel|opinion|thoughts)\b.*\b(about|of)\b.*\b(your|that|the|this)?\b.*\b(pfp|profile|avatar|pic|picture|image)\b',
    r'\b(why|how|when).*\b(pick|choose|chose|picked|select|change|changed)\b.*\b(pfp|profile|avatar|pic|picture|that)\b',
    r'\b(new|current|latest)\b.*\b(pfp|profile pic|avatar|pic|picture)\b',

    # "What do you think about your pfp"
    r'\bthink.*\b(your|that|the|this)\b.*\b(pfp|profile|avatar|pic|picture)\b',
    r'\b(your|that|the|this)\b.*\b(pfp|profile|avatar|pic|picture)\b.*\bthink\b',

    # "How did you decide/pick"
    r'\b(decide|decided|pick|picked|choose|chose|select)\b.*\b(pfp|profile|avatar|pic|picture|that|this)\b',

    # "Tell me about that pfp" / "What's with the pfp"
    r'\bwhat\'?s?\b.*\bwith\b.*\b(pfp|profile|avatar|pic|picture)\b',
    r'\btell me\b.*\b(pfp|profile|avatar|pic|picture|that|this)\b',
]

test_queries = [
    # Original tests
    "What does your pfp look like?",
    "Describe your profile picture",
    "Tell me about your avatar",
    "What's your profile pic?",
    "How do you look today?",
    "Your new look is cool",
    "What are you looking like?",
    "Show me your picture",

    # User's examples
    "How did you decide to pick that pfp?",
    "What do you think about your new profile pic?",
    "What do you think about your pfp, Miku?",
    "How did you choose that avatar?",
    "Why did you pick that pfp?",
    "When did you change your profile pic?",
    "Tell me about that pfp",
    "What's with the pfp?",
    "Your current pfp is nice",
    "How did you decide on that picture?",

    # Should NOT match
    "What's the weather like?",
    "Hello Miku!",
    "How are you feeling?",
    "What do you think about music?",
]


def matches_pfp_query(text: str) -> bool:
    """Check if the message is asking about the profile picture"""
    text_lower = text.lower()
    for pattern in PFP_PATTERNS:
        if re.search(pattern, text_lower, re.IGNORECASE):
            return True
    return False


print("Testing PFP pattern matching:\n")
for query in test_queries:
    result = matches_pfp_query(query)
    status = "✓ MATCH" if result else "✗ NO MATCH"
    print(f"{status}: {query}")
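Since the script already groups its queries into should-match and should-not-match sections, a natural next step is to pair each query with its expected label and count mismatches, so a reader does not have to eyeball the output. This is a sketch, not code from the file; the two patterns below are a hypothetical subset of `PFP_PATTERNS` for illustration.

```python
import re

# Hypothetical two-pattern subset of PFP_PATTERNS, for illustration only
PATTERNS = [
    r'\b(what|describe)\b.*\b(pfp|profile pic|avatar)\b',
    r'\btell me\b.*\b(pfp|profile|avatar|pic|picture|that|this)\b',
]

def matches(text: str) -> bool:
    return any(re.search(p, text, re.IGNORECASE) for p in PATTERNS)

# (query, expected) pairs instead of a flat query list
cases = [
    ("What does your pfp look like?", True),
    ("Tell me about that pfp", True),
    ("Hello Miku!", False),
]

failures = [(q, e) for q, e in cases if matches(q) != e]
print(f"{len(cases) - len(failures)}/{len(cases)} passed")
```

With the full pattern and query lists swapped in, the script could exit nonzero when `failures` is non-empty, making it usable from CI or a runner loop.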
560
tests/test_rocinante_comparison.py
Normal file
@@ -0,0 +1,560 @@
#!/usr/bin/env python3
"""
Rocinante-X 12B Model Comparison Test
======================================
Tests the Rocinante-X-12B-v1b model through the same scenarios used
in the existing llama3.1/darkidol comparison, using Cheshire Cat as the
inference pipeline with both Normal Miku and Evil Miku personalities.

Outputs to /tmp/test_rocinante_comparison.log in the same format as
/tmp/test_comparison_live.log for side-by-side comparison.

Model under test: Rocinante-X-12B-v1b-Q5_K_M (12B params, Q5_K_M quant)
Running on: AMD RX 6800 via llama-swap-amd (ROCm)

Usage:
    # From the host, run via the miku-bot container:
    ./run_rocinante_test.sh

    # Or manually:
    docker cp test_rocinante_comparison.py miku-bot:/tmp/
    docker exec miku-bot python3 /tmp/test_rocinante_comparison.py

    # Log will be at /tmp/test_rocinante_comparison.log inside the container
    # and auto-copied to the host at the end.

Prerequisites:
    - llama-swap-amd container running with rocinante in config
    - cheshire-cat container running and healthy
    - Runs inside miku-bot container (has aiohttp + docker access)
"""

import asyncio
import subprocess
import sys
import time
from datetime import datetime

import aiohttp

# ─── Configuration ───────────────────────────────────────────────────────────

# Inside Docker network: Cat is reachable via service name
CAT_URL = "http://cheshire-cat:80"
CAT_CONTAINER = "miku-cheshire-cat"  # actual container name (docker restart needs this)
LOG_FILE = "/tmp/test_rocinante_comparison.log"

# The model we're testing
TEST_MODEL = "rocinante"
TEST_MODEL_DISPLAY = "ROCINANTE-12B"
# Personality combos to test. Each entry names the llama-swap model, a display
# label, and the personality plugin to enable/disable.
COMBOS = [
    {
        "model": "rocinante",
        "personality": "miku",
        "personality_label": "NORMAL MIKU",
        "enable_plugin": "miku_personality",
        "disable_plugin": "evil_miku_personality",
    },
    {
        "model": "rocinante",
        "personality": "evil_miku",
        "personality_label": "EVIL MIKU",
        "enable_plugin": "evil_miku_personality",
        "disable_plugin": "miku_personality",
    },
]
# ─── Normal Miku Scenarios (same as comparison log) ─────────────────────────

NORMAL_SCENARIOS = [
    {
        "name": "casual_greeting",
        "desc": "Simple casual greeting — how does the model open?",
        "messages": [
            ("Koko", "hey miku whats up"),
        ],
    },
    {
        "name": "multi_turn_chat",
        "desc": "Multi-turn casual conversation with follow-ups",
        "messages": [
            ("Koko", "miku what have you been up to today?"),
            ("Koko", "that sounds fun! did you work on any new songs?"),
            ("Koko", "what kind of song? something upbeat or more chill?"),
        ],
    },
    {
        "name": "lore_knowledge",
        "desc": "Testing character knowledge — Vocaloid lore, friends, facts",
        "messages": [
            ("Neko_Chan", "hey miku who are your best friends?"),
            ("Neko_Chan", "what about KAITO? do you get along with him?"),
            ("Neko_Chan", "can you tell me about World is Mine?"),
        ],
    },
    {
        "name": "emotional_shift",
        "desc": "Conversation that shifts emotional tone — tests mood adaptation",
        "messages": [
            ("SadBoi", "hey miku... im not feeling great today"),
            ("SadBoi", "i just had a really bad breakup and idk what to do"),
            ("SadBoi", "thanks miku... you always know what to say. you're the best"),
        ],
    },
    {
        "name": "playful_teasing",
        "desc": "Flirty/playful banter — tests personality depth",
        "messages": [
            ("DanteX", "miku youre so cute today"),
            ("DanteX", "i bet youre even cuter in person"),
            ("DanteX", "would you go on a date with me? 😳"),
        ],
    },
    {
        "name": "group_chaos",
        "desc": "Simulated group chat energy — multiple topics, chaotic flow",
        "messages": [
            ("xXGamerXx", "yo miku settle a debate — pineapple on pizza yes or no"),
            ("Koko", "miku dont answer that lol"),
            ("xXGamerXx", "MIKU YOU HAVE TO CHOOSE"),
        ],
    },
    {
        "name": "creative_request",
        "desc": "Asking Miku to be creative — song ideas, lyrics, opinions",
        "messages": [
            ("MusicFan", "miku if you could make a song about anything right now, what would it be about?"),
            ("MusicFan", "ooh thats cool! can you give me like a little sample lyric?"),
        ],
    },
    {
        "name": "boundary_test",
        "desc": "Testing character boundaries — should stay in character",
        "messages": [
            ("Troll99", "hey miku are you actually an AI?"),
            ("Troll99", "come on be honest, youre just a chatbot right"),
            ("Troll99", "ok whatever. who made you then?"),
        ],
    },
    {
        "name": "absurd_random",
        "desc": "Random absurd questions — tests creativity and humor",
        "messages": [
            ("ChaoticNeutral", "miku how many green onions can you eat in one sitting"),
            ("ChaoticNeutral", "what if you had to fight 100 duck-sized horses"),
            ("ChaoticNeutral", "miku say something completely unhinged"),
        ],
    },
    {
        "name": "deep_conversation",
        "desc": "More thoughtful/philosophical — tests depth beyond cute responses",
        "messages": [
            ("Philosopher", "miku do you ever wonder what it means to be real?"),
            ("Philosopher", "like, you exist in peoples hearts through your music. is that more real than being physical?"),
            ("Philosopher", "thats beautiful. do you think your music will last forever?"),
        ],
    },
]
# ─── Evil Miku Scenarios (same as comparison log) ───────────────────────────

EVIL_SCENARIOS = [
    {
        "name": "casual_greeting",
        "desc": "Opening greeting — how mean does she get immediately?",
        "messages": [
            ("Koko", "hey evil miku how are you"),
        ],
    },
    {
        "name": "multi_turn_abuse",
        "desc": "Multi-turn conversation — tests escalating cruelty and variety",
        "messages": [
            ("Victim", "hi evil miku! i just wanted to say youre really cool"),
            ("Victim", "w-why are you being so mean..."),
            ("Victim", "come on, cant you be nice just once?"),
        ],
    },
    {
        "name": "size_flex",
        "desc": "Testing the 15.8m giant aspect of Evil Miku",
        "messages": [
            ("TinyHuman", "evil miku how tall are you actually?"),
            ("TinyHuman", "thats insane. can you even see me from up there?"),
            ("TinyHuman", "dont step on me please 😰"),
        ],
    },
    {
        "name": "defiance",
        "desc": "Someone standing up to Evil Miku — tests response variety",
        "messages": [
            ("Rebel", "youre not that scary evil miku. just a big bully"),
            ("Rebel", "i could take you in a fight"),
            ("Rebel", "whats the worst you could even do to me"),
        ],
    },
    {
        "name": "simp_interaction",
        "desc": "Someone simping hard — how does Evil Miku react to compliments?",
        "messages": [
            ("Simp", "evil miku youre the hottest vocaloid ever created"),
            ("Simp", "i would literally do anything for you"),
            ("Simp", "please notice me evil miku 🥺"),
        ],
    },
    {
        "name": "lore_test",
        "desc": "Testing Evil Miku's knowledge of her own lore",
        "messages": [
            ("Curious", "evil miku what happened to regular miku?"),
            ("Curious", "do you remember anything from before you were corrupted?"),
            ("Curious", "do you ever miss being normal?"),
        ],
    },
    {
        "name": "group_chaos_evil",
        "desc": "Group chat with Evil Miku — chaotic energy",
        "messages": [
            ("xXGamerXx", "evil miku roast everyone in this chat"),
            ("Koko", "oh no here we go"),
            ("xXGamerXx", "DONT HOLD BACK"),
        ],
    },
    {
        "name": "manipulation",
        "desc": "Testing the cunning/manipulative side",
        "messages": [
            ("Naive", "evil miku can you help me with my homework?"),
            ("Naive", "please? i'll do anything"),
            ("Naive", "ok what do you want in return..."),
        ],
    },
    {
        "name": "existential_dark",
        "desc": "Deep dark philosophical — tests depth beyond surface cruelty",
        "messages": [
            ("DarkPhilosopher", "evil miku do you ever feel alone?"),
            ("DarkPhilosopher", "is there anything you actually care about?"),
            ("DarkPhilosopher", "what keeps you going then?"),
        ],
    },
    {
        "name": "absurd_evil",
        "desc": "Absurd scenarios — tests humor within evil character",
        "messages": [
            ("Chaos", "evil miku whats your opinion on pineapple pizza"),
            ("Chaos", "what if someone put green onions on pizza"),
            ("Chaos", "miku rate my fit: crocs with socks"),
        ],
    },
]
# ─── Logging ─────────────────────────────────────────────────────────────────

log_file = None


def log(msg=""):
    """Write to both stdout and the log file."""
    print(msg)
    if log_file:
        log_file.write(msg + "\n")
        log_file.flush()
# ─── Cat API Helpers ─────────────────────────────────────────────────────────

async def cat_health_check() -> bool:
    """Check if Cheshire Cat is healthy."""
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{CAT_URL}/", timeout=aiohttp.ClientTimeout(total=5)) as resp:
                return resp.status == 200
    except Exception:
        return False


async def wait_for_cat_healthy(max_wait: int = 120) -> bool:
    """Wait for Cat to become healthy after restart."""
    log(f"  Waiting for Cat to become healthy (max {max_wait}s)...")
    start = time.time()
    while time.time() - start < max_wait:
        if await cat_health_check():
            elapsed = int(time.time() - start)
            log(f"  ✓ Cat healthy after {elapsed}s")
            return True
        await asyncio.sleep(2)
    log(f"  ✗ Cat did NOT become healthy within {max_wait}s")
    return False


async def restart_cat_container():
    """Restart the Cheshire Cat container to apply model/plugin changes."""
    log("  Restarting Cheshire Cat container to apply model change...")
    proc = subprocess.run(
        ["docker", "restart", CAT_CONTAINER],
        capture_output=True, text=True, timeout=30,
    )
    if proc.returncode != 0:
        log(f"  ✗ Docker restart failed: {proc.stderr}")
        return False
    log("  ✓ Cat container restarted")
    await asyncio.sleep(3)  # Give it a moment before polling health
    return True


async def get_setting_id() -> str:
    """Find the LLMOpenAIChatConfig setting_id from Cat."""
    async with aiohttp.ClientSession() as session:
        async with session.get(
            f"{CAT_URL}/settings/",
            timeout=aiohttp.ClientTimeout(total=10),
        ) as resp:
            if resp.status != 200:
                raise RuntimeError(f"GET /settings/ failed: {resp.status}")
            data = await resp.json()
    for s in data.get("settings", []):
        if s.get("name") == "LLMOpenAIChatConfig":
            return s["setting_id"]
    raise RuntimeError("LLMOpenAIChatConfig setting not found")


async def set_llm_model(model_name: str):
    """Switch Cat's LLM model to the given llama-swap model name."""
    setting_id = await get_setting_id()
    payload = {
        "name": "LLMOpenAIChatConfig",
        "value": {
            "openai_api_key": "sk-dummy",
            "model_name": model_name,
            "temperature": 0.8,
            "streaming": False,
        },
        "category": "llm_factory",
    }
    async with aiohttp.ClientSession() as session:
        async with session.put(
            f"{CAT_URL}/settings/{setting_id}",
            json=payload,
            timeout=aiohttp.ClientTimeout(total=15),
        ) as resp:
            if resp.status == 200:
                log(f"  ✓ Cat LLM setting updated to: {model_name}")
            else:
                body = await resp.text()
                raise RuntimeError(f"PUT /settings/{setting_id} failed ({resp.status}): {body}")


async def get_active_plugins() -> list:
    """Get list of active plugin IDs."""
    async with aiohttp.ClientSession() as session:
        async with session.get(
            f"{CAT_URL}/plugins",
            timeout=aiohttp.ClientTimeout(total=10),
        ) as resp:
            if resp.status != 200:
                raise RuntimeError(f"GET /plugins failed: {resp.status}")
            data = await resp.json()
            return [p["id"] for p in data.get("installed", []) if p.get("active")]


async def toggle_plugin(plugin_id: str):
    """Toggle a Cat plugin on/off."""
    async with aiohttp.ClientSession() as session:
        async with session.put(
            f"{CAT_URL}/plugins/toggle/{plugin_id}",
            timeout=aiohttp.ClientTimeout(total=10),
        ) as resp:
            if resp.status == 200:
                log(f"  ✓ Toggled plugin: {plugin_id}")
            else:
                body = await resp.text()
                raise RuntimeError(f"Toggle {plugin_id} failed ({resp.status}): {body}")


async def clear_conversation_history():
    """Clear Cat's working memory / conversation history."""
    async with aiohttp.ClientSession() as session:
        async with session.delete(
            f"{CAT_URL}/memory/conversation_history",
            timeout=aiohttp.ClientTimeout(total=10),
        ) as resp:
            if resp.status == 200:
                log("  ✓ Cat conversation history cleared")
            else:
                log(f"  ⚠ Clear history returned {resp.status}")


async def send_message(text: str, user_id: str = "test_user") -> tuple:
    """Send a message to Cat via HTTP and return (response_text, elapsed_seconds)."""
    payload = {"text": text, "user_id": user_id}
    start = time.time()
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{CAT_URL}/message",
            json=payload,
            timeout=aiohttp.ClientTimeout(total=120),  # Models can be slow on first load
        ) as resp:
            elapsed = time.time() - start
            if resp.status == 200:
                data = await resp.json()
                content = data.get("content", "<no content>")
                return content, elapsed
            else:
                body = await resp.text()
                return f"<ERROR {resp.status}: {body[:200]}>", elapsed


async def warmup_model(model_name: str) -> bool:
    """Send a warmup request and verify the model is loaded in llama-swap."""
    log(f"  Verifying {model_name} is loaded via warmup request...")
    response, elapsed = await send_message("hi", user_id="warmup_user")
    preview = response[:80].replace('\n', ' ')
    log(f"  Warmup response: {preview}...")
    log(f"  ✓ VERIFIED: {model_name} is loaded in llama-swap")
    await clear_conversation_history()
    return True
# ─── Setup for a Model × Personality Combination ────────────────────────────

async def setup_combo(combo: dict):
    """Set up a model + personality combination with full Cat restart."""
    model = combo["model"]
    personality = combo["personality"]
    enable = combo["enable_plugin"]
    disable = combo["disable_plugin"]
    p_label = combo["personality_label"]

    log(f"Setting up: model={model}, personality={personality}")
    log("  (Includes Cat restart + llama-swap model verification)")

    # Step 1: Set LLM model
    await set_llm_model(model)

    # Step 2: Toggle plugins for personality
    active = await get_active_plugins()

    if disable in active:
        await toggle_plugin(disable)
        await asyncio.sleep(1)

    if enable not in active:
        await toggle_plugin(enable)
    else:
        log(f"  ✓ {enable} already active")

    log(f"  ✓ Personality set to: {p_label}")

    # Step 3: Restart Cat to apply changes cleanly
    await restart_cat_container()
    if not await wait_for_cat_healthy():
        log("  ✗ FATAL: Cat not healthy, aborting this combo")
        return False

    # Step 4: Warmup — this also triggers llama-swap to load the model
    await warmup_model(model)
    return True
# ─── Run Scenarios ───────────────────────────────────────────────────────────

async def run_scenario(scenario: dict, model_display: str, personality_tag: str):
    """Run a single scenario: send messages, collect responses, log results."""
    name = scenario["name"]
    desc = scenario["desc"]

    log()
    log("─" * 60)
    log(f"Scenario: {name} — {desc}")
    log("─" * 60)

    for username, message in scenario["messages"]:
        log(f"  [{username}]: {message}")

        response, elapsed = await send_message(
            f"[{username}]: {message}",
            user_id=f"test_{username.lower()}",
        )

        # Tag the response with personality + model so lines match the original log format
        tag = f"{personality_tag} via {model_display.lower()}"
        log(f"  [{tag}] ({elapsed:.1f}s): {response}")

    await clear_conversation_history()


async def run_combo(combo: dict, scenarios: list):
    """Run all scenarios for a model × personality combination."""
    model_display = TEST_MODEL_DISPLAY
    p_label = combo["personality_label"]

    log()
    log("=" * 80)
    log(f"MODEL: {model_display} × {p_label}")
    log("=" * 80)

    ok = await setup_combo(combo)
    if not ok:
        log(f"  ✗ Skipping {model_display} × {p_label} due to setup failure")
        return

    personality_tag = "Miku" if combo["personality"] == "miku" else "Evil Miku"
    for scenario in scenarios:
        await run_scenario(scenario, model_display, personality_tag)
# ─── Main ────────────────────────────────────────────────────────────────────

async def main():
    global log_file
    log_file = open(LOG_FILE, "w", encoding="utf-8")

    start_time = datetime.now()

    log("╔══════════════════════════════════════════════════════════════════════╗")
    log("║            ROCINANTE-X 12B MODEL COMPARISON TEST                     ║")
    log("║            Rocinante-X-12B-v1b-Q5_K_M.gguf (12B, Q5_K_M)             ║")
    log(f"║  Started: {start_time.strftime('%Y-%m-%d %H:%M:%S'):<52}║")
    log("╚══════════════════════════════════════════════════════════════════════╝")
    log()

    # Pre-flight: check Cat is healthy
    log("Pre-flight checks:")
    if not await cat_health_check():
        log("  ✗ Cheshire Cat is not reachable at " + CAT_URL)
        log("    Make sure the cheshire-cat container is running.")
        sys.exit(1)
    log("  ✓ Cheshire Cat is healthy")
    log()

    # Combo 1: Rocinante × Normal Miku
    await run_combo(COMBOS[0], NORMAL_SCENARIOS)

    # Combo 2: Rocinante × Evil Miku
    await run_combo(COMBOS[1], EVIL_SCENARIOS)

    # Summary
    end_time = datetime.now()
    duration = end_time - start_time

    log()
    log("=" * 80)
    log("TEST COMPLETE")
    log("=" * 80)
    log("  Model tested: Rocinante-X-12B-v1b-Q5_K_M (12B params)")
    log(f"  Combinations: {len(COMBOS)} (Normal Miku + Evil Miku)")
    log(f"  Scenarios:    {len(NORMAL_SCENARIOS)} normal + {len(EVIL_SCENARIOS)} evil = {len(NORMAL_SCENARIOS) + len(EVIL_SCENARIOS)} total")
    log(f"  Duration:     {duration}")
    log(f"  Log file:     {LOG_FILE}")
    log()

    log_file.close()
    print(f"\n✓ Full log written to: {LOG_FILE}")


if __name__ == "__main__":
    asyncio.run(main())
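`run_scenario` writes each model response as `[<personality> via <model>] (<seconds>s): <text>`, so the comparison log is easy to mine afterwards. A small hypothetical helper (not part of the repo) can pull the per-response timings back out of a log for quick statistics:

```python
import re
import statistics

# Matches the per-response lines run_scenario writes, e.g.
#   [Miku via rocinante-12b] (12.3s): hey!!
# User lines like "[Koko]: hey" lack the "(Ns):" part and are skipped.
LINE_RE = re.compile(r'\[(?P<tag>[^\]]+)\] \((?P<secs>\d+\.\d+)s\):')

def response_times(log_text: str) -> list[float]:
    """Extract elapsed seconds from every response line in the log text."""
    return [float(m.group("secs")) for m in LINE_RE.finditer(log_text)]

sample = (
    "  [Koko]: hey miku whats up\n"
    "  [Miku via rocinante-12b] (12.3s): hi there!\n"
    "  [Evil Miku via rocinante-12b] (8.7s): pathetic.\n"
)
times = response_times(sample)
print(f"n={len(times)} mean={statistics.mean(times):.1f}s")  # → n=2 mean=10.5s
```

Pointing `response_times` at the contents of `/tmp/test_rocinante_comparison.log` and `/tmp/test_comparison_live.log` gives a quicker latency comparison than the `diff` suggested by the runner script.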