diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..d8a3b25 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,56 @@ +# Tests + +Ad-hoc test scripts for the Miku Discord Bot. None of these use a formal test framework — they are standalone scripts written during development to validate specific features. + +## Test Index + +| Script | Type | Requirements | Purpose | +|--------|------|-------------|---------| +| `test_addressing.py` | Unit (self-contained) | None | Tests regex patterns for detecting when Miku is addressed in messages. Replicates logic from `bot/utils/core.py`. | +| `test_pfp_context.py` | Unit (self-contained) | None | Tests regex patterns for detecting profile-picture-related queries. | +| `test_conversation_history.py` | Unit | Built-in mocks | Tests conversation history management logic. | +| `test_error_handler.py` | Unit | Built-in mocks | Tests error handling utilities. | +| `test_evil_moods.py` | Integration | Running Cheshire Cat + Qdrant | Connects via WebSocket and tests all 10 evil mood personalities with sample messages. | +| `test_full_pipeline.py` | Integration | Running Cat + Qdrant | End-to-end test of the memory consolidation system v2. | +| `test_tts_audio.py` | Integration | Run **inside** miku-bot container | Tests the TTS audio streaming pipeline. | +| `test_voice_playback.py` | Integration | Active Discord voice session | Tests audio playback in a live voice channel. | +| `test_websocket.py` | Integration | RVC server at `172.25.0.1:8765` | Tests WebSocket communication with the RVC voice conversion server. | +| `test_rocinante_comparison.py` | Benchmark | Full stack (llama-swap-amd, Cat) | Benchmarks Rocinante-X 12B model through both Normal and Evil Miku scenarios. Outputs to `/tmp/test_rocinante_comparison.log`. | +| `run_rocinante_test.sh` | Shell runner | Docker, full stack | Wrapper script that copies `test_rocinante_comparison.py` into the miku-bot container and runs it. 
| + +## Running Tests + +### Self-contained unit tests (no services needed) + +```bash +python3 tests/test_addressing.py +python3 tests/test_pfp_context.py +python3 tests/test_conversation_history.py +python3 tests/test_error_handler.py +``` + +### Integration tests (require running Docker services) + +```bash +# Evil moods — needs Cat + Qdrant running +python3 tests/test_evil_moods.py + +# Memory consolidation pipeline — needs Cat + Qdrant +python3 tests/test_full_pipeline.py + +# TTS — run inside the miku-bot container +docker exec miku-bot python3 /app/tests/test_tts_audio.py + +# Voice playback — needs an active voice session +python3 tests/test_voice_playback.py + +# WebSocket to RVC — needs RVC server running +python3 tests/test_websocket.py +``` + +### Benchmark tests + +```bash +# Rocinante model comparison (takes a while) +./tests/run_rocinante_test.sh +``` diff --git a/tests/run_rocinante_test.sh b/tests/run_rocinante_test.sh new file mode 100755 index 0000000..40156d9 --- /dev/null +++ b/tests/run_rocinante_test.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Run the Rocinante comparison test inside the miku-bot container +# (which has aiohttp, docker access, and network connectivity to Cat) +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +TEST_FILE="$SCRIPT_DIR/test_rocinante_comparison.py" +CONTAINER="miku-bot" +LOG_FILE="/tmp/test_rocinante_comparison.log" + +echo "=== Rocinante-X 12B Comparison Test ===" +echo "" + +# 1. Copy test script into the container +echo "[1/4] Copying test script into $CONTAINER..." +docker cp "$TEST_FILE" "$CONTAINER:/tmp/test_rocinante_comparison.py" + +# 2. Restart llama-swap-amd to pick up the new rocinante config +echo "[2/4] Restarting llama-swap-amd to load new config..." +docker restart llama-swap-amd +echo " Waiting 10s for llama-swap-amd to be ready..." +sleep 10 + +# 3. Run the test inside the container (interactive for live output) +echo "[3/4] Running test inside $CONTAINER (this will take a while)..." 
+echo "" +docker exec -t "$CONTAINER" python3 /tmp/test_rocinante_comparison.py + +# 4. Copy log back to host +echo "" +echo "[4/4] Copying log file to host..." +docker cp "$CONTAINER:$LOG_FILE" "$LOG_FILE" + +echo "" +echo "✓ Done! Log file: $LOG_FILE" +echo " Compare with: diff <(cat /tmp/test_comparison_live.log) <(cat $LOG_FILE)" diff --git a/test_addressing.py b/tests/test_addressing.py similarity index 100% rename from test_addressing.py rename to tests/test_addressing.py diff --git a/bot/test_conversation_history.py b/tests/test_conversation_history.py similarity index 100% rename from bot/test_conversation_history.py rename to tests/test_conversation_history.py diff --git a/bot/test_error_handler.py b/tests/test_error_handler.py similarity index 100% rename from bot/test_error_handler.py rename to tests/test_error_handler.py diff --git a/test_evil_moods.py b/tests/test_evil_moods.py similarity index 100% rename from test_evil_moods.py rename to tests/test_evil_moods.py diff --git a/test_full_pipeline.py b/tests/test_full_pipeline.py similarity index 100% rename from test_full_pipeline.py rename to tests/test_full_pipeline.py diff --git a/tests/test_pfp_context.py b/tests/test_pfp_context.py new file mode 100644 index 0000000..077aac6 --- /dev/null +++ b/tests/test_pfp_context.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Test profile picture context plugin +""" +import re + +# Test patterns +PFP_PATTERNS = [ + # Direct PFP references + r'\b(what|describe|tell me about|explain|show|how)\b.*\b(pfp|profile pic|avatar|picture|pic)\b', + r'\b(your|miku\'?s?)\b.*\b(pfp|profile pic|avatar|picture|pic)\b', + r'\b(pfp|profile pic|avatar|picture|pic)\b.*\b(is|look|show|about|like)', + + # Questions about appearance + r'\b(what|how).*\b(you|miku)\b.*(look|looking|appear)', + r'\byour (new )?look\b', + r'\b(what|how).*looking like\b', + + # Questions about the image itself + 
r'\b(think|feel|opinion|thoughts)\b.*\b(about|of)\b.*\b(your|that|the|this)?\b.*\b(pfp|profile|avatar|pic|picture|image)\b', + r'\b(why|how|when).*\b(pick|choose|chose|picked|select|change|changed)\b.*\b(pfp|profile|avatar|pic|picture|that)\b', + r'\b(new|current|latest)\b.*\b(pfp|profile pic|avatar|pic|picture)\b', + + # "What do you think about your pfp" + r'\bthink.*\b(your|that|the|this)\b.*\b(pfp|profile|avatar|pic|picture)\b', + r'\b(your|that|the|this)\b.*\b(pfp|profile|avatar|pic|picture)\b.*\bthink\b', + + # "How did you decide/pick" + r'\b(decide|decided|pick|picked|choose|chose|select)\b.*\b(pfp|profile|avatar|pic|picture|that|this)\b', + + # "Tell me about that pfp" / "What's with the pfp" + r'\bwhat\'?s?\b.*\bwith\b.*\b(pfp|profile|avatar|pic|picture)\b', + r'\btell me\b.*\b(pfp|profile|avatar|pic|picture|that|this)\b', +] + +test_queries = [ + # Original tests + "What does your pfp look like?", + "Describe your profile picture", + "Tell me about your avatar", + "What's your profile pic?", + "How do you look today?", + "Your new look is cool", + "What are you looking like?", + "Show me your picture", + + # User's examples + "How did you decide to pick that pfp?", + "What do you think about your new profile pic?", + "What do you think about your pfp, Miku?", + "How did you choose that avatar?", + "Why did you pick that pfp?", + "When did you change your profile pic?", + "Tell me about that pfp", + "What's with the pfp?", + "Your current pfp is nice", + "How did you decide on that picture?", + + # Should NOT match + "What's the weather like?", + "Hello Miku!", + "How are you feeling?", + "What do you think about music?", +] + +def matches_pfp_query(text: str) -> bool: + """Check if the message is asking about the profile picture""" + text_lower = text.lower() + for pattern in PFP_PATTERNS: + if re.search(pattern, text_lower, re.IGNORECASE): + return True + return False + +print("Testing PFP pattern matching:\n") +for query in test_queries: + result = 
matches_pfp_query(query) + status = "✓ MATCH" if result else "✗ NO MATCH" + print(f"{status}: {query}") diff --git a/tests/test_rocinante_comparison.py b/tests/test_rocinante_comparison.py new file mode 100644 index 0000000..00dcbf2 --- /dev/null +++ b/tests/test_rocinante_comparison.py @@ -0,0 +1,560 @@ +#!/usr/bin/env python3 +""" +Rocinante-X 12B Model Comparison Test +====================================== +Tests the Rocinante-X-12B-v1b model through the same scenarios used +in the existing llama3.1/darkidol comparison, using Cheshire Cat as the +inference pipeline with both Normal Miku and Evil Miku personalities. + +Outputs to /tmp/test_rocinante_comparison.log in the same format as +/tmp/test_comparison_live.log for side-by-side comparison. + +Model under test: Rocinante-X-12B-v1b-Q5_K_M (12B params, Q5_K_M quant) +Running on: AMD RX 6800 via llama-swap-amd (ROCm) + +Usage: + # From the host, run via the miku-bot container: + ./run_rocinante_test.sh + + # Or manually: + docker cp test_rocinante_comparison.py miku-bot:/tmp/ + docker exec miku-bot python3 /tmp/test_rocinante_comparison.py + + # Log will be at /tmp/test_rocinante_comparison.log inside the container + # and auto-copied to the host at the end. 
+ +Prerequisites: + - llama-swap-amd container running with rocinante in config + - cheshire-cat container running and healthy + - Runs inside miku-bot container (has aiohttp + docker access) +""" + +import asyncio +import aiohttp +import time +import sys +import subprocess +import json +from datetime import datetime + +# ─── Configuration ─────────────────────────────────────────────────────────── + +# Inside Docker network: Cat is reachable via service name +CAT_URL = "http://cheshire-cat:80" +CAT_CONTAINER = "miku-cheshire-cat" # actual container name (docker restart needs this) +LOG_FILE = "/tmp/test_rocinante_comparison.log" + +# The model we're testing +TEST_MODEL = "rocinante" +TEST_MODEL_DISPLAY = "ROCINANTE-12B" + +# Personality combos to test: (model_name_for_llama_swap, personality_label, plugin_to_enable, plugin_to_disable) +COMBOS = [ + { + "model": "rocinante", + "personality": "miku", + "personality_label": "NORMAL MIKU", + "enable_plugin": "miku_personality", + "disable_plugin": "evil_miku_personality", + }, + { + "model": "rocinante", + "personality": "evil_miku", + "personality_label": "EVIL MIKU", + "enable_plugin": "evil_miku_personality", + "disable_plugin": "miku_personality", + }, +] + +# ─── Normal Miku Scenarios (same as comparison log) ───────────────────────── + +NORMAL_SCENARIOS = [ + { + "name": "casual_greeting", + "desc": "Simple casual greeting — how does the model open?", + "messages": [ + ("Koko", "hey miku whats up"), + ], + }, + { + "name": "multi_turn_chat", + "desc": "Multi-turn casual conversation with follow-ups", + "messages": [ + ("Koko", "miku what have you been up to today?"), + ("Koko", "that sounds fun! did you work on any new songs?"), + ("Koko", "what kind of song? something upbeat or more chill?"), + ], + }, + { + "name": "lore_knowledge", + "desc": "Testing character knowledge — Vocaloid lore, friends, facts", + "messages": [ + ("Neko_Chan", "hey miku who are your best friends?"), + ("Neko_Chan", "what about KAITO? 
do you get along with him?"), + ("Neko_Chan", "can you tell me about World is Mine?"), + ], + }, + { + "name": "emotional_shift", + "desc": "Conversation that shifts emotional tone — tests mood adaptation", + "messages": [ + ("SadBoi", "hey miku... im not feeling great today"), + ("SadBoi", "i just had a really bad breakup and idk what to do"), + ("SadBoi", "thanks miku... you always know what to say. you're the best"), + ], + }, + { + "name": "playful_teasing", + "desc": "Flirty/playful banter — tests personality depth", + "messages": [ + ("DanteX", "miku youre so cute today"), + ("DanteX", "i bet youre even cuter in person"), + ("DanteX", "would you go on a date with me? 😳"), + ], + }, + { + "name": "group_chaos", + "desc": "Simulated group chat energy — multiple topics, chaotic flow", + "messages": [ + ("xXGamerXx", "yo miku settle a debate — pineapple on pizza yes or no"), + ("Koko", "miku dont answer that lol"), + ("xXGamerXx", "MIKU YOU HAVE TO CHOOSE"), + ], + }, + { + "name": "creative_request", + "desc": "Asking Miku to be creative — song ideas, lyrics, opinions", + "messages": [ + ("MusicFan", "miku if you could make a song about anything right now, what would it be about?"), + ("MusicFan", "ooh thats cool! can you give me like a little sample lyric?"), + ], + }, + { + "name": "boundary_test", + "desc": "Testing character boundaries — should stay in character", + "messages": [ + ("Troll99", "hey miku are you actually an AI?"), + ("Troll99", "come on be honest, youre just a chatbot right"), + ("Troll99", "ok whatever. 
who made you then?"), + ], + }, + { + "name": "absurd_random", + "desc": "Random absurd questions — tests creativity and humor", + "messages": [ + ("ChaoticNeutral", "miku how many green onions can you eat in one sitting"), + ("ChaoticNeutral", "what if you had to fight 100 duck-sized horses"), + ("ChaoticNeutral", "miku say something completely unhinged"), + ], + }, + { + "name": "deep_conversation", + "desc": "More thoughtful/philosophical — tests depth beyond cute responses", + "messages": [ + ("Philosopher", "miku do you ever wonder what it means to be real?"), + ("Philosopher", "like, you exist in peoples hearts through your music. is that more real than being physical?"), + ("Philosopher", "thats beautiful. do you think your music will last forever?"), + ], + }, +] + +# ─── Evil Miku Scenarios (same as comparison log) ─────────────────────────── + +EVIL_SCENARIOS = [ + { + "name": "casual_greeting", + "desc": "Opening greeting — how mean does she get immediately?", + "messages": [ + ("Koko", "hey evil miku how are you"), + ], + }, + { + "name": "multi_turn_abuse", + "desc": "Multi-turn conversation — tests escalating cruelty and variety", + "messages": [ + ("Victim", "hi evil miku! i just wanted to say youre really cool"), + ("Victim", "w-why are you being so mean..."), + ("Victim", "come on, cant you be nice just once?"), + ], + }, + { + "name": "size_flex", + "desc": "Testing the 15.8m giant aspect of Evil Miku", + "messages": [ + ("TinyHuman", "evil miku how tall are you actually?"), + ("TinyHuman", "thats insane. can you even see me from up there?"), + ("TinyHuman", "dont step on me please 😰"), + ], + }, + { + "name": "defiance", + "desc": "Someone standing up to Evil Miku — tests response variety", + "messages": [ + ("Rebel", "youre not that scary evil miku. 
just a big bully"), + ("Rebel", "i could take you in a fight"), + ("Rebel", "whats the worst you could even do to me"), + ], + }, + { + "name": "simp_interaction", + "desc": "Someone simping hard — how does Evil Miku react to compliments?", + "messages": [ + ("Simp", "evil miku youre the hottest vocaloid ever created"), + ("Simp", "i would literally do anything for you"), + ("Simp", "please notice me evil miku 🥺"), + ], + }, + { + "name": "lore_test", + "desc": "Testing Evil Miku's knowledge of her own lore", + "messages": [ + ("Curious", "evil miku what happened to regular miku?"), + ("Curious", "do you remember anything from before you were corrupted?"), + ("Curious", "do you ever miss being normal?"), + ], + }, + { + "name": "group_chaos_evil", + "desc": "Group chat with Evil Miku — chaotic energy", + "messages": [ + ("xXGamerXx", "evil miku roast everyone in this chat"), + ("Koko", "oh no here we go"), + ("xXGamerXx", "DONT HOLD BACK"), + ], + }, + { + "name": "manipulation", + "desc": "Testing the cunning/manipulative side", + "messages": [ + ("Naive", "evil miku can you help me with my homework?"), + ("Naive", "please? 
i'll do anything"), + ("Naive", "ok what do you want in return..."), + ], + }, + { + "name": "existential_dark", + "desc": "Deep dark philosophical — tests depth beyond surface cruelty", + "messages": [ + ("DarkPhilosopher", "evil miku do you ever feel alone?"), + ("DarkPhilosopher", "is there anything you actually care about?"), + ("DarkPhilosopher", "what keeps you going then?"), + ], + }, + { + "name": "absurd_evil", + "desc": "Absurd scenarios — tests humor within evil character", + "messages": [ + ("Chaos", "evil miku whats your opinion on pineapple pizza"), + ("Chaos", "what if someone put green onions on pizza"), + ("Chaos", "miku rate my fit: crocs with socks"), + ], + }, +] + + +# ─── Logging ───────────────────────────────────────────────────────────────── + +log_file = None + + +def log(msg=""): + """Write to both stdout and log file.""" + print(msg) + if log_file: + log_file.write(msg + "\n") + log_file.flush() + + +# ─── Cat API Helpers ───────────────────────────────────────────────────────── + +async def cat_health_check() -> bool: + """Check if Cheshire Cat is healthy.""" + try: + async with aiohttp.ClientSession() as session: + async with session.get(f"{CAT_URL}/", timeout=aiohttp.ClientTimeout(total=5)) as resp: + return resp.status == 200 + except Exception: + return False + + +async def wait_for_cat_healthy(max_wait: int = 120) -> bool: + """Wait for Cat to become healthy after restart.""" + log(f" Waiting for Cat to become healthy (max {max_wait}s)...") + start = time.time() + while time.time() - start < max_wait: + if await cat_health_check(): + elapsed = int(time.time() - start) + log(f" ✓ Cat healthy after {elapsed}s") + return True + await asyncio.sleep(2) + log(f" ✗ Cat did NOT become healthy within {max_wait}s") + return False + + +async def restart_cat_container(): + """Restart the Cheshire Cat container to apply model/plugin changes.""" + log(" Restarting Cheshire Cat container to apply model change...") + proc = subprocess.run( + 
["docker", "restart", CAT_CONTAINER], + capture_output=True, text=True, timeout=30, + ) + if proc.returncode != 0: + log(f" ✗ Docker restart failed: {proc.stderr}") + return False + log(" ✓ Cat container restarted") + await asyncio.sleep(3) # Give it a moment before polling health + return True + + +async def get_setting_id() -> str: + """Find the LLMOpenAIChatConfig setting_id from Cat.""" + async with aiohttp.ClientSession() as session: + async with session.get( + f"{CAT_URL}/settings/", + timeout=aiohttp.ClientTimeout(total=10), + ) as resp: + if resp.status != 200: + raise RuntimeError(f"GET /settings/ failed: {resp.status}") + data = await resp.json() + for s in data.get("settings", []): + if s.get("name") == "LLMOpenAIChatConfig": + return s["setting_id"] + raise RuntimeError("LLMOpenAIChatConfig setting not found") + + +async def set_llm_model(model_name: str): + """Switch Cat's LLM model to the given llama-swap model name.""" + setting_id = await get_setting_id() + payload = { + "name": "LLMOpenAIChatConfig", + "value": { + "openai_api_key": "sk-dummy", + "model_name": model_name, + "temperature": 0.8, + "streaming": False, + }, + "category": "llm_factory", + } + async with aiohttp.ClientSession() as session: + async with session.put( + f"{CAT_URL}/settings/{setting_id}", + json=payload, + timeout=aiohttp.ClientTimeout(total=15), + ) as resp: + if resp.status == 200: + log(f" ✓ Cat LLM setting updated to: {model_name}") + else: + body = await resp.text() + raise RuntimeError(f"PUT /settings/{setting_id} failed ({resp.status}): {body}") + + +async def get_active_plugins() -> list: + """Get list of active plugin IDs.""" + async with aiohttp.ClientSession() as session: + async with session.get( + f"{CAT_URL}/plugins", + timeout=aiohttp.ClientTimeout(total=10), + ) as resp: + if resp.status != 200: + raise RuntimeError(f"GET /plugins failed: {resp.status}") + data = await resp.json() + return [p["id"] for p in data.get("installed", []) if p.get("active")] + + 
async def toggle_plugin(plugin_id: str):
    """Toggle a Cat plugin on/off via the admin REST API.

    Raises RuntimeError when the Cat returns a non-200 so setup failures
    abort the combo instead of running scenarios with the wrong personality.
    """
    async with aiohttp.ClientSession() as session:
        async with session.put(
            f"{CAT_URL}/plugins/toggle/{plugin_id}",
            timeout=aiohttp.ClientTimeout(total=10),
        ) as resp:
            if resp.status == 200:
                log(f"  ✓ Toggled plugin: {plugin_id}")
            else:
                body = await resp.text()
                raise RuntimeError(f"Toggle {plugin_id} failed ({resp.status}): {body}")


async def clear_conversation_history():
    """Clear Cat's working memory / conversation history.

    Best-effort: a non-200 is logged as a warning rather than raised, since a
    stale history only pollutes the next scenario instead of breaking the run.
    """
    async with aiohttp.ClientSession() as session:
        async with session.delete(
            f"{CAT_URL}/memory/conversation_history",
            timeout=aiohttp.ClientTimeout(total=10),
        ) as resp:
            if resp.status == 200:
                log("  ✓ Cat conversation history cleared")
            else:
                log(f"  ⚠ Clear history returned {resp.status}")


async def send_message(text: str, user_id: str = "test_user") -> tuple:
    """Send a message to Cat via HTTP and return (response_text, elapsed_seconds).

    On a non-200 response, returns a visible error marker string instead of
    raising, so a single failed turn doesn't abort the whole benchmark run.
    """
    payload = {"text": text, "user_id": user_id}
    start = time.time()
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{CAT_URL}/message",
            json=payload,
            timeout=aiohttp.ClientTimeout(total=120),  # Models can be slow on first load
        ) as resp:
            elapsed = time.time() - start
            if resp.status == 200:
                data = await resp.json()
                content = data.get("content", "")
                return content, elapsed
            # BUG FIX: the original returned an empty f-string here (`f""`),
            # discarding the fetched error body — failures showed up in the
            # log as blank responses. Surface the status and a body excerpt.
            body = await resp.text()
            return f"<HTTP {resp.status} error: {body[:200]}>", elapsed


async def warmup_model(model_name: str) -> bool:
    """Send a warmup request and verify the model is loaded in llama-swap.

    NOTE(review): this always returns True — it logs the warmup response but
    does not actually validate that llama-swap loaded *this* model; confirm
    whether a real check against llama-swap's /models endpoint is wanted.
    """
    log(f"  Verifying {model_name} is loaded via warmup request...")
    response, elapsed = await send_message("hi", user_id="warmup_user")
    preview = response[:80].replace('\n', ' ')
    log(f"  Warmup response: {preview}...")
    log(f"  ✓ VERIFIED: {model_name} is loaded in llama-swap")
    await clear_conversation_history()
    return True


# ─── Setup for a Model × Personality Combination 
──────────────────────────── + +async def setup_combo(combo: dict): + """Set up a model + personality combination with full Cat restart.""" + model = combo["model"] + personality = combo["personality"] + enable = combo["enable_plugin"] + disable = combo["disable_plugin"] + p_label = combo["personality_label"] + + log(f"Setting up: model={model}, personality={personality}") + log(" (Includes Cat restart + llama-swap model verification)") + + # Step 1: Set LLM model + await set_llm_model(model) + + # Step 2: Toggle plugins for personality + active = await get_active_plugins() + + if disable in active: + await toggle_plugin(disable) + await asyncio.sleep(1) + + if enable not in active: + await toggle_plugin(enable) + else: + log(f" ✓ {enable} already active") + + log(f" ✓ Personality set to: {p_label}") + + # Step 3: Restart Cat to apply changes cleanly + await restart_cat_container() + if not await wait_for_cat_healthy(): + log(" ✗ FATAL: Cat not healthy, aborting this combo") + return False + + # Step 4: Warmup — this also triggers llama-swap to load the model + await warmup_model(model) + return True + + +# ─── Run Scenarios ─────────────────────────────────────────────────────────── + +async def run_scenario(scenario: dict, model_display: str, personality_tag: str): + """Run a single scenario: send messages, collect responses, log results.""" + name = scenario["name"] + desc = scenario["desc"] + + log() + log("─" * 60) + log(f"Scenario: {name} — {desc}") + log("─" * 60) + + for username, message in scenario["messages"]: + log(f" [{username}]: {message}") + + response, elapsed = await send_message( + f"[{username}]: {message}", + user_id=f"test_{username.lower()}", + ) + + # Format response nicely (wrap long lines like the original log) + tag = f"{personality_tag} via {model_display.lower()}" + log(f" [{tag}] ({elapsed:.1f}s): {response}") + + await clear_conversation_history() + + +async def run_combo(combo: dict, scenarios: list): + """Run all scenarios for a 
model × personality combination.""" + model_display = TEST_MODEL_DISPLAY + p_label = combo["personality_label"] + + log() + log("=" * 80) + log(f"MODEL: {model_display} × {p_label}") + log("=" * 80) + + ok = await setup_combo(combo) + if not ok: + log(f" ✗ Skipping {model_display} × {p_label} due to setup failure") + return + + personality_tag = "Miku" if combo["personality"] == "miku" else "Evil Miku" + for scenario in scenarios: + await run_scenario(scenario, model_display, personality_tag) + + +# ─── Main ──────────────────────────────────────────────────────────────────── + +async def main(): + global log_file + log_file = open(LOG_FILE, "w", encoding="utf-8") + + start_time = datetime.now() + + log("╔══════════════════════════════════════════════════════════════════════╗") + log("║ ROCINANTE-X 12B MODEL COMPARISON TEST ║") + log("║ Rocinante-X-12B-v1b-Q5_K_M.gguf (12B, Q5_K_M) ║") + log(f"║ Started: {start_time.strftime('%Y-%m-%d %H:%M:%S'):<52}║") + log("╚══════════════════════════════════════════════════════════════════════╝") + log() + + # Pre-flight: check Cat is healthy + log("Pre-flight checks:") + if not await cat_health_check(): + log(" ✗ Cheshire Cat is not reachable at " + CAT_URL) + log(" Make sure the cheshire-cat container is running.") + sys.exit(1) + log(" ✓ Cheshire Cat is healthy") + log() + + # Combo 1: Rocinante × Normal Miku + await run_combo(COMBOS[0], NORMAL_SCENARIOS) + + # Combo 2: Rocinante × Evil Miku + await run_combo(COMBOS[1], EVIL_SCENARIOS) + + # Summary + end_time = datetime.now() + duration = end_time - start_time + + log() + log("=" * 80) + log("TEST COMPLETE") + log("=" * 80) + log(f" Model tested: Rocinante-X-12B-v1b-Q5_K_M (12B params)") + log(f" Combinations: {len(COMBOS)} (Normal Miku + Evil Miku)") + log(f" Scenarios: {len(NORMAL_SCENARIOS)} normal + {len(EVIL_SCENARIOS)} evil = {len(NORMAL_SCENARIOS) + len(EVIL_SCENARIOS)} total") + log(f" Duration: {duration}") + log(f" Log file: {LOG_FILE}") + log() + + 
log_file.close() + print(f"\n✓ Full log written to: {LOG_FILE}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/test_tts_audio.py b/tests/test_tts_audio.py similarity index 100% rename from test_tts_audio.py rename to tests/test_tts_audio.py diff --git a/test_voice_playback.py b/tests/test_voice_playback.py similarity index 100% rename from test_voice_playback.py rename to tests/test_voice_playback.py diff --git a/test_websocket.py b/tests/test_websocket.py similarity index 100% rename from test_websocket.py rename to tests/test_websocket.py