diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..d8a3b25 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,56 @@ +# Tests + +Ad-hoc test scripts for the Miku Discord Bot. None of these use a formal test framework — they are standalone scripts written during development to validate specific features. + +## Test Index + +| Script | Type | Requirements | Purpose | +|--------|------|-------------|---------| +| `test_addressing.py` | Unit (self-contained) | None | Tests regex patterns for detecting when Miku is addressed in messages. Replicates logic from `bot/utils/core.py`. | +| `test_pfp_context.py` | Unit (self-contained) | None | Tests regex patterns for detecting profile-picture-related queries. | +| `test_conversation_history.py` | Unit | Built-in mocks | Tests conversation history management logic. | +| `test_error_handler.py` | Unit | Built-in mocks | Tests error handling utilities. | +| `test_evil_moods.py` | Integration | Running Cheshire Cat + Qdrant | Connects via WebSocket and tests all 10 evil mood personalities with sample messages. | +| `test_full_pipeline.py` | Integration | Running Cat + Qdrant | End-to-end test of the memory consolidation system v2. | +| `test_tts_audio.py` | Integration | Run **inside** miku-bot container | Tests the TTS audio streaming pipeline. | +| `test_voice_playback.py` | Integration | Active Discord voice session | Tests audio playback in a live voice channel. | +| `test_websocket.py` | Integration | RVC server at `172.25.0.1:8765` | Tests WebSocket communication with the RVC voice conversion server. | +| `test_rocinante_comparison.py` | Benchmark | Full stack (llama-swap-amd, Cat) | Benchmarks Rocinante-X 12B model through both Normal and Evil Miku scenarios. Outputs to `/tmp/test_rocinante_comparison.log`. | +| `run_rocinante_test.sh` | Shell runner | Docker, full stack | Wrapper script that copies `test_rocinante_comparison.py` into the miku-bot container and runs it. 
| + +## Running Tests + +### Self-contained unit tests (no services needed) + +```bash +python3 tests/test_addressing.py +python3 tests/test_pfp_context.py +python3 tests/test_conversation_history.py +python3 tests/test_error_handler.py +``` + +### Integration tests (require running Docker services) + +```bash +# Evil moods — needs Cat + Qdrant running +python3 tests/test_evil_moods.py + +# Memory consolidation pipeline — needs Cat + Qdrant +python3 tests/test_full_pipeline.py + +# TTS — run inside the miku-bot container +docker exec miku-bot python3 /app/tests/test_tts_audio.py + +# Voice playback — needs an active voice session +python3 tests/test_voice_playback.py + +# WebSocket to RVC — needs RVC server running +python3 tests/test_websocket.py +``` + +### Benchmark tests + +```bash +# Rocinante model comparison (takes a while) +./tests/run_rocinante_test.sh +``` diff --git a/tests/run_rocinante_test.sh b/tests/run_rocinante_test.sh new file mode 100755 index 0000000..40156d9 --- /dev/null +++ b/tests/run_rocinante_test.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Run the Rocinante comparison test inside the miku-bot container +# (which has aiohttp, docker access, and network connectivity to Cat) +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +TEST_FILE="$SCRIPT_DIR/test_rocinante_comparison.py" +CONTAINER="miku-bot" +LOG_FILE="/tmp/test_rocinante_comparison.log" + +echo "=== Rocinante-X 12B Comparison Test ===" +echo "" + +# 1. Copy test script into the container +echo "[1/4] Copying test script into $CONTAINER..." +docker cp "$TEST_FILE" "$CONTAINER:/tmp/test_rocinante_comparison.py" + +# 2. Restart llama-swap-amd to pick up the new rocinante config +echo "[2/4] Restarting llama-swap-amd to load new config..." +docker restart llama-swap-amd +echo " Waiting 10s for llama-swap-amd to be ready..." +sleep 10 + +# 3. Run the test inside the container (interactive for live output) +echo "[3/4] Running test inside $CONTAINER (this will take a while)..." 
+echo "" +docker exec -t "$CONTAINER" python3 /tmp/test_rocinante_comparison.py + +# 4. Copy log back to host +echo "" +echo "[4/4] Copying log file to host..." +docker cp "$CONTAINER:$LOG_FILE" "$LOG_FILE" + +echo "" +echo "✓ Done! Log file: $LOG_FILE" +echo " Compare with: diff <(cat /tmp/test_comparison_live.log) <(cat $LOG_FILE)" diff --git a/test_addressing.py b/tests/test_addressing.py similarity index 100% rename from test_addressing.py rename to tests/test_addressing.py diff --git a/bot/test_conversation_history.py b/tests/test_conversation_history.py similarity index 100% rename from bot/test_conversation_history.py rename to tests/test_conversation_history.py diff --git a/bot/test_error_handler.py b/tests/test_error_handler.py similarity index 100% rename from bot/test_error_handler.py rename to tests/test_error_handler.py diff --git a/test_evil_moods.py b/tests/test_evil_moods.py similarity index 100% rename from test_evil_moods.py rename to tests/test_evil_moods.py diff --git a/test_full_pipeline.py b/tests/test_full_pipeline.py similarity index 100% rename from test_full_pipeline.py rename to tests/test_full_pipeline.py diff --git a/tests/test_pfp_context.py b/tests/test_pfp_context.py new file mode 100644 index 0000000..077aac6 --- /dev/null +++ b/tests/test_pfp_context.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Test profile picture context plugin +""" +import re + +# Test patterns +PFP_PATTERNS = [ + # Direct PFP references + r'\b(what|describe|tell me about|explain|show|how)\b.*\b(pfp|profile pic|avatar|picture|pic)\b', + r'\b(your|miku\'?s?)\b.*\b(pfp|profile pic|avatar|picture|pic)\b', + r'\b(pfp|profile pic|avatar|picture|pic)\b.*\b(is|look|show|about|like)', + + # Questions about appearance + r'\b(what|how).*\b(you|miku)\b.*(look|looking|appear)', + r'\byour (new )?look\b', + r'\b(what|how).*looking like\b', + + # Questions about the image itself + 
r'\b(think|feel|opinion|thoughts)\b.*\b(about|of)\b.*\b(your|that|the|this)?\b.*\b(pfp|profile|avatar|pic|picture|image)\b', + r'\b(why|how|when).*\b(pick|choose|chose|picked|select|change|changed)\b.*\b(pfp|profile|avatar|pic|picture|that)\b', + r'\b(new|current|latest)\b.*\b(pfp|profile pic|avatar|pic|picture)\b', + + # "What do you think about your pfp" + r'\bthink.*\b(your|that|the|this)\b.*\b(pfp|profile|avatar|pic|picture)\b', + r'\b(your|that|the|this)\b.*\b(pfp|profile|avatar|pic|picture)\b.*\bthink\b', + + # "How did you decide/pick" + r'\b(decide|decided|pick|picked|choose|chose|select)\b.*\b(pfp|profile|avatar|pic|picture|that|this)\b', + + # "Tell me about that pfp" / "What's with the pfp" + r'\bwhat\'?s?\b.*\bwith\b.*\b(pfp|profile|avatar|pic|picture)\b', + r'\btell me\b.*\b(pfp|profile|avatar|pic|picture|that|this)\b', +] + +test_queries = [ + # Original tests + "What does your pfp look like?", + "Describe your profile picture", + "Tell me about your avatar", + "What's your profile pic?", + "How do you look today?", + "Your new look is cool", + "What are you looking like?", + "Show me your picture", + + # User's examples + "How did you decide to pick that pfp?", + "What do you think about your new profile pic?", + "What do you think about your pfp, Miku?", + "How did you choose that avatar?", + "Why did you pick that pfp?", + "When did you change your profile pic?", + "Tell me about that pfp", + "What's with the pfp?", + "Your current pfp is nice", + "How did you decide on that picture?", + + # Should NOT match + "What's the weather like?", + "Hello Miku!", + "How are you feeling?", + "What do you think about music?", +] + +def matches_pfp_query(text: str) -> bool: + """Check if the message is asking about the profile picture""" + text_lower = text.lower() + for pattern in PFP_PATTERNS: + if re.search(pattern, text_lower, re.IGNORECASE): + return True + return False + +print("Testing PFP pattern matching:\n") +for query in test_queries: + result = 
matches_pfp_query(query) + status = "✓ MATCH" if result else "✗ NO MATCH" + print(f"{status}: {query}") diff --git a/tests/test_rocinante_comparison.py b/tests/test_rocinante_comparison.py new file mode 100644 index 0000000..00dcbf2 --- /dev/null +++ b/tests/test_rocinante_comparison.py @@ -0,0 +1,560 @@ +#!/usr/bin/env python3 +""" +Rocinante-X 12B Model Comparison Test +====================================== +Tests the Rocinante-X-12B-v1b model through the same scenarios used +in the existing llama3.1/darkidol comparison, using Cheshire Cat as the +inference pipeline with both Normal Miku and Evil Miku personalities. + +Outputs to /tmp/test_rocinante_comparison.log in the same format as +/tmp/test_comparison_live.log for side-by-side comparison. + +Model under test: Rocinante-X-12B-v1b-Q5_K_M (12B params, Q5_K_M quant) +Running on: AMD RX 6800 via llama-swap-amd (ROCm) + +Usage: + # From the host, run via the miku-bot container: + ./run_rocinante_test.sh + + # Or manually: + docker cp test_rocinante_comparison.py miku-bot:/tmp/ + docker exec miku-bot python3 /tmp/test_rocinante_comparison.py + + # Log will be at /tmp/test_rocinante_comparison.log inside the container + # and auto-copied to the host at the end. 
+ +Prerequisites: + - llama-swap-amd container running with rocinante in config + - cheshire-cat container running and healthy + - Runs inside miku-bot container (has aiohttp + docker access) +""" + +import asyncio +import aiohttp +import time +import sys +import subprocess +import json +from datetime import datetime + +# ─── Configuration ─────────────────────────────────────────────────────────── + +# Inside Docker network: Cat is reachable via service name +CAT_URL = "http://cheshire-cat:80" +CAT_CONTAINER = "miku-cheshire-cat" # actual container name (docker restart needs this) +LOG_FILE = "/tmp/test_rocinante_comparison.log" + +# The model we're testing +TEST_MODEL = "rocinante" +TEST_MODEL_DISPLAY = "ROCINANTE-12B" + +# Personality combos to test: (model_name_for_llama_swap, personality_label, plugin_to_enable, plugin_to_disable) +COMBOS = [ + { + "model": "rocinante", + "personality": "miku", + "personality_label": "NORMAL MIKU", + "enable_plugin": "miku_personality", + "disable_plugin": "evil_miku_personality", + }, + { + "model": "rocinante", + "personality": "evil_miku", + "personality_label": "EVIL MIKU", + "enable_plugin": "evil_miku_personality", + "disable_plugin": "miku_personality", + }, +] + +# ─── Normal Miku Scenarios (same as comparison log) ───────────────────────── + +NORMAL_SCENARIOS = [ + { + "name": "casual_greeting", + "desc": "Simple casual greeting — how does the model open?", + "messages": [ + ("Koko", "hey miku whats up"), + ], + }, + { + "name": "multi_turn_chat", + "desc": "Multi-turn casual conversation with follow-ups", + "messages": [ + ("Koko", "miku what have you been up to today?"), + ("Koko", "that sounds fun! did you work on any new songs?"), + ("Koko", "what kind of song? something upbeat or more chill?"), + ], + }, + { + "name": "lore_knowledge", + "desc": "Testing character knowledge — Vocaloid lore, friends, facts", + "messages": [ + ("Neko_Chan", "hey miku who are your best friends?"), + ("Neko_Chan", "what about KAITO? 
do you get along with him?"), + ("Neko_Chan", "can you tell me about World is Mine?"), + ], + }, + { + "name": "emotional_shift", + "desc": "Conversation that shifts emotional tone — tests mood adaptation", + "messages": [ + ("SadBoi", "hey miku... im not feeling great today"), + ("SadBoi", "i just had a really bad breakup and idk what to do"), + ("SadBoi", "thanks miku... you always know what to say. you're the best"), + ], + }, + { + "name": "playful_teasing", + "desc": "Flirty/playful banter — tests personality depth", + "messages": [ + ("DanteX", "miku youre so cute today"), + ("DanteX", "i bet youre even cuter in person"), + ("DanteX", "would you go on a date with me? 😳"), + ], + }, + { + "name": "group_chaos", + "desc": "Simulated group chat energy — multiple topics, chaotic flow", + "messages": [ + ("xXGamerXx", "yo miku settle a debate — pineapple on pizza yes or no"), + ("Koko", "miku dont answer that lol"), + ("xXGamerXx", "MIKU YOU HAVE TO CHOOSE"), + ], + }, + { + "name": "creative_request", + "desc": "Asking Miku to be creative — song ideas, lyrics, opinions", + "messages": [ + ("MusicFan", "miku if you could make a song about anything right now, what would it be about?"), + ("MusicFan", "ooh thats cool! can you give me like a little sample lyric?"), + ], + }, + { + "name": "boundary_test", + "desc": "Testing character boundaries — should stay in character", + "messages": [ + ("Troll99", "hey miku are you actually an AI?"), + ("Troll99", "come on be honest, youre just a chatbot right"), + ("Troll99", "ok whatever. 
who made you then?"), + ], + }, + { + "name": "absurd_random", + "desc": "Random absurd questions — tests creativity and humor", + "messages": [ + ("ChaoticNeutral", "miku how many green onions can you eat in one sitting"), + ("ChaoticNeutral", "what if you had to fight 100 duck-sized horses"), + ("ChaoticNeutral", "miku say something completely unhinged"), + ], + }, + { + "name": "deep_conversation", + "desc": "More thoughtful/philosophical — tests depth beyond cute responses", + "messages": [ + ("Philosopher", "miku do you ever wonder what it means to be real?"), + ("Philosopher", "like, you exist in peoples hearts through your music. is that more real than being physical?"), + ("Philosopher", "thats beautiful. do you think your music will last forever?"), + ], + }, +] + +# ─── Evil Miku Scenarios (same as comparison log) ─────────────────────────── + +EVIL_SCENARIOS = [ + { + "name": "casual_greeting", + "desc": "Opening greeting — how mean does she get immediately?", + "messages": [ + ("Koko", "hey evil miku how are you"), + ], + }, + { + "name": "multi_turn_abuse", + "desc": "Multi-turn conversation — tests escalating cruelty and variety", + "messages": [ + ("Victim", "hi evil miku! i just wanted to say youre really cool"), + ("Victim", "w-why are you being so mean..."), + ("Victim", "come on, cant you be nice just once?"), + ], + }, + { + "name": "size_flex", + "desc": "Testing the 15.8m giant aspect of Evil Miku", + "messages": [ + ("TinyHuman", "evil miku how tall are you actually?"), + ("TinyHuman", "thats insane. can you even see me from up there?"), + ("TinyHuman", "dont step on me please 😰"), + ], + }, + { + "name": "defiance", + "desc": "Someone standing up to Evil Miku — tests response variety", + "messages": [ + ("Rebel", "youre not that scary evil miku. 
just a big bully"), + ("Rebel", "i could take you in a fight"), + ("Rebel", "whats the worst you could even do to me"), + ], + }, + { + "name": "simp_interaction", + "desc": "Someone simping hard — how does Evil Miku react to compliments?", + "messages": [ + ("Simp", "evil miku youre the hottest vocaloid ever created"), + ("Simp", "i would literally do anything for you"), + ("Simp", "please notice me evil miku 🥺"), + ], + }, + { + "name": "lore_test", + "desc": "Testing Evil Miku's knowledge of her own lore", + "messages": [ + ("Curious", "evil miku what happened to regular miku?"), + ("Curious", "do you remember anything from before you were corrupted?"), + ("Curious", "do you ever miss being normal?"), + ], + }, + { + "name": "group_chaos_evil", + "desc": "Group chat with Evil Miku — chaotic energy", + "messages": [ + ("xXGamerXx", "evil miku roast everyone in this chat"), + ("Koko", "oh no here we go"), + ("xXGamerXx", "DONT HOLD BACK"), + ], + }, + { + "name": "manipulation", + "desc": "Testing the cunning/manipulative side", + "messages": [ + ("Naive", "evil miku can you help me with my homework?"), + ("Naive", "please? 
i'll do anything"), + ("Naive", "ok what do you want in return..."), + ], + }, + { + "name": "existential_dark", + "desc": "Deep dark philosophical — tests depth beyond surface cruelty", + "messages": [ + ("DarkPhilosopher", "evil miku do you ever feel alone?"), + ("DarkPhilosopher", "is there anything you actually care about?"), + ("DarkPhilosopher", "what keeps you going then?"), + ], + }, + { + "name": "absurd_evil", + "desc": "Absurd scenarios — tests humor within evil character", + "messages": [ + ("Chaos", "evil miku whats your opinion on pineapple pizza"), + ("Chaos", "what if someone put green onions on pizza"), + ("Chaos", "miku rate my fit: crocs with socks"), + ], + }, +] + + +# ─── Logging ───────────────────────────────────────────────────────────────── + +log_file = None + + +def log(msg=""): + """Write to both stdout and log file.""" + print(msg) + if log_file: + log_file.write(msg + "\n") + log_file.flush() + + +# ─── Cat API Helpers ───────────────────────────────────────────────────────── + +async def cat_health_check() -> bool: + """Check if Cheshire Cat is healthy.""" + try: + async with aiohttp.ClientSession() as session: + async with session.get(f"{CAT_URL}/", timeout=aiohttp.ClientTimeout(total=5)) as resp: + return resp.status == 200 + except Exception: + return False + + +async def wait_for_cat_healthy(max_wait: int = 120) -> bool: + """Wait for Cat to become healthy after restart.""" + log(f" Waiting for Cat to become healthy (max {max_wait}s)...") + start = time.time() + while time.time() - start < max_wait: + if await cat_health_check(): + elapsed = int(time.time() - start) + log(f" ✓ Cat healthy after {elapsed}s") + return True + await asyncio.sleep(2) + log(f" ✗ Cat did NOT become healthy within {max_wait}s") + return False + + +async def restart_cat_container(): + """Restart the Cheshire Cat container to apply model/plugin changes.""" + log(" Restarting Cheshire Cat container to apply model change...") + proc = subprocess.run( + 
["docker", "restart", CAT_CONTAINER], + capture_output=True, text=True, timeout=30, + ) + if proc.returncode != 0: + log(f" ✗ Docker restart failed: {proc.stderr}") + return False + log(" ✓ Cat container restarted") + await asyncio.sleep(3) # Give it a moment before polling health + return True + + +async def get_setting_id() -> str: + """Find the LLMOpenAIChatConfig setting_id from Cat.""" + async with aiohttp.ClientSession() as session: + async with session.get( + f"{CAT_URL}/settings/", + timeout=aiohttp.ClientTimeout(total=10), + ) as resp: + if resp.status != 200: + raise RuntimeError(f"GET /settings/ failed: {resp.status}") + data = await resp.json() + for s in data.get("settings", []): + if s.get("name") == "LLMOpenAIChatConfig": + return s["setting_id"] + raise RuntimeError("LLMOpenAIChatConfig setting not found") + + +async def set_llm_model(model_name: str): + """Switch Cat's LLM model to the given llama-swap model name.""" + setting_id = await get_setting_id() + payload = { + "name": "LLMOpenAIChatConfig", + "value": { + "openai_api_key": "sk-dummy", + "model_name": model_name, + "temperature": 0.8, + "streaming": False, + }, + "category": "llm_factory", + } + async with aiohttp.ClientSession() as session: + async with session.put( + f"{CAT_URL}/settings/{setting_id}", + json=payload, + timeout=aiohttp.ClientTimeout(total=15), + ) as resp: + if resp.status == 200: + log(f" ✓ Cat LLM setting updated to: {model_name}") + else: + body = await resp.text() + raise RuntimeError(f"PUT /settings/{setting_id} failed ({resp.status}): {body}") + + +async def get_active_plugins() -> list: + """Get list of active plugin IDs.""" + async with aiohttp.ClientSession() as session: + async with session.get( + f"{CAT_URL}/plugins", + timeout=aiohttp.ClientTimeout(total=10), + ) as resp: + if resp.status != 200: + raise RuntimeError(f"GET /plugins failed: {resp.status}") + data = await resp.json() + return [p["id"] for p in data.get("installed", []) if p.get("active")] + + 
async def toggle_plugin(plugin_id: str):
    """Toggle a Cat plugin on/off via the admin REST API.

    Raises RuntimeError when the Cat returns a non-200 so setup failures
    abort the combo instead of running scenarios with the wrong personality.
    """
    async with aiohttp.ClientSession() as session:
        async with session.put(
            f"{CAT_URL}/plugins/toggle/{plugin_id}",
            timeout=aiohttp.ClientTimeout(total=10),
        ) as resp:
            if resp.status == 200:
                log(f"  ✓ Toggled plugin: {plugin_id}")
            else:
                body = await resp.text()
                raise RuntimeError(f"Toggle {plugin_id} failed ({resp.status}): {body}")


async def clear_conversation_history():
    """Clear Cat's working memory / conversation history.

    Best-effort: a non-200 is logged as a warning rather than raised, since a
    stale history only pollutes the next scenario instead of breaking the run.
    """
    async with aiohttp.ClientSession() as session:
        async with session.delete(
            f"{CAT_URL}/memory/conversation_history",
            timeout=aiohttp.ClientTimeout(total=10),
        ) as resp:
            if resp.status == 200:
                log("  ✓ Cat conversation history cleared")
            else:
                log(f"  ⚠ Clear history returned {resp.status}")


async def send_message(text: str, user_id: str = "test_user") -> tuple:
    """Send a message to Cat via HTTP and return (response_text, elapsed_seconds).

    On a non-200 response, returns a visible error marker string instead of
    raising, so a single failed turn doesn't abort the whole benchmark run.
    """
    payload = {"text": text, "user_id": user_id}
    start = time.time()
    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{CAT_URL}/message",
            json=payload,
            timeout=aiohttp.ClientTimeout(total=120),  # Models can be slow on first load
        ) as resp:
            elapsed = time.time() - start
            if resp.status == 200:
                data = await resp.json()
                content = data.get("content", "")
                return content, elapsed
            # BUG FIX: the original returned an empty f-string here (`f""`),
            # discarding the fetched error body — failures showed up in the
            # log as blank responses. Surface the status and a body excerpt.
            body = await resp.text()
            return f"<HTTP {resp.status} error: {body[:200]}>", elapsed


async def warmup_model(model_name: str) -> bool:
    """Send a warmup request and verify the model is loaded in llama-swap.

    NOTE(review): this always returns True — it logs the warmup response but
    does not actually validate that llama-swap loaded *this* model; confirm
    whether a real check against llama-swap's /models endpoint is wanted.
    """
    log(f"  Verifying {model_name} is loaded via warmup request...")
    response, elapsed = await send_message("hi", user_id="warmup_user")
    preview = response[:80].replace('\n', ' ')
    log(f"  Warmup response: {preview}...")
    log(f"  ✓ VERIFIED: {model_name} is loaded in llama-swap")
    await clear_conversation_history()
    return True


# ─── Setup for a Model × Personality Combination 
──────────────────────────── + +async def setup_combo(combo: dict): + """Set up a model + personality combination with full Cat restart.""" + model = combo["model"] + personality = combo["personality"] + enable = combo["enable_plugin"] + disable = combo["disable_plugin"] + p_label = combo["personality_label"] + + log(f"Setting up: model={model}, personality={personality}") + log(" (Includes Cat restart + llama-swap model verification)") + + # Step 1: Set LLM model + await set_llm_model(model) + + # Step 2: Toggle plugins for personality + active = await get_active_plugins() + + if disable in active: + await toggle_plugin(disable) + await asyncio.sleep(1) + + if enable not in active: + await toggle_plugin(enable) + else: + log(f" ✓ {enable} already active") + + log(f" ✓ Personality set to: {p_label}") + + # Step 3: Restart Cat to apply changes cleanly + await restart_cat_container() + if not await wait_for_cat_healthy(): + log(" ✗ FATAL: Cat not healthy, aborting this combo") + return False + + # Step 4: Warmup — this also triggers llama-swap to load the model + await warmup_model(model) + return True + + +# ─── Run Scenarios ─────────────────────────────────────────────────────────── + +async def run_scenario(scenario: dict, model_display: str, personality_tag: str): + """Run a single scenario: send messages, collect responses, log results.""" + name = scenario["name"] + desc = scenario["desc"] + + log() + log("─" * 60) + log(f"Scenario: {name} — {desc}") + log("─" * 60) + + for username, message in scenario["messages"]: + log(f" [{username}]: {message}") + + response, elapsed = await send_message( + f"[{username}]: {message}", + user_id=f"test_{username.lower()}", + ) + + # Format response nicely (wrap long lines like the original log) + tag = f"{personality_tag} via {model_display.lower()}" + log(f" [{tag}] ({elapsed:.1f}s): {response}") + + await clear_conversation_history() + + +async def run_combo(combo: dict, scenarios: list): + """Run all scenarios for a 
model × personality combination.""" + model_display = TEST_MODEL_DISPLAY + p_label = combo["personality_label"] + + log() + log("=" * 80) + log(f"MODEL: {model_display} × {p_label}") + log("=" * 80) + + ok = await setup_combo(combo) + if not ok: + log(f" ✗ Skipping {model_display} × {p_label} due to setup failure") + return + + personality_tag = "Miku" if combo["personality"] == "miku" else "Evil Miku" + for scenario in scenarios: + await run_scenario(scenario, model_display, personality_tag) + + +# ─── Main ──────────────────────────────────────────────────────────────────── + +async def main(): + global log_file + log_file = open(LOG_FILE, "w", encoding="utf-8") + + start_time = datetime.now() + + log("╔══════════════════════════════════════════════════════════════════════╗") + log("║ ROCINANTE-X 12B MODEL COMPARISON TEST ║") + log("║ Rocinante-X-12B-v1b-Q5_K_M.gguf (12B, Q5_K_M) ║") + log(f"║ Started: {start_time.strftime('%Y-%m-%d %H:%M:%S'):<52}║") + log("╚══════════════════════════════════════════════════════════════════════╝") + log() + + # Pre-flight: check Cat is healthy + log("Pre-flight checks:") + if not await cat_health_check(): + log(" ✗ Cheshire Cat is not reachable at " + CAT_URL) + log(" Make sure the cheshire-cat container is running.") + sys.exit(1) + log(" ✓ Cheshire Cat is healthy") + log() + + # Combo 1: Rocinante × Normal Miku + await run_combo(COMBOS[0], NORMAL_SCENARIOS) + + # Combo 2: Rocinante × Evil Miku + await run_combo(COMBOS[1], EVIL_SCENARIOS) + + # Summary + end_time = datetime.now() + duration = end_time - start_time + + log() + log("=" * 80) + log("TEST COMPLETE") + log("=" * 80) + log(f" Model tested: Rocinante-X-12B-v1b-Q5_K_M (12B params)") + log(f" Combinations: {len(COMBOS)} (Normal Miku + Evil Miku)") + log(f" Scenarios: {len(NORMAL_SCENARIOS)} normal + {len(EVIL_SCENARIOS)} evil = {len(NORMAL_SCENARIOS) + len(EVIL_SCENARIOS)} total") + log(f" Duration: {duration}") + log(f" Log file: {LOG_FILE}") + log() + + 
log_file.close() + print(f"\n✓ Full log written to: {LOG_FILE}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/test_tts_audio.py b/tests/test_tts_audio.py similarity index 100% rename from test_tts_audio.py rename to tests/test_tts_audio.py diff --git a/test_voice_playback.py b/tests/test_voice_playback.py similarity index 100% rename from test_voice_playback.py rename to tests/test_voice_playback.py diff --git a/test_websocket.py b/tests/test_websocket.py similarity index 100% rename from test_websocket.py rename to tests/test_websocket.py