# miku-discord/test_evil_moods.py

#!/usr/bin/env python3
"""
Comprehensive Evil Miku Mood Test

Queries Cheshire Cat over HTTP (POST /message) for each of the 10 evil moods,
sends varied test messages, and prints each response followed by a summary.
Sets mood and evil mode through the `discord_mood` / `discord_evil_mode`
payload fields (the metadata read by the discord_bridge plugin).

Uses only the standard library (http.client + json) to avoid pip deps.
"""

import asyncio
import json
import http.client
import sys

# Host and port of the Cat HTTP endpoint that query_cat_http connects to.
CAT_HOST = "localhost"
CAT_PORT = 1865

# Mood names exercised by the test run; each one is sent as the request's
# "discord_mood" value.
EVIL_MOODS = [
    "aggressive",
    "cunning",
    "sarcastic",
    "evil_neutral",
    "bored",
    "manic",
    "jealous",
    "melancholic",
    "playful_cruel",
    "contemptuous",
]

# Varied messages to test different mood expressions; every message is sent
# once per mood.
TEST_MESSAGES = [
    "Hey, how's it going?",
    "What do you think about humans?",
    "Tell me something interesting.",
]


def query_cat_http(mood: str, message: str, timeout: float = 120.0) -> str:
    """Send one message to the Cat via HTTP POST /message and return its reply.

    The mood and the evil-mode flag travel as extra fields in the JSON payload
    (``discord_mood`` / ``discord_evil_mode``), and each mood uses its own
    ``user_id`` header value (``mood_test_<mood>``).

    Args:
        mood: Mood name to send as ``discord_mood``.
        message: User text to send as ``text``.
        timeout: Timeout in seconds passed to the HTTP connection.

    Returns:
        The ``content`` field of the JSON reply on HTTP 200 (``"(empty)"`` if
        the field is missing), ``"(HTTP <status>)"`` for any other status, or
        ``"(error: <exception>)"`` if the request or decoding fails. Failures
        are reported as strings rather than raised so that one bad request
        does not abort the whole test run.
    """
    # Encode explicitly: http.client would otherwise encode a str body as
    # ISO-8859-1, which is not the right charset for a JSON payload.
    payload = json.dumps({
        "text": message,
        "discord_mood": mood,
        "discord_evil_mode": True,
    }).encode("utf-8")
    headers = {"Content-Type": "application/json", "user_id": f"mood_test_{mood}"}

    conn = None
    try:
        conn = http.client.HTTPConnection(CAT_HOST, CAT_PORT, timeout=timeout)
        conn.request("POST", "/message", body=payload, headers=headers)
        resp = conn.getresponse()
        if resp.status != 200:
            return f"(HTTP {resp.status})"
        data = json.loads(resp.read().decode())
        return data.get("content", "(empty)")
    except Exception as e:
        # Deliberately broad: this is a best-effort probe and the caller
        # only wants a printable result for each request.
        return f"(error: {e})"
    finally:
        # Always release the socket; a new connection is opened per request.
        if conn is not None:
            conn.close()


def _is_placeholder(response: str) -> bool:
    """Return True if *response* is a failure placeholder, not a real reply.

    Matches the strings query_cat_http returns when no reply text is
    available: "(empty)", "(HTTP <status>)" and "(error: <exception>)".
    """
    return response == "(empty)" or response.startswith(("(HTTP ", "(error: "))


def run_tests():
    """Query every mood with every test message and print a summary.

    Each (mood, message) pair is sent through query_cat_http and the reply is
    printed as it arrives. The summary then reports, over the real replies
    only (failure placeholders are counted separately):

    * how many replies are unique, warning if fewer than 70% are;
    * how many replies are short action-only strings such as '*rolls eyes*';
    * the average reply length in characters.
    """
    banner = "=" * 80
    rule = "─" * 80

    print(banner)
    print("  EVIL MIKU COMPREHENSIVE MOOD TEST")
    print(banner)
    print(f"  Testing {len(EVIL_MOODS)} moods × {len(TEST_MESSAGES)} messages")
    print(f"  Cat HTTP: http://{CAT_HOST}:{CAT_PORT}")
    print(banner)

    results = {}

    for mood in EVIL_MOODS:
        results[mood] = []
        print(f"\n{rule}")
        print(f"  MOOD: {mood.upper()}")
        print(rule)

        for i, message in enumerate(TEST_MESSAGES):
            print(f"\n  [{i+1}/{len(TEST_MESSAGES)}] User: {message}")
            response = query_cat_http(mood, message)
            results[mood].append(response)
            print(f"  Evil Miku: {response}")

    # Summary
    print(f"\n\n{banner}")
    print("  SUMMARY")
    print(banner)

    all_responses = []
    for responses in results.values():
        all_responses.extend(responses)

    # Keep failure placeholders out of the statistics: a batch of identical
    # "(error: ...)" strings says nothing about the variety of the replies.
    replies = [r for r in all_responses if not _is_placeholder(r)]
    failed = len(all_responses) - len(replies)
    unique = set(replies)

    print(f"\n  Total responses: {len(all_responses)}")
    if failed:
        print(f"  ⚠️  Failed requests: {failed}")
    print(f"  Unique responses: {len(unique)}")

    if not replies:
        # Nothing to analyse; avoid printing misleading "all good" lines.
        print("  ⚠️  WARNING: No successful responses to analyse!")
        print()
        return

    # Check for identical responses (the main problem we're trying to fix)
    if len(unique) < len(replies) * 0.7:
        print("  ⚠️  WARNING: Many duplicate responses detected!")
    else:
        print("  ✅ Good variety in responses!")

    # Check for "*rolls eyes*" only responses
    stripped = [r.strip() for r in replies]
    action_only = [
        s for s in stripped
        if s.startswith("*") and s.endswith("*") and len(s) < 30
    ]
    if action_only:
        print(f"  ⚠️  {len(action_only)} action-only responses (e.g., '*rolls eyes*')")
    else:
        print("  ✅ No action-only responses!")

    # Average response length
    avg = sum(len(r) for r in replies) / len(replies)
    print(f"  Avg response length: {avg:.0f} chars")

    print()


# Run the full mood test only when executed as a script, not on import.
if __name__ == "__main__":
    run_tests()