tests/test_evil_moods.py

#!/usr/bin/env python3
"""
Comprehensive Evil Miku Mood Test

Queries Cheshire Cat over HTTP (POST /message) for each of the 10 evil moods,
sends varied test messages, and prints the responses grouped by mood.
Uses the discord_bridge plugin's metadata (discord_mood, discord_evil_mode)
to set mood and evil mode.

Uses only the standard library (http.client, json) to avoid pip deps.
"""

import asyncio
import json
import http.client
import sys

# Host and port of the Cat HTTP endpoint that query_cat_http() connects to.
CAT_HOST = "localhost"
CAT_PORT = 1865

# Mood names to exercise; each one is sent as the "discord_mood" field of the
# request body, once per entry in TEST_MESSAGES.
EVIL_MOODS = [
    "aggressive",
    "cunning",
    "sarcastic",
    "evil_neutral",
    "bored",
    "manic",
    "jealous",
    "melancholic",
    "playful_cruel",
    "contemptuous",
]

# Varied messages to test different mood expressions.
# Every message is sent once for every mood in EVIL_MOODS.
TEST_MESSAGES = [
    "Hey, how's it going?",
    "What do you think about humans?",
    "Tell me something interesting.",
]


def query_cat_http(mood: str, message: str, timeout: float = 120.0) -> str:
    """Send one message to the Cat via HTTP POST /message with mood metadata.

    Args:
        mood: Mood name placed in the ``discord_mood`` field of the JSON body
            and used to build the ``user_id`` header (``mood_test_<mood>``).
        message: Text sent as the ``text`` field of the JSON body.
        timeout: Socket timeout in seconds handed to ``HTTPConnection``.

    Returns:
        The ``content`` field of the JSON reply on HTTP 200, or ``"(empty)"``
        when that field is missing or null. Failures are never raised; they
        are reported as parenthesised placeholders: ``"(HTTP <status>)"`` for
        a non-200 status and ``"(error: <exception>)"`` for anything that
        raises while talking to the server or decoding its reply.
    """
    payload = json.dumps({
        "text": message,
        "discord_mood": mood,
        "discord_evil_mode": True,
    })
    headers = {"Content-Type": "application/json", "user_id": f"mood_test_{mood}"}

    conn = None
    try:
        conn = http.client.HTTPConnection(CAT_HOST, CAT_PORT, timeout=timeout)
        conn.request("POST", "/message", body=payload, headers=headers)
        resp = conn.getresponse()
        if resp.status != 200:
            return f"(HTTP {resp.status})"
        data = json.loads(resp.read().decode())
        content = data.get("content")
        # A JSON null would otherwise leak None to callers that expect str.
        return "(empty)" if content is None else content
    except Exception as e:
        # Deliberately broad: this is a best-effort probe, so any failure is
        # turned into a placeholder string instead of aborting the whole run.
        return f"(error: {e})"
    finally:
        # Always release the socket; one connection is opened per call.
        if conn is not None:
            conn.close()


def run_tests():
    """Query every evil mood with every test message and print a report.

    Each reply is echoed as soon as it arrives. Afterwards a summary is
    printed that flags heavy duplication across replies, short replies that
    consist only of an ``*action*``, and the mean length of the replies that
    do not start with ``(`` (the placeholder form used for failures).
    """
    for line in (
        "=" * 80,
        "  EVIL MIKU COMPREHENSIVE MOOD TEST",
        "=" * 80,
        f"  Testing {len(EVIL_MOODS)} moods × {len(TEST_MESSAGES)} messages",
        f"  Cat HTTP: http://{CAT_HOST}:{CAT_PORT}",
        "=" * 80,
    ):
        print(line)

    replies_by_mood = {}

    for mood in EVIL_MOODS:
        collected = []
        replies_by_mood[mood] = collected

        print(f"\n{'─' * 80}")
        print(f"  MOOD: {mood.upper()}")
        print(f"{'─' * 80}")

        for i, message in enumerate(TEST_MESSAGES):
            print(f"\n  [{i+1}/{len(TEST_MESSAGES)}] User: {message}")
            response = query_cat_http(mood, message)
            collected.append(response)
            print(f"  Evil Miku: {response}")

    # Summary
    print(f"\n\n{'=' * 80}")
    print("  SUMMARY")
    print(f"{'=' * 80}")

    # Flatten in mood order so duplicates can be counted across all moods
    # (identical replies are the main problem this script looks for).
    all_responses = [
        reply for per_mood in replies_by_mood.values() for reply in per_mood
    ]
    unique = set(all_responses)

    print(f"\n  Total responses: {len(all_responses)}")
    print(f"  Unique responses: {len(unique)}")

    if len(unique) >= len(all_responses) * 0.7:
        print("  ✅ Good variety in responses!")
    else:
        print("  ⚠️  WARNING: Many duplicate responses detected!")

    # Short replies wrapped entirely in asterisks, e.g. "*rolls eyes*".
    action_only = []
    for reply in all_responses:
        trimmed = reply.strip()
        if len(trimmed) < 30 and trimmed.startswith("*") and trimmed.endswith("*"):
            action_only.append(reply)

    if not action_only:
        print("  ✅ No action-only responses!")
    else:
        print(f"  ⚠️  {len(action_only)} action-only responses (e.g., '*rolls eyes*')")

    # Mean length, ignoring replies that start with "(".
    real_lengths = [len(reply) for reply in all_responses if not reply.startswith("(")]
    if real_lengths:
        avg = sum(real_lengths) / len(real_lengths)
        print(f"  Avg response length: {avg:.0f} chars")

    print()


# Run the full mood sweep only when executed as a script, not on import.
if __name__ == "__main__":
    run_tests()
-												refactor: deduplicate prompts, reorganize persona files, update paths

Prompt deduplication (~20% reduction, 4,743 chars saved):
- evil_miku_lore.txt: remove intra-file duplication (height rule 2x,
  cruelty-has-substance 2x, music secret 2x, adoration secret 2x),
  trim verbose restatements, cut speech examples from 10 to 6
- evil_miku_prompt.txt: remove entire PERSONALITY section (in lore),
  remove entire RESPONSE STYLE section (now only in preamble),
  soften height from prohibition to knowledge
- miku_lore.txt: remove RELATIONSHIPS section (duplicates FRIENDS)
- miku_prompt.txt: remove duplicate intro, 4 personality traits
  already in lore, FAMOUS SONGS section (in lore), fix response
  length inconsistency (1-2 vs 2-3 -> consistent 2-3)

Preamble updates (evil_mode.py, evil_miku_personality.py, llm.py,
miku_personality.py):
- Response rules now exist in ONE place only (preamble)
- Height rule softened: model knows 15.8m, can say it if asked,
  but won't default to quoting it when taunting
- Response length: 2-4 sentences (was 1-3), removed action template
  list that model was copying literally (*scoffs*, *rolls eyes*)
- Added: always include actual words, never action-only responses
- Normal Miku: trim CHARACTER CONTEXT, fix 1-3 -> 2-3 sentences

Directory reorganization:
- Move 6 persona files to bot/persona/{evil,miku}/ subdirectories
- Update all open() paths in evil_mode.py, context_manager.py,
  voice_manager.py, both Cat plugins
- Dockerfile: 6 COPY lines -> 1 (COPY persona /app/persona)
- docker-compose: 6 file mounts -> 2 directory mounts
  (bot/persona/evil -> cat/data/evil, bot/persona/miku -> cat/data/miku)

Evil Miku system (previously unstaged):
- Full evil mood management: 2h rotation timer, mood persistence,
  10 mood-specific autonomous template pools, mood-aware DMs
- Evil mode toggle with role color/nickname/pfp management
- get_evil_system_prompt() with mood integration

Add test_evil_moods.py: 10-mood x 3-message comprehensive test

											
										
										
											2026-02-27 13:14:03 +02:00
+								#!/usr/bin/env python3
 								"""
 								Comprehensive Evil Miku Mood Test
 								Connects to Cheshire Cat via WebSocket for each of the 10 evil moods,
 								sends varied test messages, and displays responses side-by-side.
 								Uses the discord_bridge plugin's metadata to set mood and evil mode.
 								Uses only stdlib (asyncio) + websockets-like raw socket to avoid pip deps.
 								"""
 								import asyncio
 								import json
 								import http.client
 								import sys
 								CAT_HOST = "localhost"
 								CAT_PORT = 1865
 								EVIL_MOODS = [
 								    "aggressive",
 								    "cunning",
 								    "sarcastic",
 								    "evil_neutral",
 								    "bored",
 								    "manic",
 								    "jealous",
 								    "melancholic",
 								    "playful_cruel",
 								    "contemptuous",
 								]
 								# Varied messages to test different mood expressions
 								TEST_MESSAGES = [
 								    "Hey, how's it going?",
 								    "What do you think about humans?",
 								    "Tell me something interesting.",
 								]
 								def query_cat_http(mood: str, message: str, timeout: float = 120.0) -> str:
 								    """Send a message to the Cat via HTTP POST /message with mood metadata."""
 								    payload = json.dumps({
 								        "text": message,
 								        "discord_mood": mood,
 								        "discord_evil_mode": True,
 								    })
 								    try:
 								        conn = http.client.HTTPConnection(CAT_HOST, CAT_PORT, timeout=timeout)
 								        headers = {"Content-Type": "application/json", "user_id": f"mood_test_{mood}"}
 								        conn.request("POST", "/message", body=payload, headers=headers)
 								        resp = conn.getresponse()
 								        if resp.status == 200:
 								            data = json.loads(resp.read().decode())
 								            return data.get("content", "(empty)")
 								        else:
 								            return f"(HTTP {resp.status})"
 								    except Exception as e:
 								        return f"(error: {e})"
 								def run_tests():
 								    print("=" * 80)
 								    print("  EVIL MIKU COMPREHENSIVE MOOD TEST")
 								    print("=" * 80)
 								    print(f"  Testing {len(EVIL_MOODS)} moods × {len(TEST_MESSAGES)} messages")
 								    print(f"  Cat HTTP: http://{CAT_HOST}:{CAT_PORT}")
 								    print("=" * 80)
 								    results = {}
 								    for mood in EVIL_MOODS:
 								        results[mood] = []
 								        print(f"\n{'─' * 80}")
 								        print(f"  MOOD: {mood.upper()}")
 								        print(f"{'─' * 80}")
 								        for i, message in enumerate(TEST_MESSAGES):
 								            print(f"\n  [{i+1}/{len(TEST_MESSAGES)}] User: {message}")
 								            response = query_cat_http(mood, message)
 								            results[mood].append(response)
 								            print(f"  Evil Miku: {response}")
 								    # Summary
 								    print(f"\n\n{'=' * 80}")
 								    print("  SUMMARY")
 								    print(f"{'=' * 80}")
 								    # Check for identical responses (the main problem we're trying to fix)
 								    all_responses = []
 								    for mood, responses in results.items():
 								        all_responses.extend(responses)
 								    unique = set(all_responses)
 								    print(f"\n  Total responses: {len(all_responses)}")
 								    print(f"  Unique responses: {len(unique)}")
 								    if len(unique) < len(all_responses) * 0.7:
 								        print("  ⚠️  WARNING: Many duplicate responses detected!")
 								    else:
 								        print("  ✅ Good variety in responses!")
 								    # Check for "*rolls eyes*" only responses
 								    action_only = [r for r in all_responses if r.strip().startswith("*") and r.strip().endswith("*") and len(r.strip()) < 30]
 								    if action_only:
 								        print(f"  ⚠️  {len(action_only)} action-only responses (e.g., '*rolls eyes*')")
 								    else:
 								        print("  ✅ No action-only responses!")
 								    # Average response length
 								    lengths = [len(r) for r in all_responses if not r.startswith("(")]
 								    if lengths:
 								        avg = sum(lengths) / len(lengths)
 								        print(f"  Avg response length: {avg:.0f} chars")
 								    print()
 								if __name__ == "__main__":
 								    run_tests()