Files
miku-discord/tests/test_evil_moods.py

122 lines
3.5 KiB
Python
Raw Permalink Normal View History

refactor: deduplicate prompts, reorganize persona files, update paths Prompt deduplication (~20% reduction, 4,743 chars saved): - evil_miku_lore.txt: remove intra-file duplication (height rule 2x, cruelty-has-substance 2x, music secret 2x, adoration secret 2x), trim verbose restatements, cut speech examples from 10 to 6 - evil_miku_prompt.txt: remove entire PERSONALITY section (in lore), remove entire RESPONSE STYLE section (now only in preamble), soften height from prohibition to knowledge - miku_lore.txt: remove RELATIONSHIPS section (duplicates FRIENDS) - miku_prompt.txt: remove duplicate intro, 4 personality traits already in lore, FAMOUS SONGS section (in lore), fix response length inconsistency (1-2 vs 2-3 -> consistent 2-3) Preamble updates (evil_mode.py, evil_miku_personality.py, llm.py, miku_personality.py): - Response rules now exist in ONE place only (preamble) - Height rule softened: model knows 15.8m, can say it if asked, but won't default to quoting it when taunting - Response length: 2-4 sentences (was 1-3), removed action template list that model was copying literally (*scoffs*, *rolls eyes*) - Added: always include actual words, never action-only responses - Normal Miku: trim CHARACTER CONTEXT, fix 1-3 -> 2-3 sentences Directory reorganization: - Move 6 persona files to bot/persona/{evil,miku}/ subdirectories - Update all open() paths in evil_mode.py, context_manager.py, voice_manager.py, both Cat plugins - Dockerfile: 6 COPY lines -> 1 (COPY persona /app/persona) - docker-compose: 6 file mounts -> 2 directory mounts (bot/persona/evil -> cat/data/evil, bot/persona/miku -> cat/data/miku) Evil Miku system (previously unstaged): - Full evil mood management: 2h rotation timer, mood persistence, 10 mood-specific autonomous template pools, mood-aware DMs - Evil mode toggle with role color/nickname/pfp management - get_evil_system_prompt() with mood integration Add test_evil_moods.py: 10-mood x 3-message comprehensive test
2026-02-27 13:14:03 +02:00
#!/usr/bin/env python3
"""
Comprehensive Evil Miku Mood Test
Sends varied test messages to Cheshire Cat over HTTP (POST /message) for each
of the 10 evil moods and prints every response grouped by mood, followed by a
summary of response variety.
Passes discord_mood / discord_evil_mode in the request payload, which the
discord_bridge plugin's metadata handling uses to set mood and evil mode.
Uses only the standard library (http.client + json) to avoid pip deps.
"""
import asyncio
import json
import http.client
import sys
# Host and port used for every HTTP request to the Cat.
CAT_HOST = "localhost"
CAT_PORT = 1865

# Mood identifiers exercised by the test; each one gets a full round of
# TEST_MESSAGES.
EVIL_MOODS = [
    "aggressive",
    "cunning",
    "sarcastic",
    "evil_neutral",
    "bored",
    "manic",
    "jealous",
    "melancholic",
    "playful_cruel",
    "contemptuous",
]

# Prompts sent under every mood; varied so that different mood expressions
# have a chance to show up in the replies.
TEST_MESSAGES = [
    "Hey, how's it going?",
    "What do you think about humans?",
    "Tell me something interesting.",
]
def query_cat_http(mood: str, message: str, timeout: float = 120.0) -> str:
    """Send a message to the Cat via HTTP POST /message with mood metadata.

    Args:
        mood: Mood identifier, sent as ``discord_mood`` in the JSON payload
            and used to build a per-mood ``user_id`` header.
        message: The user text to send.
        timeout: Socket timeout in seconds for the HTTP connection.

    Returns:
        The ``content`` field of the JSON reply on HTTP 200 (``"(empty)"``
        when the field is missing or null), ``"(HTTP <status>)"`` for any
        other status, or ``"(error: <exc>)"`` if anything raises. The
        function never raises; failures are reported in-band as a
        parenthesised placeholder string.
    """
    payload = json.dumps({
        "text": message,
        "discord_mood": mood,
        "discord_evil_mode": True,
    })
    headers = {"Content-Type": "application/json", "user_id": f"mood_test_{mood}"}

    conn = None
    try:
        conn = http.client.HTTPConnection(CAT_HOST, CAT_PORT, timeout=timeout)
        conn.request("POST", "/message", body=payload, headers=headers)
        resp = conn.getresponse()
        if resp.status != 200:
            return f"(HTTP {resp.status})"
        data = json.loads(resp.read().decode())
        content = data.get("content", "(empty)")
        # Honour the declared str return type: a JSON null (or any non-string
        # value) would otherwise break callers that call .strip() on the reply.
        return "(empty)" if content is None else str(content)
    except Exception as e:
        # Deliberate best-effort: one failed request must not abort the
        # whole sweep, so the error is reported as the response text.
        return f"(error: {e})"
    finally:
        # Always release the socket; previously the connection was never
        # closed, leaking one socket per request.
        if conn is not None:
            conn.close()
def run_tests():
    """Query the Cat once per (mood, message) pair and print a report.

    For every mood in EVIL_MOODS, each prompt in TEST_MESSAGES is sent through
    query_cat_http() and the reply is printed as it arrives. A summary then
    reports total vs. unique replies, the number of short action-only replies
    (e.g. '*rolls eyes*'), and the average reply length.

    Returns nothing; all output goes to stdout.
    """
    banner = "=" * 80
    # The per-mood separator used to be built from an empty string
    # ('' * 80), which printed nothing. Use a visible rule instead.
    rule = "-" * 80
    message_count = len(TEST_MESSAGES)

    print(banner)
    print(" EVIL MIKU COMPREHENSIVE MOOD TEST")
    print(banner)
    print(f" Testing {len(EVIL_MOODS)} moods × {message_count} messages")
    print(f" Cat HTTP: http://{CAT_HOST}:{CAT_PORT}")
    print(banner)

    results = {}
    for mood in EVIL_MOODS:
        results[mood] = []
        print(f"\n{rule}")
        print(f" MOOD: {mood.upper()}")
        print(rule)
        for index, message in enumerate(TEST_MESSAGES, start=1):
            print(f"\n [{index}/{message_count}] User: {message}")
            response = query_cat_http(mood, message)
            results[mood].append(response)
            print(f" Evil Miku: {response}")

    # Summary
    print(f"\n\n{banner}")
    print(" SUMMARY")
    print(banner)

    # Check for identical responses (the main problem we're trying to fix)
    all_responses = [
        response for responses in results.values() for response in responses
    ]
    unique = set(all_responses)
    print(f"\n Total responses: {len(all_responses)}")
    print(f" Unique responses: {len(unique)}")
    # Fewer than 70% distinct replies is treated as "too repetitive".
    if len(unique) < len(all_responses) * 0.7:
        print(" ⚠️ WARNING: Many duplicate responses detected!")
    else:
        print(" ✅ Good variety in responses!")

    # Check for "*rolls eyes*" only responses: short replies that are fully
    # wrapped in asterisks and therefore contain no spoken words.
    stripped_responses = [response.strip() for response in all_responses]
    action_only = [
        text
        for text in stripped_responses
        if text.startswith("*") and text.endswith("*") and len(text) < 30
    ]
    if action_only:
        print(f" ⚠️ {len(action_only)} action-only responses (e.g., '*rolls eyes*')")
    else:
        print(" ✅ No action-only responses!")

    # Average response length, skipping the parenthesised placeholders that
    # query_cat_http() returns for errors and empty replies.
    lengths = [len(r) for r in all_responses if not r.startswith("(")]
    if lengths:
        avg = sum(lengths) / len(lengths)
        print(f" Avg response length: {avg:.0f} chars")
    print()
# Run the full mood sweep only when executed as a script, not on import.
if __name__ == "__main__":
    run_tests()