- Moved 8 root-level test scripts + 2 from bot/ to tests/ - Moved run_rocinante_test.sh runner script to tests/ - Added tests/README.md documenting each test's purpose, type, and requirements - Added test_pfp_context.py and test_rocinante_comparison.py (previously untracked)
122 lines
3.5 KiB
Python
122 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Comprehensive Evil Miku Mood Test
|
||
|
||
Sends varied test messages to Cheshire Cat over HTTP (POST /message) for each
of the 10 evil moods, prints every response as it arrives, and ends with a
summary of response variety.
Uses the discord_bridge plugin's metadata to set mood and evil mode.

Uses only the stdlib (http.client + json) to avoid pip deps.
|
||
"""
|
||
|
||
import asyncio
|
||
import json
|
||
import http.client
|
||
import sys
|
||
|
||
# Host and port used for every HTTP request to the Cat; also echoed in the
# banner printed at the start of the run.
CAT_HOST = "localhost"
CAT_PORT = 1865
|
||
|
||
# Mood names exercised by the test run. Each one is sent as the
# "discord_mood" field of the request payload and is also embedded in the
# per-mood "user_id" header.
EVIL_MOODS = [
    "aggressive",
    "cunning",
    "sarcastic",
    "evil_neutral",
    "bored",
    "manic",
    "jealous",
    "melancholic",
    "playful_cruel",
    "contemptuous",
]
|
||
|
||
# Varied messages to test different mood expressions.
# Every message in this list is sent once for every mood in EVIL_MOODS.
TEST_MESSAGES = [
    "Hey, how's it going?",
    "What do you think about humans?",
    "Tell me something interesting.",
]
|
||
|
||
|
||
def query_cat_http(mood: str, message: str, timeout: float = 120.0) -> str:
    """Send one message to the Cat via HTTP POST /message with mood metadata.

    Args:
        mood: Sent as ``discord_mood`` in the JSON body and used to build the
            ``user_id`` request header (``mood_test_<mood>``).
        message: Sent as ``text`` in the JSON body.
        timeout: Socket timeout, in seconds, for the HTTP connection.

    Returns:
        The ``content`` field of the JSON reply on HTTP 200, or ``"(empty)"``
        when that field is missing or null. This function never raises for
        request failures; they are reported as parenthesised placeholders:
        ``"(HTTP <status>)"`` for a non-200 status and
        ``"(error: <exception>)"`` for any other failure.
    """
    payload = json.dumps({
        "text": message,
        "discord_mood": mood,
        "discord_evil_mode": True,
    })
    headers = {"Content-Type": "application/json", "user_id": f"mood_test_{mood}"}

    # Broad catch is deliberate: this is a best-effort test helper and the
    # caller treats any failure as just another (placeholder) response.
    try:
        conn = http.client.HTTPConnection(CAT_HOST, CAT_PORT, timeout=timeout)
        try:
            conn.request("POST", "/message", body=payload, headers=headers)
            resp = conn.getresponse()
            if resp.status != 200:
                return f"(HTTP {resp.status})"
            data = json.loads(resp.read().decode())
        finally:
            # Always release the socket, including on errors and early returns.
            conn.close()
        content = data.get("content", "(empty)")
    except Exception as e:
        return f"(error: {e})"

    # Honour the declared ``str`` return type: callers run string methods on
    # the result, so a JSON null or non-string payload must not leak through.
    if content is None:
        return "(empty)"
    return content if isinstance(content, str) else str(content)
|
||
|
||
|
||
def run_tests():
    """Query the Cat with every test message under every evil mood.

    Prints a banner, then each user message and the reply obtained from
    ``query_cat_http`` grouped by mood, and finally a summary containing:
    total and unique response counts, a duplicate warning when fewer than
    70% of the responses are unique, the number of short action-only
    replies (wrapped in ``*`` and under 30 characters after stripping), and
    the average length of replies that do not start with ``(``.
    """
    heavy_rule = "=" * 80
    light_rule = "─" * 80
    message_count = len(TEST_MESSAGES)

    print(heavy_rule)
    print(" EVIL MIKU COMPREHENSIVE MOOD TEST")
    print(heavy_rule)
    print(f" Testing {len(EVIL_MOODS)} moods × {message_count} messages")
    print(f" Cat HTTP: http://{CAT_HOST}:{CAT_PORT}")
    print(heavy_rule)

    results = {}

    for mood in EVIL_MOODS:
        replies = []
        results[mood] = replies
        print(f"\n{light_rule}")
        print(f" MOOD: {mood.upper()}")
        print(f"{light_rule}")

        for position, message in enumerate(TEST_MESSAGES, start=1):
            print(f"\n [{position}/{message_count}] User: {message}")
            reply = query_cat_http(mood, message)
            replies.append(reply)
            print(f" Evil Miku: {reply}")

    # Summary
    print(f"\n\n{heavy_rule}")
    print(" SUMMARY")
    print(f"{heavy_rule}")

    # Flatten the per-mood lists so identical responses can be spotted
    # across moods (the main problem this script is meant to surface).
    all_responses = [reply for replies in results.values() for reply in replies]
    distinct_count = len(set(all_responses))

    print(f"\n Total responses: {len(all_responses)}")
    print(f" Unique responses: {distinct_count}")

    if distinct_count >= len(all_responses) * 0.7:
        print(" ✅ Good variety in responses!")
    else:
        print(" ⚠️ WARNING: Many duplicate responses detected!")

    def is_action_only(text):
        # Short replies fully wrapped in asterisks, e.g. "*rolls eyes*".
        stripped = text.strip()
        return (
            stripped.startswith("*")
            and stripped.endswith("*")
            and len(stripped) < 30
        )

    action_count = sum(1 for reply in all_responses if is_action_only(reply))
    if action_count:
        print(f" ⚠️ {action_count} action-only responses (e.g., '*rolls eyes*')")
    else:
        print(" ✅ No action-only responses!")

    # Average response length, skipping replies that start with "(" (the
    # form used by the error/empty placeholders).
    lengths = [len(reply) for reply in all_responses if not reply.startswith("(")]
    if lengths:
        avg = sum(lengths) / len(lengths)
        print(f" Avg response length: {avg:.0f} chars")

    print()
|
||
|
||
|
||
# Run the full mood test only when executed as a script, not on import.
if __name__ == "__main__":
    run_tests()
|