Files
miku-discord/cheshire-cat/test_phase2.py

215 lines
6.3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Phase 2 Test Script
Tests the Memory Consolidation plugin:
1. Send multiple messages (some important, some trivial)
2. Manually trigger consolidation
3. Verify important memories kept, trivial deleted
4. Check if facts were extracted to declarative memory
"""
import requests
import json
import time
from datetime import datetime
# Base URL of the Cat HTTP server; endpoint paths such as "/message" are
# appended to it by the request helpers in this script.
CAT_URL = "http://localhost:1865"
# Sent as `user_id` with every test message so the run is attributable to
# one dedicated test user.
TEST_USER_ID = "discord_user_phase2_test"
def send_message(
    text: str,
    guild_id: str = "test_guild",
    description: str = "",
) -> bool:
    """Post one chat message to the Cat and report whether it was accepted.

    Args:
        text: Message body, sent as the ``text`` field of the payload.
        guild_id: Value placed in ``metadata.guild_id``.
        description: Human-readable label printed above the message.

    Returns:
        True when the server answers HTTP 200 with a decodable JSON body;
        False on any other status code, on a transport error, or when the
        body is not valid JSON.
    """
    print(f"\n{'='*60}")
    print(f"📤 {description}")
    print(f" Message: '{text}'")

    payload = {
        "text": text,
        "user_id": TEST_USER_ID,
        "metadata": {
            "guild_id": guild_id,
            "channel_id": "test_channel",
        },
    }

    # Only transport-level failures are expected here; anything else is a
    # programming error and should surface instead of being swallowed.
    try:
        response = requests.post(
            f"{CAT_URL}/message",
            json=payload,
            timeout=30,
        )
    except requests.RequestException as e:
        print(f" ❌ Exception: {e}")
        return False

    if response.status_code != 200:
        print(f" ❌ Error: {response.status_code}")
        return False

    # JSON decode errors raised by ``Response.json()`` derive from ValueError.
    try:
        result = response.json()
    except ValueError as e:
        print(f" ❌ Exception: {e}")
        return False

    # A 200 means the message was delivered. A missing, null, or non-string
    # ``content`` (or a non-object body) must not turn that success into a
    # reported failure, so build the preview defensively.
    content = result.get("content") if isinstance(result, dict) else result
    preview = "" if content is None else str(content)
    print(f" ✅ Response: {preview[:80]}...")
    return True
def trigger_consolidation():
    """Try to start a consolidation run through POST /admin/consolidate.

    Returns True only when the endpoint answers HTTP 200. Any other status
    code, or any exception raised while calling it, prints a warning plus a
    note that the failure is expected, and returns False.
    """
    separator = "=" * 60
    expected_note = " (This is expected - no admin endpoint yet)"

    print("\n" + separator)
    print("🌙 TRIGGERING CONSOLIDATION")
    print(separator)

    try:
        # Try to trigger via API (if endpoint exists)
        reply = requests.post(f"{CAT_URL}/admin/consolidate", timeout=60)
        if reply.status_code != 200:
            print(f"⚠️ API returned {reply.status_code}")
            print(expected_note)
            return False
        print("✅ Consolidation triggered successfully")
        return True
    except Exception as err:
        # Deliberately broad: a missing endpoint must never abort the run.
        print(f"⚠️ Could not trigger via API: {err}")
        print(expected_note)
        return False
def check_logs():
    """Print the docker command for inspecting consolidation logs by hand.

    Nothing is executed; the function only writes instructions to stdout.
    """
    separator = "=" * 60
    output_lines = (
        "\n" + separator,
        "📋 CHECKING CONSOLIDATION LOGS",
        separator,
        "\nRun this command manually to check:",
        " docker logs miku_cheshire_cat_test 2>&1 | grep -E '(Consolidation|🌙|✨|💾|🗑️)' | tail -30",
    )
    for line in output_lines:
        print(line)
def main():
    """Run the Phase 2 manual test sequence from start to finish.

    Sends the sample messages, attempts to trigger consolidation, waits,
    then prints the expected outcome and the manual verification steps.
    Nothing is asserted here; the results have to be checked by hand.
    """
    rule = "=" * 60

    def banner(title, blank_before=True):
        # A title framed by two rules, optionally preceded by a blank line.
        print(("\n" + rule) if blank_before else rule)
        print(title)
        print(rule)

    banner("PHASE 2 TEST: Memory Consolidation", blank_before=False)
    print(f"\n🧪 Testing with user: {TEST_USER_ID}")
    print(" Sending mix of important and trivial messages")

    # Wait for Cat to be ready
    time.sleep(2)

    # Test Suite 1: Send varied messages
    banner("TEST SUITE 1: Varied Message Types")
    sample_messages = [
        # Trivial (should be deleted)
        ("lol", "Trivial - pure reaction"),
        ("k", "Trivial - 1 char"),
        ("okay", "Trivial - acknowledgment"),
        # Important (should be kept)
        ("My name is Alice", "Important - personal info"),
        ("I love playing guitar", "Important - hobby/preference"),
        ("My dog died last month", "Important - emotional event"),
        ("I'm studying computer science at MIT", "Important - education"),
        # Medium (depends on context)
        ("What's the weather like?", "Medium - generic question"),
        ("I had pizza for lunch", "Medium - daily activity"),
        # Very important (should definitely be kept)
        ("I'm getting married next month!", "Critical - major life event"),
        ("I've been diagnosed with depression", "Critical - health/emotional"),
    ]
    for body, label in sample_messages:
        send_message(body, description=label)
        time.sleep(1)

    # Test Suite 2: Trigger consolidation
    banner("TEST SUITE 2: Consolidation Trigger")
    trigger_consolidation()

    # Wait for consolidation to complete
    print("\n⏳ Waiting 10 seconds for consolidation to complete...")
    time.sleep(10)

    # Test Suite 3: Verify results
    banner("TEST SUITE 3: Verification")
    print("\n✅ EXPECTED RESULTS:")
    expectations = (
        (
            "\n📝 Should be DELETED (trivial):",
            (
                " - 'lol' (pure reaction)",
                " - 'k' (too short)",
                " - 'okay' (acknowledgment)",
            ),
        ),
        (
            "\n💾 Should be KEPT (important):",
            (
                " - 'My name is Alice' (importance: 7-8)",
                " - 'I love playing guitar' (importance: 6-7)",
                " - 'My dog died last month' (importance: 9-10)",
                " - 'I'm studying CS at MIT' (importance: 7-8)",
                " - 'I'm getting married!' (importance: 10)",
                " - 'diagnosed with depression' (importance: 10)",
            ),
        ),
        (
            "\n📚 Should be extracted as FACTS (declarative memory):",
            (
                " - 'User's name is Alice'",
                " - 'User plays guitar'",
                " - 'User lost their dog recently'",
                " - 'User studies CS at MIT'",
                " - 'User getting married soon'",
                " - 'User has depression'",
            ),
        ),
    )
    for heading, entries in expectations:
        print(heading)
        for entry in entries:
            print(entry)

    # Check logs
    check_logs()

    # Summary
    banner("MANUAL VERIFICATION STEPS")
    manual_steps = """
1. Check Docker logs for consolidation output:
docker logs miku_cheshire_cat_test 2>&1 | tail -100
2. Look for these indicators:
🌙 [Consolidation] Starting...
📊 [Consolidation] Fetching unconsolidated memories
[Consolidation] Complete! Stats: ...
3. Verify in next conversation:
Test if Miku remembers:
- User's name (Alice)
- That user plays guitar
- That user is getting married
Should NOT remember:
- 'lol', 'k', 'okay'
4. Test memory recall:
Send: "What do you know about me?"
Expected: Mentions name, guitar, upcoming marriage, etc.
5. Check memory stats:
If stats show:
- Processed: 11 memories
- Kept: 6-7 important ones
- Deleted: 4-5 trivial ones
- Facts learned: 5-6 facts
Then Phase 2 is working!
"""
    print(manual_steps)
    print("\n✨ Phase 2 testing complete!")
    print("\nNext: Run verification queries to test memory recall")
# Run the test sequence only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()