Configuration:
- .env.example, .gitignore, compose.yml (main docker compose)
- docker-compose-amd.yml (ROCm), docker-compose-macos.yml
- start.sh, stop.sh convenience scripts
- LICENSE (Apache 2.0, from upstream Cheshire Cat)

Memory management utilities:
- analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py
- check_memories.py, extract_declarative_facts.py, store_declarative_facts.py
- compare_systems.py (system comparison tool)
- benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py

Test suite:
- quick_test.py, test_setup.py, test_setup_simple.py
- test_consolidation_direct.py, test_declarative_recall.py, test_recall.py
- test_end_to_end.py, test_full_pipeline.py
- test_phase2.py, test_phase2_comprehensive.py

Documentation:
- README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md
- PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md
- POST_OPTIMIZATION_ANALYSIS.md
215 lines
6.3 KiB
Python
Executable File
215 lines
6.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Phase 2 Test Script
|
|
|
|
Tests the Memory Consolidation plugin:
|
|
1. Send multiple messages (some important, some trivial)
|
|
2. Manually trigger consolidation
|
|
3. Verify important memories kept, trivial deleted
|
|
4. Check if facts were extracted to declarative memory
|
|
"""
|
|
|
|
import requests
|
|
import json
|
|
import time
|
|
from datetime import datetime
|
|
|
|
|
|
# Base URL of the Cat HTTP server; endpoint paths are appended to it.
CAT_URL = "http://localhost:1865"
|
|
# User id sent as "user_id" with every message posted by this script.
TEST_USER_ID = "discord_user_phase2_test"
|
|
|
|
|
|
def send_message(text: str, guild_id: str = "test_guild", description: str = ""):
    """POST one chat message to the Cat and report whether it was accepted.

    Prints a banner with ``description`` and the message text, then posts a
    JSON payload (text, test user id, guild/channel metadata) to
    ``{CAT_URL}/message`` with a 30 second timeout.

    Args:
        text: Message body to send.
        guild_id: Value placed under ``metadata.guild_id`` in the payload.
        description: Label printed next to the outgoing-message marker.

    Returns:
        True when the server answers HTTP 200 and the reply could be
        printed; False for any other status code or when sending or
        handling the reply raises.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📤 {description}")
    print(f" Message: '{text}'")

    body = {
        "text": text,
        "user_id": TEST_USER_ID,
        "metadata": {"guild_id": guild_id, "channel_id": "test_channel"},
    }

    try:
        reply = requests.post(f"{CAT_URL}/message", json=body, timeout=30)

        # Guard clause: anything other than 200 counts as a failed send.
        if reply.status_code != 200:
            print(f" ❌ Error: {reply.status_code}")
            return False

        # Only the first 80 characters of the reply content are echoed.
        preview = reply.json().get("content", "")[:80]
        print(f" ✅ Response: {preview}...")
        return True
    except Exception as exc:
        # Best effort: any failure is reported and treated as "not sent".
        print(f" ❌ Exception: {exc}")
        return False
|
|
|
|
|
|
def trigger_consolidation():
    """Try to start consolidation through ``{CAT_URL}/admin/consolidate``.

    Sends a body-less POST with a 60 second timeout. Failure is treated as
    an expected outcome and is only reported, never raised.

    Returns:
        True when the endpoint answers HTTP 200, otherwise False (including
        when the request raises).
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print("🌙 TRIGGERING CONSOLIDATION")
    print(rule)

    # Shown on both failure paths.
    expected_note = " (This is expected - no admin endpoint yet)"

    try:
        # Try to trigger via API (if endpoint exists)
        reply = requests.post(f"{CAT_URL}/admin/consolidate", timeout=60)

        if reply.status_code == 200:
            print("✅ Consolidation triggered successfully")
            return True

        print(f"⚠️ API returned {reply.status_code}")
        print(expected_note)
        return False
    except Exception as exc:
        print(f"⚠️ Could not trigger via API: {exc}")
        print(expected_note)
        return False
|
|
|
|
|
|
def check_logs():
    """Print the docker command for inspecting consolidation log lines.

    Nothing is executed here: the command is only displayed so it can be
    run by hand.
    """
    rule = "=" * 60
    output_lines = (
        f"\n{rule}",
        "📋 CHECKING CONSOLIDATION LOGS",
        rule,
        "\nRun this command manually to check:",
        " docker logs miku_cheshire_cat_test 2>&1 | grep -E '(Consolidation|🌙|✨|💾|🗑️)' | tail -30",
    )
    for line in output_lines:
        print(line)
|
|
|
|
|
|
def main():
    """Run the Phase 2 consolidation scenario from start to finish.

    Sends a fixed set of trivial, important, medium and critical messages
    (one second apart), attempts to trigger consolidation, waits ten
    seconds, then prints the expected outcome and the manual verification
    steps. Nothing is asserted programmatically and the return values of
    the helper calls are ignored.
    """
    rule = "=" * 60

    def section(title):
        # Banner preceded by a blank line, used for each test suite heading.
        print("\n" + rule)
        print(title)
        print(rule)

    print(rule)
    print("PHASE 2 TEST: Memory Consolidation")
    print(rule)

    print(f"\n🧪 Testing with user: {TEST_USER_ID}")
    print(" Sending mix of important and trivial messages")

    # Fixed pause to give the Cat time to be ready (no readiness probe).
    time.sleep(2)

    # Test Suite 1: Send varied messages
    section("TEST SUITE 1: Varied Message Types")

    # Trivial (should be deleted)
    trivial = (
        ("lol", "Trivial - pure reaction"),
        ("k", "Trivial - 1 char"),
        ("okay", "Trivial - acknowledgment"),
    )
    # Important (should be kept)
    important = (
        ("My name is Alice", "Important - personal info"),
        ("I love playing guitar", "Important - hobby/preference"),
        ("My dog died last month", "Important - emotional event"),
        ("I'm studying computer science at MIT", "Important - education"),
    )
    # Medium (depends on context)
    medium = (
        ("What's the weather like?", "Medium - generic question"),
        ("I had pizza for lunch", "Medium - daily activity"),
    )
    # Very important (should definitely be kept)
    critical = (
        ("I'm getting married next month!", "Critical - major life event"),
        ("I've been diagnosed with depression", "Critical - health/emotional"),
    )

    # Sent in this order, pausing one second after each message.
    for text, desc in (*trivial, *important, *medium, *critical):
        send_message(text, description=desc)
        time.sleep(1)

    # Test Suite 2: Trigger consolidation
    section("TEST SUITE 2: Consolidation Trigger")

    trigger_consolidation()

    # Wait for consolidation to complete
    print("\n⏳ Waiting 10 seconds for consolidation to complete...")
    time.sleep(10)

    # Test Suite 3: Verify results
    section("TEST SUITE 3: Verification")

    print("\n✅ EXPECTED RESULTS:")

    # Each entry is a heading followed by the lines printed beneath it.
    expectations = (
        (
            "\n📝 Should be DELETED (trivial):",
            (
                " - 'lol' (pure reaction)",
                " - 'k' (too short)",
                " - 'okay' (acknowledgment)",
            ),
        ),
        (
            "\n💾 Should be KEPT (important):",
            (
                " - 'My name is Alice' (importance: 7-8)",
                " - 'I love playing guitar' (importance: 6-7)",
                " - 'My dog died last month' (importance: 9-10)",
                " - 'I'm studying CS at MIT' (importance: 7-8)",
                " - 'I'm getting married!' (importance: 10)",
                " - 'diagnosed with depression' (importance: 10)",
            ),
        ),
        (
            "\n📚 Should be extracted as FACTS (declarative memory):",
            (
                " - 'User's name is Alice'",
                " - 'User plays guitar'",
                " - 'User lost their dog recently'",
                " - 'User studies CS at MIT'",
                " - 'User getting married soon'",
                " - 'User has depression'",
            ),
        ),
    )
    for heading, items in expectations:
        print(heading)
        for item in items:
            print(item)

    # Check logs
    check_logs()

    # Summary
    section("MANUAL VERIFICATION STEPS")

    checklist = """
1. Check Docker logs for consolidation output:
docker logs miku_cheshire_cat_test 2>&1 | tail -100

2. Look for these indicators:
🌙 [Consolidation] Starting...
📊 [Consolidation] Fetching unconsolidated memories
✨ [Consolidation] Complete! Stats: ...

3. Verify in next conversation:
Test if Miku remembers:
- User's name (Alice)
- That user plays guitar
- That user is getting married

Should NOT remember:
- 'lol', 'k', 'okay'

4. Test memory recall:
Send: "What do you know about me?"
Expected: Mentions name, guitar, upcoming marriage, etc.

5. Check memory stats:
If stats show:
- Processed: 11 memories
- Kept: 6-7 important ones
- Deleted: 4-5 trivial ones
- Facts learned: 5-6 facts
Then Phase 2 is working! ✅
"""
    print(checklist)

    print("\n✨ Phase 2 testing complete!")
    print("\nNext: Run verification queries to test memory recall")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the scenario only when executed as a script, not on import.
    main()
|