add: cheshire-cat configuration, tooling, tests, and documentation
Configuration:
- .env.example, .gitignore, compose.yml (main docker compose)
- docker-compose-amd.yml (ROCm), docker-compose-macos.yml
- start.sh, stop.sh convenience scripts
- LICENSE (Apache 2.0, from upstream Cheshire Cat)

Memory management utilities:
- analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py
- check_memories.py, extract_declarative_facts.py, store_declarative_facts.py
- compare_systems.py (system comparison tool)
- benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py

Test suite:
- quick_test.py, test_setup.py, test_setup_simple.py
- test_consolidation_direct.py, test_declarative_recall.py, test_recall.py
- test_end_to_end.py, test_full_pipeline.py
- test_phase2.py, test_phase2_comprehensive.py

Documentation:
- README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md
- PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md
- POST_OPTIMIZATION_ANALYSIS.md
This commit is contained in:
214
cheshire-cat/test_phase2.py
Executable file
214
cheshire-cat/test_phase2.py
Executable file
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Phase 2 Test Script
|
||||
|
||||
Tests the Memory Consolidation plugin:
|
||||
1. Send multiple messages (some important, some trivial)
|
||||
2. Manually trigger consolidation
|
||||
3. Verify important memories kept, trivial deleted
|
||||
4. Check if facts were extracted to declarative memory
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
# Base URL that every HTTP request in this script is sent to
# (the "/message" and "/admin/consolidate" paths are appended to it).
CAT_URL = "http://localhost:1865"
|
||||
# Value sent as "user_id" in every message payload built by send_message().
TEST_USER_ID = "discord_user_phase2_test"
|
||||
|
||||
|
||||
def send_message(text: str, guild_id: str = "test_guild", description: str = ""):
    """Post one chat message to the Cat and report whether it was accepted.

    A short banner with ``description`` and the message text is printed
    first, then the message is POSTed as JSON to ``{CAT_URL}/message``
    under ``TEST_USER_ID``.

    Args:
        text: Message body to send.
        guild_id: Value placed in the payload's ``metadata.guild_id``.
        description: Human-readable label printed before the message.

    Returns:
        True when the server answers with HTTP 200, False for any other
        status code or when any exception is raised along the way.
    """
    print("\n" + "=" * 60)
    print(f"📤 {description}")
    print(f" Message: '{text}'")

    metadata = {"guild_id": guild_id, "channel_id": "test_channel"}
    body = {"text": text, "user_id": TEST_USER_ID, "metadata": metadata}

    # Everything that can fail (network, JSON decoding, unexpected reply
    # shape) is deliberately funnelled into a printed error plus False so
    # that one bad message never aborts the whole test run.
    try:
        reply = requests.post(f"{CAT_URL}/message", json=body, timeout=30)

        if reply.status_code != 200:
            print(f" ❌ Error: {reply.status_code}")
            return False

        data = reply.json()
        preview = data.get('content', '')[:80]  # first 80 characters only
        print(f" ✅ Response: {preview}...")
        return True
    except Exception as exc:
        print(f" ❌ Exception: {exc}")
        return False
|
||||
|
||||
|
||||
def trigger_consolidation():
    """Ask the Cat to run memory consolidation via an admin HTTP endpoint.

    Sends a body-less POST to ``{CAT_URL}/admin/consolidate`` with a
    60-second timeout. A failure is reported as a warning rather than an
    error because, per the messages this function prints, the endpoint is
    not expected to exist yet.

    Returns:
        True when the endpoint answers with HTTP 200, False for any other
        status code or when any exception is raised.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("🌙 TRIGGERING CONSOLIDATION")
    print(rule)

    not_available_note = " (This is expected - no admin endpoint yet)"

    try:
        # Try to trigger via API (if endpoint exists)
        reply = requests.post(f"{CAT_URL}/admin/consolidate", timeout=60)

        if reply.status_code != 200:
            print(f"⚠️ API returned {reply.status_code}")
            print(not_available_note)
            return False

        print("✅ Consolidation triggered successfully")
        return True
    except Exception as exc:
        print(f"⚠️ Could not trigger via API: {exc}")
        print(not_available_note)
        return False
|
||||
|
||||
|
||||
def check_logs():
    """Print the shell command for inspecting consolidation log output.

    Nothing is executed or read here: the function only prints a banner
    followed by a ``docker logs ... | grep ... | tail`` command line for
    the operator to run by hand. Returns None.
    """
    rule = "=" * 60
    output_lines = (
        "\n" + rule,
        "📋 CHECKING CONSOLIDATION LOGS",
        rule,
        "\nRun this command manually to check:",
        " docker logs miku_cheshire_cat_test 2>&1 | grep -E '(Consolidation|🌙|✨|💾|🗑️)' | tail -30",
    )
    for line in output_lines:
        print(line)
|
||||
|
||||
|
||||
def main():
    """Run the Phase 2 (memory consolidation) manual test scenario.

    Steps, in order:
      1. Send a fixed mix of trivial, important, medium and critical
         messages through ``send_message`` (one second apart).
      2. Call ``trigger_consolidation`` and then wait 10 seconds.
      3. Print the results a human should expect to see, the log command
         from ``check_logs``, and a manual verification checklist.

    The script does not verify memory contents itself; it prepares the
    data and tells the operator what to look for. It does, however, count
    how many messages were accepted, so a run against an unreachable or
    failing server is clearly flagged instead of silently looking like a
    success.

    Returns:
        None.
    """
    rule = "=" * 60

    print(rule)
    print("PHASE 2 TEST: Memory Consolidation")
    print(rule)

    print(f"\n🧪 Testing with user: {TEST_USER_ID}")
    print(" Sending mix of important and trivial messages")

    # Fixed grace period before the first request; this does not probe the
    # server, it only gives it a moment to come up.
    time.sleep(2)

    # Test Suite 1: Send varied messages
    print("\n" + rule)
    print("TEST SUITE 1: Varied Message Types")
    print(rule)

    messages = [
        # Trivial (should be deleted)
        ("lol", "Trivial - pure reaction"),
        ("k", "Trivial - 1 char"),
        ("okay", "Trivial - acknowledgment"),

        # Important (should be kept)
        ("My name is Alice", "Important - personal info"),
        ("I love playing guitar", "Important - hobby/preference"),
        ("My dog died last month", "Important - emotional event"),
        ("I'm studying computer science at MIT", "Important - education"),

        # Medium (depends on context)
        ("What's the weather like?", "Medium - generic question"),
        ("I had pizza for lunch", "Medium - daily activity"),

        # Very important (should definitely be kept)
        ("I'm getting married next month!", "Critical - major life event"),
        ("I've been diagnosed with depression", "Critical - health/emotional"),
    ]

    # Track how many sends succeeded: the expected results printed further
    # down are only meaningful if the messages actually reached the server.
    accepted = 0
    for text, desc in messages:
        if send_message(text, description=desc):
            accepted += 1
        time.sleep(1)

    print(f"\n📊 Sent {accepted}/{len(messages)} messages successfully")
    if accepted < len(messages):
        print("⚠️ Some messages failed to send - the expected results below assume all of them were stored")

    # Test Suite 2: Trigger consolidation
    print("\n" + rule)
    print("TEST SUITE 2: Consolidation Trigger")
    print(rule)

    trigger_consolidation()

    # Wait for consolidation to complete
    print("\n⏳ Waiting 10 seconds for consolidation to complete...")
    time.sleep(10)

    # Test Suite 3: Verify results
    print("\n" + rule)
    print("TEST SUITE 3: Verification")
    print(rule)

    print("\n✅ EXPECTED RESULTS:")
    print("\n📝 Should be DELETED (trivial):")
    print(" - 'lol' (pure reaction)")
    print(" - 'k' (too short)")
    print(" - 'okay' (acknowledgment)")

    print("\n💾 Should be KEPT (important):")
    print(" - 'My name is Alice' (importance: 7-8)")
    print(" - 'I love playing guitar' (importance: 6-7)")
    print(" - 'My dog died last month' (importance: 9-10)")
    print(" - 'I'm studying CS at MIT' (importance: 7-8)")
    print(" - 'I'm getting married!' (importance: 10)")
    print(" - 'diagnosed with depression' (importance: 10)")

    print("\n📚 Should be extracted as FACTS (declarative memory):")
    print(" - 'User's name is Alice'")
    print(" - 'User plays guitar'")
    print(" - 'User lost their dog recently'")
    print(" - 'User studies CS at MIT'")
    print(" - 'User getting married soon'")
    print(" - 'User has depression'")

    # Check logs
    check_logs()

    # Summary
    print("\n" + rule)
    print("MANUAL VERIFICATION STEPS")
    print(rule)

    print("""
1. Check Docker logs for consolidation output:
docker logs miku_cheshire_cat_test 2>&1 | tail -100

2. Look for these indicators:
🌙 [Consolidation] Starting...
📊 [Consolidation] Fetching unconsolidated memories
✨ [Consolidation] Complete! Stats: ...

3. Verify in next conversation:
Test if Miku remembers:
- User's name (Alice)
- That user plays guitar
- That user is getting married

Should NOT remember:
- 'lol', 'k', 'okay'

4. Test memory recall:
Send: "What do you know about me?"
Expected: Mentions name, guitar, upcoming marriage, etc.

5. Check memory stats:
If stats show:
- Processed: 11 memories
- Kept: 6-7 important ones
- Deleted: 4-5 trivial ones
- Facts learned: 5-6 facts
Then Phase 2 is working! ✅
""")

    print("\n✨ Phase 2 testing complete!")
    print("\nNext: Run verification queries to test memory recall")
|
||||
|
||||
|
||||
# Run the scenario only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user