add: cheshire-cat configuration, tooling, tests, and documentation

Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
2026-03-04 00:51:14 +02:00
parent eafab336b4
commit ae1e0aa144
35 changed files with 6055 additions and 0 deletions
--- a/cheshire-cat/test_phase2.py
+++ b/cheshire-cat/test_phase2.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+"""
+Phase 2 Test Script
+
+Tests the Memory Consolidation plugin:
+1. Send multiple messages (some important, some trivial)
+2. Manually trigger consolidation
+3. Verify important memories kept, trivial deleted
+4. Check if facts were extracted to declarative memory
+"""
+
+import requests
+import json
+import time
+from datetime import datetime
+
+
+CAT_URL = "http://localhost:1865"  # Base URL; request paths such as /message are appended to it
+TEST_USER_ID = "discord_user_phase2_test"  # Sent as "user_id" with every test message
+
+
+def send_message(text: str, guild_id: str = "test_guild", description: str = ""):
+    """POST one chat message to CAT_URL/message and report whether it got an HTTP 200.
+
+    A banner, the description and the message text are printed first, then a
+    one-line outcome.
+
+    Returns True only for an HTTP 200 reply; any other status code, or any
+    exception raised while sending or decoding the reply, yields False.
+    """
+    print("\n" + "=" * 60)
+    print(f"📤 {description}")
+    print(f"   Message: '{text}'")
+
+    routing = {"guild_id": guild_id, "channel_id": "test_channel"}
+    body = {"text": text, "user_id": TEST_USER_ID, "metadata": routing}
+
+    try:
+        reply = requests.post(f"{CAT_URL}/message", json=body, timeout=30)
+
+        # Guard clause: anything other than 200 counts as a failed send.
+        if reply.status_code != 200:
+            print(f"   ❌ Error: {reply.status_code}")
+            return False
+
+        # Only the first 80 characters of the reply content are echoed.
+        preview = reply.json().get('content', '')[:80]
+        print(f"   ✅ Response: {preview}...")
+        return True
+    except Exception as e:
+        # Every failure is printed and reported as False rather than raised.
+        print(f"   ❌ Exception: {e}")
+        return False
+
+
+def trigger_consolidation():
+    """Request a consolidation run by POSTing to CAT_URL/admin/consolidate.
+
+    Returns True on an HTTP 200 reply. Any other status code, or any exception,
+    prints a warning followed by a note that this is expected, and returns False.
+    """
+    rule = "=" * 60
+    expected_note = "   (This is expected - no admin endpoint yet)"
+    print("\n" + rule)
+    print("🌙 TRIGGERING CONSOLIDATION")
+    print(rule)
+
+    try:
+        reply = requests.post(f"{CAT_URL}/admin/consolidate", timeout=60)
+        if reply.status_code != 200:
+            print(f"⚠️  API returned {reply.status_code}")
+            print(expected_note)
+            return False
+        print("✅ Consolidation triggered successfully")
+        return True
+    except Exception as e:
+        print(f"⚠️  Could not trigger via API: {e}")
+        print(expected_note)
+        return False
+
+
+def check_logs():
+    """Print a section banner and the docker/grep command to run by hand; executes nothing."""
+    rule = "=" * 60
+    for line in ("\n" + rule, "📋 CHECKING CONSOLIDATION LOGS", rule,
+                 "\nRun this command manually to check:",
+                 "   docker logs miku_cheshire_cat_test 2>&1 | grep -E '(Consolidation|🌙|✨|💾|🗑️)' | tail -30"):
+        print(line)
+
+
+def main():
+    """Run the Phase 2 scenario and print the checklist for manual verification."""
+    rule = "=" * 60
+
+    def section(title):
+        # Blank line, then the title framed by two 60-character rules.
+        print("\n" + rule)
+        print(title)
+        print(rule)
+
+    print(rule)
+    print("PHASE 2 TEST: Memory Consolidation")
+    print(rule)
+
+    print(f"\n🧪 Testing with user: {TEST_USER_ID}")
+    print("   Sending mix of important and trivial messages")
+
+    # Wait for Cat to be ready
+    time.sleep(2)
+
+    # Test Suite 1: Send varied messages
+    section("TEST SUITE 1: Varied Message Types")
+
+    trivial = [  # should be deleted
+        ("lol", "Trivial - pure reaction"),
+        ("k", "Trivial - 1 char"),
+        ("okay", "Trivial - acknowledgment"),
+    ]
+    important = [  # should be kept
+        ("My name is Alice", "Important - personal info"),
+        ("I love playing guitar", "Important - hobby/preference"),
+        ("My dog died last month", "Important - emotional event"),
+        ("I'm studying computer science at MIT", "Important - education"),
+    ]
+    medium = [  # depends on context
+        ("What's the weather like?", "Medium - generic question"),
+        ("I had pizza for lunch", "Medium - daily activity"),
+    ]
+    critical = [  # should definitely be kept
+        ("I'm getting married next month!", "Critical - major life event"),
+        ("I've been diagnosed with depression", "Critical - health/emotional"),
+    ]
+
+    for text, desc in trivial + important + medium + critical:
+        send_message(text, description=desc)
+        time.sleep(1)
+
+    # Test Suite 2: Trigger consolidation
+    section("TEST SUITE 2: Consolidation Trigger")
+    trigger_consolidation()
+
+    # Wait for consolidation to complete
+    print("\n⏳ Waiting 10 seconds for consolidation to complete...")
+    time.sleep(10)
+
+    # Test Suite 3: Verify results
+    section("TEST SUITE 3: Verification")
+
+    expected = (
+        "\n✅ EXPECTED RESULTS:",
+        "\n📝 Should be DELETED (trivial):",
+        "   - 'lol' (pure reaction)",
+        "   - 'k' (too short)",
+        "   - 'okay' (acknowledgment)",
+        "\n💾 Should be KEPT (important):",
+        "   - 'My name is Alice' (importance: 7-8)",
+        "   - 'I love playing guitar' (importance: 6-7)",
+        "   - 'My dog died last month' (importance: 9-10)",
+        "   - 'I'm studying CS at MIT' (importance: 7-8)",
+        "   - 'I'm getting married!' (importance: 10)",
+        "   - 'diagnosed with depression' (importance: 10)",
+        "\n📚 Should be extracted as FACTS (declarative memory):",
+        "   - 'User's name is Alice'",
+        "   - 'User plays guitar'",
+        "   - 'User lost their dog recently'",
+        "   - 'User studies CS at MIT'",
+        "   - 'User getting married soon'",
+        "   - 'User has depression'",
+    )
+    for line in expected:
+        print(line)
+
+    # Check logs
+    check_logs()
+
+    # Summary
+    section("MANUAL VERIFICATION STEPS")
+    print("""
+1. Check Docker logs for consolidation output:
+   docker logs miku_cheshire_cat_test 2>&1 | tail -100
+
+2. Look for these indicators:
+   🌙 [Consolidation] Starting...
+   📊 [Consolidation] Fetching unconsolidated memories
+   ✨ [Consolidation] Complete! Stats: ...
+
+3. Verify in next conversation:
+   Test if Miku remembers:
+   - User's name (Alice)
+   - That user plays guitar
+   - That user is getting married
+   
+   Should NOT remember:
+   - 'lol', 'k', 'okay'
+
+4. Test memory recall:
+   Send: "What do you know about me?"
+   Expected: Mentions name, guitar, upcoming marriage, etc.
+
+5. Check memory stats:
+   If stats show:
+   - Processed: 11 memories
+   - Kept: 6-7 important ones
+   - Deleted: 4-5 trivial ones
+   - Facts learned: 5-6 facts
+   Then Phase 2 is working! ✅
+""")
+
+    print("\n✨ Phase 2 testing complete!")
+    print("\nNext: Run verification queries to test memory recall")
+
+
+if __name__ == "__main__":  # Run the scenario only when executed as a script, not on import
+    main()