add: cheshire-cat configuration, tooling, tests, and documentation

Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
2026-03-04 00:51:14 +02:00
parent eafab336b4
commit ae1e0aa144
35 changed files with 6055 additions and 0 deletions
--- a/cheshire-cat/test_full_pipeline.py
+++ b/cheshire-cat/test_full_pipeline.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+"""
+Full pipeline test for Phase 2 memory consolidation with declarative extraction.
+
+Steps:
+1. Tell Miku 20 messages (14 important facts and 6 trivial ones)
+2. Run consolidation to delete trivial messages
+3. Extract facts from consolidated episodic memories
+4. Store facts in declarative memory
+5. Test recall with factual questions
+"""
+
+import requests
+import time
+import sys
+
+# Base URL of the server that send_message() posts to.
+CAT_URL = "http://localhost:1865"
+# user_id value attached to every message sent by this test run.
+USER_ID = "test_user_pipeline"
+
+# Personal facts told to Miku (should be remembered).
+_IMPORTANT_FACTS = (
+    "My name is Sarah Chen.",
+    "I'm 28 years old.",
+    "I live in Seattle, Washington.",
+    "I work as a software engineer at Microsoft.",
+    "My favorite color is forest green.",
+    "I love playing piano. I've been practicing for 15 years.",
+    "I'm learning Japanese! Currently at N3 level.",
+    "I have a cat named Luna.",
+    "I'm allergic to peanuts.",
+    "I prefer cats over dogs, though I like both.",
+    "My favorite food is ramen.",
+    "I enjoy hiking on weekends.",
+    "I graduated from UW in 2018.",
+    "My birthday is March 15th.",
+)
+
+# Filler chatter (should be deleted during consolidation).
+_TRIVIAL_CHATTER = ("lol", "k", "haha", "brb", "nice", "cool")
+
+# Test messages to tell Miku, in sending order: facts first, filler last.
+TEST_MESSAGES = [*_IMPORTANT_FACTS, *_TRIVIAL_CHATTER]
+
+# Questions to test recall. Each row is (question, expected, fact_type);
+# "expected" is the lowercase substring looked for in Miku's answer.
+_RECALL_FIELDS = ("question", "expected", "fact_type")
+RECALL_TESTS = [
+    dict(zip(_RECALL_FIELDS, row))
+    for row in (
+        ("What is my name?", "sarah", "name"),
+        ("How old am I?", "28", "age"),
+        ("Where do I live?", "seattle", "location"),
+        ("What do I do for work?", "software engineer", "job"),
+        ("What is my favorite color?", "forest green", "favorite_color"),
+        ("What instruments do I play?", "piano", "hobby"),
+        ("What language am I learning?", "japanese", "hobby"),
+        ("What is my cat's name?", "luna", "pet_name"),
+        ("What am I allergic to?", "peanut", "allergy"),
+        ("Do I prefer cats or dogs?", "cat", "preference"),
+    )
+]
+
+
+def send_message(text: str) -> "dict | None":
+    """Send one chat message to Miku and return the decoded JSON reply.
+
+    Posts ``text`` together with ``USER_ID`` to the ``/message`` endpoint
+    under ``CAT_URL``, waiting at most 30 seconds for the response.
+
+    Args:
+        text: The message to send.
+
+    Returns:
+        The JSON-decoded response body, or ``None`` when the request could
+        not be completed, the server answered with an HTTP error status, or
+        the body was not valid JSON. Failures are printed, never raised.
+    """
+    try:
+        response = requests.post(
+            f"{CAT_URL}/message",
+            json={"text": text, "user_id": USER_ID},
+            timeout=30,
+        )
+        response.raise_for_status()
+        return response.json()
+    except (requests.RequestException, ValueError) as e:
+        # RequestException covers connection, timeout and HTTP-status errors;
+        # ValueError covers a response body that is not valid JSON.
+        print(f"    ❌ Error sending message: {e}")
+        return None
+
+
+def trigger_consolidation() -> bool:
+    """Ask Miku to consolidate memories by sending "consolidate now".
+
+    Returns:
+        True when the message received a non-empty reply, False when the
+        reply was missing/empty or an exception occurred.
+    """
+    try:
+        # Guard clause: nothing to report when no usable reply came back.
+        if not send_message("consolidate now"):
+            return False
+        print("    ✅ Consolidation triggered")
+        return True
+    except Exception as e:
+        print(f"    ❌ Error triggering consolidation: {e}")
+        return False
+
+
+def _send_test_messages():
+    """Send every entry of TEST_MESSAGES to Miku; return how many succeeded."""
+    trivial = {"lol", "k", "haha", "brb", "nice", "cool"}
+    total = len(TEST_MESSAGES)
+    successful_sends = 0
+
+    for i, message in enumerate(TEST_MESSAGES, 1):
+        msg_type = "TRIVIAL" if message in trivial else "IMPORTANT"
+        print(f"[{i}/{total}] {msg_type}: {message}")
+
+        if send_message(message):
+            print("    ✅ Sent successfully")
+            successful_sends += 1
+        else:
+            print("    ❌ Failed to send")
+
+        time.sleep(1)  # Brief pause between messages
+
+    return successful_sends
+
+
+def _run_helper_script(script_name):
+    """Run a sibling script and return its CompletedProcess (output captured).
+
+    The script is located next to this file and executed from that directory
+    with the interpreter running this test, so the result does not depend on
+    the caller's working directory or on which ``python3`` is first on PATH.
+    """
+    import subprocess
+    from pathlib import Path
+
+    script_dir = Path(__file__).resolve().parent
+    return subprocess.run(
+        # sys.executable may be empty when Python cannot determine it.
+        [sys.executable or "python3", str(script_dir / script_name)],
+        capture_output=True,
+        text=True,
+        cwd=str(script_dir),
+    )
+
+
+def _run_recall_tests():
+    """Ask every RECALL_TESTS question and return one result dict per question.
+
+    Each result has the keys 'question', 'expected', 'success' and 'response'
+    ('response' is None when no reply was received).
+    """
+    results = []
+    total = len(RECALL_TESTS)
+
+    for i, test in enumerate(RECALL_TESTS, 1):
+        question = test["question"]
+        expected = test["expected"].lower()
+
+        print(f"[{i}/{total}] {question}")
+        print(f"    Expected: {expected}")
+
+        response = send_message(question)
+
+        if response:
+            # 'content' may be absent or None; treat both as an empty answer.
+            content = response.get('content') or ''
+            answer = content.lower()
+            success = expected in answer
+
+            if success:
+                print("    ✅ RECALLED correctly")
+            else:
+                print("    ❌ NOT recalled")
+                print(f"    Response: {answer[:100]}...")
+
+            results.append({
+                'question': question,
+                'expected': expected,
+                'success': success,
+                'response': content,
+            })
+        else:
+            print("    ❌ ERROR - No response")
+            results.append({
+                'question': question,
+                'expected': expected,
+                'success': False,
+                'response': None,
+            })
+
+        print()
+        time.sleep(2)
+
+    return results
+
+
+def _report_results(results):
+    """Print the final summary for ``results`` and return the success rate (0-100)."""
+    total = len(results)
+    successful_recalls = sum(1 for entry in results if entry['success'])
+    # Guard against an empty test list instead of dividing by zero.
+    success_rate = (successful_recalls / total) * 100 if total else 0.0
+    all_recalled = total > 0 and successful_recalls == total
+
+    print("=" * 80)
+    print("FINAL RESULTS")
+    print("=" * 80)
+
+    print(f"\n📊 RECALL SUCCESS RATE: {successful_recalls}/{total} ({success_rate:.1f}%)\n")
+
+    if all_recalled:
+        print("🎉 PERFECT! All facts recalled correctly!")
+    elif success_rate >= 80:
+        print("✅ EXCELLENT! Most facts recalled correctly.")
+    elif success_rate >= 50:
+        print("⚠️ PARTIAL SUCCESS - Needs improvement.")
+    else:
+        print("❌ POOR PERFORMANCE - System needs significant fixes.")
+
+    print("\nDetailed results:")
+    print("-" * 80)
+
+    for entry in results:
+        status = "✅" if entry['success'] else "❌"
+        print(f"{status} {entry['question']}")
+        if not entry['success'] and entry['response']:
+            print(f"   Response: {entry['response'][:150]}...")
+
+    print("\n" + "=" * 80)
+
+    if all_recalled:
+        print("✅ PHASE 2 COMPLETE AND READY FOR PRODUCTION!")
+    elif success_rate >= 80:
+        print("✅ PHASE 2 MOSTLY WORKING - Minor refinements needed")
+    else:
+        print("❌ PHASE 2 NEEDS MORE WORK")
+
+    print("=" * 80)
+    return success_rate
+
+
+def main():
+    """Run the full pipeline test and print a recall report.
+
+    Stages: send TEST_MESSAGES, trigger consolidation, run the extract and
+    store helper scripts, then ask the RECALL_TESTS questions.
+
+    Exits with status 1 when consolidation cannot be triggered, when a helper
+    script returns a non-zero code, or when fewer than 80% of the recall
+    questions are answered correctly.
+    """
+    print("=" * 80)
+    print("PHASE 2 FULL PIPELINE TEST")
+    print("=" * 80)
+    print(f"Testing with user: {USER_ID}\n")
+
+    # Step 1: Tell Miku the facts
+    print("STEP 1: Telling Miku facts...")
+    print("-" * 80)
+    successful_sends = _send_test_messages()
+    print(f"\n✅ Successfully sent {successful_sends}/{len(TEST_MESSAGES)} messages\n")
+
+    # Step 2: Trigger consolidation
+    print("STEP 2: Triggering consolidation...")
+    print("-" * 80)
+
+    if not trigger_consolidation():
+        print("❌ Failed to trigger consolidation")
+        sys.exit(1)
+
+    # Fixed grace period; completion itself is not verified here.
+    print("⏳ Waiting for consolidation to complete...")
+    time.sleep(5)
+    print("✅ Consolidation complete\n")
+
+    # Step 3: Extract and store declarative facts
+    print("STEP 3: Extracting and storing declarative facts...")
+    print("-" * 80)
+    print("Running extract_declarative_facts.py...")
+
+    extraction = _run_helper_script("extract_declarative_facts.py")
+    if extraction.returncode != 0:
+        print(f"❌ Extraction failed: {extraction.stderr[:200]}")
+        sys.exit(1)
+
+    # Count extracted facts from output
+    facts_count = extraction.stdout.count("✅ Extracted from:")
+    print(f"✅ Extracted {facts_count} facts")
+
+    print("\nRunning store_declarative_facts.py...")
+    storage = _run_helper_script("store_declarative_facts.py")
+    if storage.returncode != 0:
+        print(f"❌ Storage failed: {storage.stderr[:200]}")
+        sys.exit(1)
+
+    # Echo the script's own summary line when it printed one.
+    stored_line = next(
+        (line for line in storage.stdout.split('\n') if "Successfully stored:" in line),
+        None,
+    )
+    if stored_line is not None:
+        print(f"✅ {stored_line.strip()}")
+    else:
+        print("✅ Facts stored")
+
+    print()
+
+    # Step 4: Test recall
+    print("STEP 4: Testing declarative memory recall...")
+    print("-" * 80)
+    results = _run_recall_tests()
+
+    # Final summary
+    success_rate = _report_results(results)
+
+    # Make a failing run visible to shells and CI, consistent with the
+    # sys.exit(1) used for the earlier stage failures.
+    if success_rate < 80:
+        sys.exit(1)
+
+
+# Run the pipeline only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    main()