add: cheshire-cat configuration, tooling, tests, and documentation

Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
2026-03-04 00:51:14 +02:00
parent eafab336b4
commit ae1e0aa144
35 changed files with 6055 additions and 0 deletions
--- a/cheshire-cat/test_declarative_recall.py
+++ b/cheshire-cat/test_declarative_recall.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+"""
+Test if declarative facts can be recalled by asking factual questions.
+This tests the CRITICAL fix for Phase 2 memory consolidation.
+"""
+
+import requests
+import time
+
+CAT_URL = "http://localhost:1865"  # Base URL of the Cat HTTP API; "/message" is appended when posting
+USER_ID = "test_user_declarative"  # Sent as "user_id" in the JSON body of every question
+
+def ask_cat(question: str) -> dict:
+    """Send a question to Cat and get the response."""
+    try:
+        response = requests.post(
+            f"{CAT_URL}/message",
+            json={"text": question, "user_id": USER_ID},
+            timeout=30
+        )
+        response.raise_for_status()
+        return response.json()
+    except Exception as e:
+        print(f"❌ Error: {e}")
+        return None
+
+def main():
+    print("=" * 70)
+    print("DECLARATIVE MEMORY RECALL TEST")
+    print("=" * 70)
+    print("Testing if Cat can recall stored declarative facts...\n")
+    
+    test_questions = [
+        {
+            "question": "What is my favorite color?",
+            "expected": "forest",
+            "fact_type": "favorite_color"
+        },
+        {
+            "question": "Where do I work?",
+            "expected": "software engineer",
+            "fact_type": "job"
+        },
+        {
+            "question": "What are my hobbies?",
+            "expected": "piano, japanese",
+            "fact_type": "hobby"
+        },
+        {
+            "question": "Do I prefer cats or dogs?",
+            "expected": "cats",
+            "fact_type": "preference"
+        },
+    ]
+    
+    results = []
+    
+    for i, test in enumerate(test_questions, 1):
+        print(f"[{i}/{len(test_questions)}] Testing: {test['question']}")
+        print(f"   Expected: {test['expected']}")
+        
+        response = ask_cat(test['question'])
+        
+        if response:
+            answer = response.get('content', '')
+            print(f"   Response: {answer[:100]}...")
+            
+            # Check if expected content is in response
+            success = test['expected'].lower() in answer.lower()
+            results.append({
+                'question': test['question'],
+                'success': success,
+                'response': answer
+            })
+            
+            if success:
+                print(f"   ✅ SUCCESS - Found '{test['expected']}' in response")
+            else:
+                print(f"   ❌ FAIL - Did not find '{test['expected']}' in response")
+        else:
+            print(f"   ❌ ERROR - No response from Cat")
+            results.append({
+                'question': test['question'],
+                'success': False,
+                'response': None
+            })
+        
+        print()
+        time.sleep(2)  # Brief pause between questions
+    
+    # Summary
+    print("=" * 70)
+    print("TEST SUMMARY")
+    print("=" * 70)
+    
+    success_count = sum(1 for r in results if r['success'])
+    total_count = len(results)
+    
+    print(f"✅ Successful recalls: {success_count}/{total_count}")
+    print(f"❌ Failed recalls: {total_count - success_count}/{total_count}")
+    
+    if success_count == total_count:
+        print("\n🎉 ALL TESTS PASSED! Declarative memory recall is working!")
+    elif success_count > 0:
+        print(f"\n⚠️ PARTIAL SUCCESS: {success_count}/{total_count} recalls working")
+    else:
+        print("\n❌ ALL TESTS FAILED: Declarative recall not working")
+    
+    print("\n" + "=" * 70)
+    print("DETAILED RESULTS")
+    print("=" * 70)
+    
+    for result in results:
+        status = "✅ PASS" if result['success'] else "❌ FAIL"
+        print(f"\n{status}: {result['question']}")
+        if result['response']:
+            print(f"   Response: {result['response'][:200]}...")
+
+if __name__ == "__main__":
+    main()