cheshire-cat/test_declarative_recall.py

#!/usr/bin/env python3
"""
Test if declarative facts can be recalled by asking factual questions.
This tests the CRITICAL fix for Phase 2 memory consolidation.
"""

import time
from typing import Optional

import requests

CAT_URL = "http://localhost:1865"
USER_ID = "test_user_declarative"

def ask_cat(question: str) -> Optional[dict]:
    """Send a question to Cat and return the decoded JSON reply.

    POSTs ``{"text": question, "user_id": USER_ID}`` as JSON to
    ``{CAT_URL}/message`` with a 30-second timeout.

    Args:
        question: The text to send to Cat.

    Returns:
        The parsed JSON body of the response, or ``None`` when the request
        could not be completed, the server answered with an HTTP error
        status, or the body was not valid JSON. In the failure case the
        error is printed rather than raised, so callers must check for
        ``None``.
    """
    # NOTE(review): user_id is sent inside the JSON body; confirm the server
    # reads it from there rather than from a request header.
    try:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": question, "user_id": USER_ID},
            timeout=30,
        )
        # Turn 4xx/5xx statuses into an exception so they are reported below.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection, timeout and HTTP-status errors;
        # ValueError covers a body that cannot be decoded as JSON. Anything
        # else is a programming error and is allowed to propagate.
        print(f"❌ Error: {e}")
        return None

def _contains_expected(expected: str, answer: str) -> bool:
    """Return True if every comma-separated keyword of *expected* is in *answer*.

    Matching is a case-insensitive substring test per keyword. An expected
    value without commas is therefore matched exactly as a single substring,
    while a value such as ``"piano, japanese"`` requires both words to occur
    somewhere in the answer, in any order and with any wording between them.
    """
    haystack = answer.lower()
    keywords = (part.strip().lower() for part in expected.split(","))
    return all(keyword in haystack for keyword in keywords if keyword)


def main() -> int:
    """Ask Cat a fixed set of factual questions and report which are recalled.

    Each question is sent through ``ask_cat``; the ``content`` field of the
    reply is checked for the expected keyword(s). Progress, a summary and the
    detailed per-question results are printed to stdout, with a 2-second
    pause after every question.

    Returns:
        ``0`` when every question was answered with the expected content,
        ``1`` otherwise, so the value can be used as a process exit status.
    """
    print("=" * 70)
    print("DECLARATIVE MEMORY RECALL TEST")
    print("=" * 70)
    print("Testing if Cat can recall stored declarative facts...\n")

    # "expected" may hold several comma-separated keywords; all must appear.
    test_questions = [
        {
            "question": "What is my favorite color?",
            "expected": "forest",
            "fact_type": "favorite_color"
        },
        {
            "question": "Where do I work?",
            "expected": "software engineer",
            "fact_type": "job"
        },
        {
            "question": "What are my hobbies?",
            "expected": "piano, japanese",
            "fact_type": "hobby"
        },
        {
            "question": "Do I prefer cats or dogs?",
            "expected": "cats",
            "fact_type": "preference"
        },
    ]

    results = []

    for i, test in enumerate(test_questions, 1):
        print(f"[{i}/{len(test_questions)}] Testing: {test['question']}")
        print(f"   Expected: {test['expected']}")

        response = ask_cat(test['question'])

        if response:
            # "content" may be absent or null in the reply; normalise to a
            # string so slicing and matching below cannot raise.
            answer = str(response.get('content') or '')
            print(f"   Response: {answer[:100]}...")

            # Check if expected content is in response
            success = _contains_expected(test['expected'], answer)
            results.append({
                'question': test['question'],
                'success': success,
                'response': answer
            })

            if success:
                print(f"   ✅ SUCCESS - Found '{test['expected']}' in response")
            else:
                print(f"   ❌ FAIL - Did not find '{test['expected']}' in response")
        else:
            # ask_cat already printed the underlying error.
            print("   ❌ ERROR - No response from Cat")
            results.append({
                'question': test['question'],
                'success': False,
                'response': None
            })

        print()
        time.sleep(2)  # Brief pause between questions

    # Summary
    print("=" * 70)
    print("TEST SUMMARY")
    print("=" * 70)

    success_count = sum(1 for r in results if r['success'])
    total_count = len(results)

    print(f"✅ Successful recalls: {success_count}/{total_count}")
    print(f"❌ Failed recalls: {total_count - success_count}/{total_count}")

    if success_count == total_count:
        print("\n🎉 ALL TESTS PASSED! Declarative memory recall is working!")
    elif success_count > 0:
        print(f"\n⚠️ PARTIAL SUCCESS: {success_count}/{total_count} recalls working")
    else:
        print("\n❌ ALL TESTS FAILED: Declarative recall not working")

    print("\n" + "=" * 70)
    print("DETAILED RESULTS")
    print("=" * 70)

    for result in results:
        status = "✅ PASS" if result['success'] else "❌ FAIL"
        print(f"\n{status}: {result['question']}")
        if result['response']:
            print(f"   Response: {result['response'][:200]}...")

    # Non-zero status lets shells and CI detect a failed or partial run.
    return 0 if success_count == total_count else 1


if __name__ == "__main__":
    # Propagate the test outcome as the process exit status.
    raise SystemExit(main())
add: cheshire-cat configuration, tooling, tests, and documentation

Configuration:
- .env.example, .gitignore, compose.yml (main docker compose)
- docker-compose-amd.yml (ROCm), docker-compose-macos.yml
- start.sh, stop.sh convenience scripts
- LICENSE (Apache 2.0, from upstream Cheshire Cat)

Memory management utilities:
- analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py
- check_memories.py, extract_declarative_facts.py, store_declarative_facts.py
- compare_systems.py (system comparison tool)
- benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py

Test suite:
- quick_test.py, test_setup.py, test_setup_simple.py
- test_consolidation_direct.py, test_declarative_recall.py, test_recall.py
- test_end_to_end.py, test_full_pipeline.py
- test_phase2.py, test_phase2_comprehensive.py

Documentation:
- README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md
- PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md
- POST_OPTIMIZATION_ANALYSIS.md

2026-03-04 00:51:14 +02:00			`#!/usr/bin/env python3`
			`"""`
			`Test if declarative facts can be recalled by asking factual questions.`
			`This tests the CRITICAL fix for Phase 2 memory consolidation.`
			`"""`

			`import requests`
			`import time`

			`CAT_URL = "http://localhost:1865"`
			`USER_ID = "test_user_declarative"`

			`def ask_cat(question: str) -> dict:`
			`"""Send a question to Cat and get the response."""`
			`try:`
			`response = requests.post(`
			`f"{CAT_URL}/message",`
			`json={"text": question, "user_id": USER_ID},`
			`timeout=30`
			`)`
			`response.raise_for_status()`
			`return response.json()`
			`except Exception as e:`
			`print(f"❌ Error: {e}")`
			`return None`

			`def main():`
			`print("=" * 70)`
			`print("DECLARATIVE MEMORY RECALL TEST")`
			`print("=" * 70)`
			`print("Testing if Cat can recall stored declarative facts...\n")`

			`test_questions = [`
			`{`
			`"question": "What is my favorite color?",`
			`"expected": "forest",`
			`"fact_type": "favorite_color"`
			`},`
			`{`
			`"question": "Where do I work?",`
			`"expected": "software engineer",`
			`"fact_type": "job"`
			`},`
			`{`
			`"question": "What are my hobbies?",`
			`"expected": "piano, japanese",`
			`"fact_type": "hobby"`
			`},`
			`{`
			`"question": "Do I prefer cats or dogs?",`
			`"expected": "cats",`
			`"fact_type": "preference"`
			`},`
			`]`

			`results = []`

			`for i, test in enumerate(test_questions, 1):`
			`print(f"[{i}/{len(test_questions)}] Testing: {test['question']}")`
			`print(f" Expected: {test['expected']}")`

			`response = ask_cat(test['question'])`

			`if response:`
			`answer = response.get('content', '')`
			`print(f" Response: {answer[:100]}...")`

			`# Check if expected content is in response`
			`success = test['expected'].lower() in answer.lower()`
			`results.append({`
			`'question': test['question'],`
			`'success': success,`
			`'response': answer`
			`})`

			`if success:`
			`print(f" ✅ SUCCESS - Found '{test['expected']}' in response")`
			`else:`
			`print(f" ❌ FAIL - Did not find '{test['expected']}' in response")`
			`else:`
			`print(f" ❌ ERROR - No response from Cat")`
			`results.append({`
			`'question': test['question'],`
			`'success': False,`
			`'response': None`
			`})`

			`print()`
			`time.sleep(2) # Brief pause between questions`

			`# Summary`
			`print("=" * 70)`
			`print("TEST SUMMARY")`
			`print("=" * 70)`

			`success_count = sum(1 for r in results if r['success'])`
			`total_count = len(results)`

			`print(f"✅ Successful recalls: {success_count}/{total_count}")`
			`print(f"❌ Failed recalls: {total_count - success_count}/{total_count}")`

			`if success_count == total_count:`
			`print("\n🎉 ALL TESTS PASSED! Declarative memory recall is working!")`
			`elif success_count > 0:`
			`print(f"\n⚠️ PARTIAL SUCCESS: {success_count}/{total_count} recalls working")`
			`else:`
			`print("\n❌ ALL TESTS FAILED: Declarative recall not working")`

			`print("\n" + "=" * 70)`
			`print("DETAILED RESULTS")`
			`print("=" * 70)`

			`for result in results:`
			`status = "✅ PASS" if result['success'] else "❌ FAIL"`
			`print(f"\n{status}: {result['question']}")`
			`if result['response']:`
			`print(f" Response: {result['response'][:200]}...")`

			`if __name__ == "__main__":`
			`main()`