add: cheshire-cat configuration, tooling, tests, and documentation
Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
This commit is contained in:
120
cheshire-cat/test_declarative_recall.py
Normal file
120
cheshire-cat/test_declarative_recall.py
Normal file
@@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test if declarative facts can be recalled by asking factual questions.
|
||||
This tests the CRITICAL fix for Phase 2 memory consolidation.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
|
||||
# Base URL of the Cat HTTP server; ask_cat() POSTs to "<CAT_URL>/message".
CAT_URL = "http://localhost:1865"
|
||||
# Value sent as "user_id" in the JSON payload of every question.
USER_ID = "test_user_declarative"
|
||||
|
||||
def ask_cat(question: str) -> "dict | None":
    """Send a question to Cat and return the decoded JSON response.

    The question is POSTed to ``{CAT_URL}/message`` together with ``USER_ID``,
    using a 30 second timeout.

    Args:
        question: The text to send to Cat.

    Returns:
        The decoded JSON object on success, or ``None`` when the request
        fails, the server answers with an HTTP error status, or the body is
        not a JSON object. Failures are reported on stdout rather than raised.
    """
    try:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": question, "user_id": USER_ID},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors, timeouts and HTTP error
        # statuses; ValueError covers a body that is not valid JSON.
        print(f"❌ Error: {e}")
        return None

    if not isinstance(payload, dict):
        # Callers use dict access (.get) on the result, so reject other JSON.
        print(f"❌ Error: unexpected response payload type {type(payload).__name__}")
        return None
    return payload
|
||||
|
||||
def _matches_expected(expected: str, answer: str) -> bool:
    """Return True when every comma-separated keyword of *expected* is in *answer*.

    Matching is case-insensitive. An expectation without commas behaves as a
    plain substring check.
    """
    haystack = answer.lower()
    keywords = [part.strip().lower() for part in expected.split(",") if part.strip()]
    return all(keyword in haystack for keyword in keywords)


def main():
    """Ask Cat a fixed set of factual questions and report which were recalled.

    Each question is sent through ``ask_cat``; a recall counts as successful
    when all expected keywords appear in the ``content`` field of the
    response. Progress, a summary and per-question details are printed.

    Returns:
        0 when every question was recalled successfully, 1 otherwise.
    """
    print("=" * 70)
    print("DECLARATIVE MEMORY RECALL TEST")
    print("=" * 70)
    print("Testing if Cat can recall stored declarative facts...\n")

    # "expected" may list several comma-separated keywords; all must appear.
    test_questions = [
        {
            "question": "What is my favorite color?",
            "expected": "forest",
            "fact_type": "favorite_color",
        },
        {
            "question": "Where do I work?",
            "expected": "software engineer",
            "fact_type": "job",
        },
        {
            "question": "What are my hobbies?",
            "expected": "piano, japanese",
            "fact_type": "hobby",
        },
        {
            "question": "Do I prefer cats or dogs?",
            "expected": "cats",
            "fact_type": "preference",
        },
    ]

    results = []

    for i, test in enumerate(test_questions, 1):
        print(f"[{i}/{len(test_questions)}] Testing: {test['question']}")
        print(f" Expected: {test['expected']}")

        response = ask_cat(test['question'])

        if response:
            # "content" may be absent or null; normalise to a string so the
            # slicing and matching below cannot fail.
            answer = response.get('content') or ''
            if not isinstance(answer, str):
                answer = str(answer)
            print(f" Response: {answer[:100]}...")

            # Check if expected content is in response
            success = _matches_expected(test['expected'], answer)
            results.append({
                'question': test['question'],
                'success': success,
                'response': answer,
            })

            if success:
                print(f" ✅ SUCCESS - Found '{test['expected']}' in response")
            else:
                print(f" ❌ FAIL - Did not find '{test['expected']}' in response")
        else:
            print(" ❌ ERROR - No response from Cat")
            results.append({
                'question': test['question'],
                'success': False,
                'response': None,
            })

        print()
        time.sleep(2)  # Brief pause between questions

    # Summary
    print("=" * 70)
    print("TEST SUMMARY")
    print("=" * 70)

    success_count = sum(1 for r in results if r['success'])
    total_count = len(results)

    print(f"✅ Successful recalls: {success_count}/{total_count}")
    print(f"❌ Failed recalls: {total_count - success_count}/{total_count}")

    if success_count == total_count:
        print("\n🎉 ALL TESTS PASSED! Declarative memory recall is working!")
    elif success_count > 0:
        print(f"\n⚠️ PARTIAL SUCCESS: {success_count}/{total_count} recalls working")
    else:
        print("\n❌ ALL TESTS FAILED: Declarative recall not working")

    print("\n" + "=" * 70)
    print("DETAILED RESULTS")
    print("=" * 70)

    for result in results:
        status = "✅ PASS" if result['success'] else "❌ FAIL"
        print(f"\n{status}: {result['question']}")
        if result['response']:
            print(f" Response: {result['response'][:200]}...")

    return 0 if success_count == total_count else 1


if __name__ == "__main__":
    # Propagate the outcome as the process exit status so shells and CI can
    # detect failed recalls.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user