Configuration:
- .env.example, .gitignore, compose.yml (main docker compose)
- docker-compose-amd.yml (ROCm), docker-compose-macos.yml
- start.sh, stop.sh convenience scripts
- LICENSE (Apache 2.0, from upstream Cheshire Cat)

Memory management utilities:
- analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py
- check_memories.py, extract_declarative_facts.py, store_declarative_facts.py
- compare_systems.py (system comparison tool)
- benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py

Test suite:
- quick_test.py, test_setup.py, test_setup_simple.py
- test_consolidation_direct.py, test_declarative_recall.py, test_recall.py
- test_end_to_end.py, test_full_pipeline.py
- test_phase2.py, test_phase2_comprehensive.py

Documentation:
- README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md
- PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md
- POST_OPTIMIZATION_ANALYSIS.md
121 lines
3.6 KiB
Python
121 lines
3.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test if declarative facts can be recalled by asking factual questions.
|
|
This tests the CRITICAL fix for Phase 2 memory consolidation.
|
|
"""
|
|
|
|
import os
import time
from typing import Optional

import requests
|
|
|
|
# Base URL of the Cat HTTP API that ask_cat() posts to. Override with the
# CAT_URL environment variable; a trailing slash is stripped so that
# f"{CAT_URL}/message" never produces a double slash.
CAT_URL = (os.environ.get("CAT_URL") or "http://localhost:1865").rstrip("/")

# User id sent with every message. Override with the CAT_USER_ID environment
# variable; an unset or empty variable falls back to the default.
USER_ID = os.environ.get("CAT_USER_ID") or "test_user_declarative"
|
|
|
|
def ask_cat(question: str) -> Optional[dict]:
    """Send a question to Cat and get the response.

    Posts *question* as JSON to ``{CAT_URL}/message`` together with
    ``USER_ID`` and waits at most 30 seconds for the reply.

    Args:
        question: Text of the message to send.

    Returns:
        The decoded JSON object of the reply, or ``None`` when the request
        fails, the server answers with an HTTP error status, or the body is
        not a JSON object. Failures are printed rather than raised so the
        caller can continue with its remaining questions.
    """
    try:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": question, "user_id": USER_ID},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # JSONDecodeError does not derive from RequestException.
        print(f"❌ Error: {e}")
        return None

    if not isinstance(payload, dict):
        # Callers use .get() on the result, so anything but an object is
        # reported as a failure instead of being passed through.
        print(f"❌ Error: unexpected response payload ({type(payload).__name__})")
        return None

    return payload
|
|
|
|
def _contains_all_keywords(answer: str, expected: str) -> bool:
    """Return True if every comma-separated keyword of *expected* is in *answer*.

    Matching is a case-insensitive substring test per keyword, so an
    expectation such as "piano, japanese" matches "I play the piano and study
    Japanese" instead of requiring the literal text "piano, japanese".
    """
    haystack = answer.lower()
    keywords = [part.strip().lower() for part in expected.split(",")]
    return all(keyword in haystack for keyword in keywords if keyword)


def _preview(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main() -> int:
    """Ask Cat a fixed set of factual questions and report which were recalled.

    Each question is sent through ask_cat(); a recall counts as successful
    when the reply's ``content`` contains every expected keyword. Progress, a
    summary and the detailed results are printed to stdout.

    Returns:
        0 when every question was recalled successfully, 1 otherwise, so the
        value can be used as a process exit status.
    """
    print("=" * 70)
    print("DECLARATIVE MEMORY RECALL TEST")
    print("=" * 70)
    print("Testing if Cat can recall stored declarative facts...\n")

    # "expected" holds one keyword or several comma-separated keywords.
    test_questions = [
        {
            "question": "What is my favorite color?",
            "expected": "forest",
            "fact_type": "favorite_color",
        },
        {
            "question": "Where do I work?",
            "expected": "software engineer",
            "fact_type": "job",
        },
        {
            "question": "What are my hobbies?",
            "expected": "piano, japanese",
            "fact_type": "hobby",
        },
        {
            "question": "Do I prefer cats or dogs?",
            "expected": "cats",
            "fact_type": "preference",
        },
    ]

    results = []
    question_count = len(test_questions)

    for i, test in enumerate(test_questions, 1):
        print(f"[{i}/{question_count}] Testing: {test['question']}")
        print(f" Expected: {test['expected']}")

        response = ask_cat(test['question'])

        if response:
            # "content" may be missing, None or a non-string value; normalise
            # it so the slicing and matching below cannot raise.
            raw_answer = response.get('content', '')
            answer = '' if raw_answer is None else str(raw_answer)
            print(f" Response: {_preview(answer, 100)}")

            success = _contains_all_keywords(answer, test['expected'])
            results.append({
                'question': test['question'],
                'success': success,
                'response': answer,
            })

            if success:
                print(f" ✅ SUCCESS - Found '{test['expected']}' in response")
            else:
                print(f" ❌ FAIL - Did not find '{test['expected']}' in response")
        else:
            print(" ❌ ERROR - No response from Cat")
            results.append({
                'question': test['question'],
                'success': False,
                'response': None,
            })

        print()
        if i < question_count:
            time.sleep(2)  # Brief pause between questions; none needed after the last

    # Summary
    print("=" * 70)
    print("TEST SUMMARY")
    print("=" * 70)

    success_count = sum(1 for r in results if r['success'])
    total_count = len(results)

    print(f"✅ Successful recalls: {success_count}/{total_count}")
    print(f"❌ Failed recalls: {total_count - success_count}/{total_count}")

    if success_count == total_count:
        print("\n🎉 ALL TESTS PASSED! Declarative memory recall is working!")
    elif success_count > 0:
        print(f"\n⚠️  PARTIAL SUCCESS: {success_count}/{total_count} recalls working")
    else:
        print("\n❌ ALL TESTS FAILED: Declarative recall not working")

    print("\n" + "=" * 70)
    print("DETAILED RESULTS")
    print("=" * 70)

    for result in results:
        status = "✅ PASS" if result['success'] else "❌ FAIL"
        print(f"\n{status}: {result['question']}")
        if result['response']:
            print(f" Response: {_preview(result['response'], 200)}")

    return 0 if success_count == total_count else 1
|
|
|
|
if __name__ == "__main__":
    # Pass main()'s return value on as the process exit status. A return
    # value of None yields exit status 0.
    raise SystemExit(main())
|