121 lines
3.6 KiB
Python
121 lines
3.6 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Test if declarative facts can be recalled by asking factual questions.
|
||
|
|
This tests the CRITICAL fix for Phase 2 memory consolidation.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import time
from typing import Optional

import requests
|
||
|
|
|
||
|
|
# Base URL of the Cat HTTP server; questions are POSTed to "<CAT_URL>/message".
CAT_URL = "http://localhost:1865"
|
||
|
|
# Value sent as "user_id" in the JSON payload of every question.
USER_ID = "test_user_declarative"
|
||
|
|
|
||
|
|
def ask_cat(question: str) -> Optional[dict]:
    """Send a question to Cat and return the decoded JSON reply.

    POSTs ``{"text": question, "user_id": USER_ID}`` as JSON to
    ``{CAT_URL}/message`` with a 30-second timeout.

    Args:
        question: Text to send to Cat.

    Returns:
        The JSON-decoded response body, or ``None`` when the request failed,
        the server answered with an HTTP error status, or the body could not
        be decoded as JSON. Failures are printed rather than raised.
    """
    try:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": question, "user_id": USER_ID},
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    # RequestException covers connection, timeout and HTTP-status errors;
    # ValueError covers a body that is not valid JSON. Anything else is a
    # programming error and should surface instead of being reported as a
    # communication failure.
    except (requests.RequestException, ValueError) as e:
        print(f"❌ Error: {e}")
        return None
|
||
|
|
|
||
|
|
def _recalled(answer: str, expected: str) -> bool:
    """Return True when every comma-separated term of *expected* is in *answer*.

    Matching is case-insensitive. Splitting on commas lets an expectation such
    as "piano, japanese" match "I play piano and study Japanese", which a
    single literal substring test would reject.
    """
    haystack = answer.lower()
    terms = [term.strip().lower() for term in expected.split(",") if term.strip()]
    return all(term in haystack for term in terms)


def _preview(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    return text if len(text) <= limit else f"{text[:limit]}..."


def main():
    """Ask Cat a fixed set of factual questions and report which were recalled.

    For each question the reply's ``content`` field is searched for the
    expected terms. A per-question line, a summary and the detailed responses
    are printed.

    Returns:
        0 when every question was answered with all expected terms, 1 when at
        least one recall failed or produced no response.
    """
    print("=" * 70)
    print("DECLARATIVE MEMORY RECALL TEST")
    print("=" * 70)
    print("Testing if Cat can recall stored declarative facts...\n")

    # "expected" may list several comma-separated terms; all must appear.
    test_questions = [
        {
            "question": "What is my favorite color?",
            "expected": "forest",
            "fact_type": "favorite_color",
        },
        {
            # NOTE(review): the question asks *where* but the expected text is
            # a job title - confirm this matches the stored fact.
            "question": "Where do I work?",
            "expected": "software engineer",
            "fact_type": "job",
        },
        {
            "question": "What are my hobbies?",
            "expected": "piano, japanese",
            "fact_type": "hobby",
        },
        {
            "question": "Do I prefer cats or dogs?",
            "expected": "cats",
            "fact_type": "preference",
        },
    ]

    results = []

    for i, test in enumerate(test_questions, 1):
        print(f"[{i}/{len(test_questions)}] Testing: {test['question']}")
        print(f" Expected: {test['expected']}")

        response = ask_cat(test['question'])

        if response:
            # "content" may be missing, null or non-string; normalise it so
            # the search and the slicing below cannot raise.
            answer = str(response.get('content') or '')
            print(f" Response: {_preview(answer, 100)}")

            # Check if every expected term is in the response
            success = _recalled(answer, test['expected'])
            results.append({
                'question': test['question'],
                'success': success,
                'response': answer,
            })

            if success:
                print(f" ✅ SUCCESS - Found '{test['expected']}' in response")
            else:
                print(f" ❌ FAIL - Did not find '{test['expected']}' in response")
        else:
            print(" ❌ ERROR - No response from Cat")
            results.append({
                'question': test['question'],
                'success': False,
                'response': None,
            })

        print()
        time.sleep(2)  # Brief pause between questions

    # Summary
    print("=" * 70)
    print("TEST SUMMARY")
    print("=" * 70)

    success_count = sum(1 for r in results if r['success'])
    total_count = len(results)

    print(f"✅ Successful recalls: {success_count}/{total_count}")
    print(f"❌ Failed recalls: {total_count - success_count}/{total_count}")

    if success_count == total_count:
        print("\n🎉 ALL TESTS PASSED! Declarative memory recall is working!")
    elif success_count > 0:
        print(f"\n⚠️ PARTIAL SUCCESS: {success_count}/{total_count} recalls working")
    else:
        print("\n❌ ALL TESTS FAILED: Declarative recall not working")

    print("\n" + "=" * 70)
    print("DETAILED RESULTS")
    print("=" * 70)

    for result in results:
        status = "✅ PASS" if result['success'] else "❌ FAIL"
        print(f"\n{status}: {result['question']}")
        if result['response']:
            print(f" Response: {_preview(result['response'], 200)}")

    # Report failure through the return value so the caller can turn it into
    # a process exit status.
    return 0 if success_count == total_count else 1


if __name__ == "__main__":
    # Exit non-zero when any recall failed so shells and CI can detect it.
    raise SystemExit(main())
|