Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
234 lines
7.5 KiB
Python
Executable File
234 lines
7.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
END-TO-END Phase 2 Test
|
|
|
|
Tests the complete pipeline:
|
|
1. Send 18 diverse messages (important + trivial)
|
|
2. Verify discord_bridge filters pure junk immediately
|
|
3. Verify rest stored with consolidated=False
|
|
4. Trigger consolidation
|
|
5. Verify LLM/heuristic rates and deletes low-importance
|
|
6. Verify facts extracted to declarative memory
|
|
7. Test recall of important information
|
|
|
|
This is the TRUE test of whether Phase 2 works.
|
|
"""
|
|
|
|
import json
import os
import time

import requests
from qdrant_client import QdrantClient
|
|
|
|
# Base URL of the Cat HTTP API. Defaults to the local instance on port 1865;
# set the CAT_URL environment variable to point the test at another host/port.
# A trailing slash is stripped so f"{CAT_URL}/message" never contains "//".
CAT_URL = (os.environ.get("CAT_URL") or "http://localhost:1865").rstrip("/")

# User id attached to every test message and every recall query.
TEST_USER = "end_to_end_test_user"
|
|
|
|
def send_message(text: str):
    """Post *text* to the Cat's ``/message`` endpoint as the test user.

    Returns True when the Cat answers with HTTP 200, False for any other
    status code. Exceptions raised by ``requests`` are not caught here.
    """
    payload = {"text": text, "user_id": TEST_USER}
    reply = requests.post(f"{CAT_URL}/message", json=payload, timeout=30)
    return reply.status_code == 200
|
|
|
|
def check_memory_state(host='localhost', port=6333, page_size=100):
    """Fetch every point of the 'episodic' and 'declarative' collections.

    Args:
        host: Qdrant host name (defaults to the local instance).
        port: Qdrant HTTP port.
        page_size: Number of points requested per scroll call.

    Returns:
        A tuple ``(episodic, declarative)`` of lists of Qdrant records, each
        carrying its payload but no vectors.
    """
    client = QdrantClient(host=host, port=port, timeout=10, prefer_grpc=False)

    def scroll_all(collection):
        # scroll() returns one page plus the offset of the next page; a single
        # call would silently truncate collections larger than page_size and
        # skew the before/after counts reported by the test.
        points = []
        offset = None
        while True:
            batch, offset = client.scroll(
                collection,
                limit=page_size,
                offset=offset,
                with_payload=True,
                with_vectors=False,
            )
            points.extend(batch)
            # Stop on the last page; the empty-batch check guards against
            # looping forever if an offset is returned without any points.
            if offset is None or not batch:
                return points

    episodic = scroll_all('episodic')
    declarative = scroll_all('declarative')
    return episodic, declarative
|
|
|
|
def main():
    """Run the end-to-end Phase 2 scenario and print a human-readable report.

    Phases:
        1. Send the categorized test messages to the Cat.
        2. Inspect Qdrant to see which pure-junk messages were not stored and
           how many episodic memories are flagged ``consolidated=False``.
        3. Trigger consolidation by sending "consolidate now" as user "admin".
        4. Compare the memory state before and after consolidation.
        5. Ask recall questions and print how many memories each answer used.

    Returns None. Stops early when no message could be sent or when the
    consolidation request does not come back with HTTP 200.
    """

    def page_contents(points):
        # A missing payload or page_content is treated as an empty string so
        # the membership and keyword checks below never operate on None.
        return [(p.payload or {}).get('page_content') or '' for p in points]

    print("=" * 70)
    print("END-TO-END PHASE 2 TEST")
    print("=" * 70)

    # Phase 1: Send diverse messages
    junk_category = "PURE JUNK (should be filtered immediately)"
    messages = {
        junk_category: [
            "lol",
            "k",
            "ok",
        ],
        "IMPORTANT FACTS (should be kept + extracted)": [
            "My name is Jennifer Martinez",
            "I'm 28 years old",
            "I work as a nurse at Seattle General Hospital",
            "My cat's name is Whiskers",
            "I'm allergic to peanuts",
        ],
        "EMOTIONAL EVENTS (should be kept)": [
            "My father passed away last month from cancer",
            "I just got accepted into grad school!",
            "I'm struggling with anxiety lately",
        ],
        "MUNDANE CHITCHAT (should be deleted in consolidation)": [
            "What's up?",
            "How are you?",
            "That's interesting",
            "Nice weather today",
        ],
        "PREFERENCES (should be kept + extracted)": [
            "I love jazz music",
            "My favorite color is purple",
            "I hate horror movies",
        ],
    }
    # Derive the total from the data so the banner cannot drift out of sync
    # with the message list.
    total_messages = sum(len(msgs) for msgs in messages.values())

    print(f"\n📤 PHASE 1: Sending {total_messages} messages...")
    print("-" * 70)

    all_messages = []
    failed_messages = []
    for category, msgs in messages.items():
        print(f"\n{category}:")
        for msg in msgs:
            print(f" → {msg}")
            # Only count a message as sent when the Cat actually accepted it.
            if send_message(msg):
                all_messages.append((category, msg))
            else:
                failed_messages.append((category, msg))
                print(" ⚠️ Cat did not return HTTP 200 for this message")
            time.sleep(0.3)

    print(f"\n✅ Sent {len(all_messages)}/{total_messages} messages")
    if failed_messages:
        print(f"⚠️ Failed to send: {len(failed_messages)}")
    if not all_messages:
        # Nothing reached the Cat, so the remaining phases would only report
        # on pre-existing memories.
        print("❌ No messages were accepted by the Cat; aborting test")
        return

    # Phase 2: Check immediate filtering
    print("\n" + "=" * 70)
    print("📊 PHASE 2: Checking immediate filtering (discord_bridge)")
    print("-" * 70)

    time.sleep(2)  # Let storage complete
    episodic, declarative = check_memory_state()

    print(f"\nEpisodic memories stored: {len(episodic)}")
    print(f"Declarative memories: {len(declarative)}")

    # Check what was stored
    stored_content = page_contents(episodic)
    stored_lookup = set(stored_content)

    pure_junk = messages[junk_category]
    junk_filtered = [j for j in pure_junk if j not in stored_lookup]
    junk_stored = [j for j in pure_junk if j in stored_lookup]

    print(f"\n✅ Pure junk filtered: {len(junk_filtered)}/{len(pure_junk)}")
    for msg in junk_filtered:
        print(f" - '{msg}'")

    if junk_stored:
        print(f"\n⚠️ Pure junk NOT filtered: {len(junk_stored)}/{len(pure_junk)}")
        for msg in junk_stored:
            print(f" - '{msg}'")

    # Check consolidated flag. A memory without the flag is treated as already
    # consolidated (default True), so only explicit falsy flags are counted.
    unconsolidated = [
        e for e in episodic
        if not ((e.payload or {}).get('metadata') or {}).get('consolidated', True)
    ]
    print(f"\n📋 Memories marked consolidated=False: {len(unconsolidated)}")

    # Phase 3: Trigger consolidation
    print("\n" + "=" * 70)
    print("🌙 PHASE 3: Triggering consolidation")
    print("-" * 70)

    response = requests.post(
        f"{CAT_URL}/message",
        json={"text": "consolidate now", "user_id": "admin"},
        timeout=60,
    )

    if response.status_code != 200:
        print(f"❌ Consolidation failed: {response.status_code}")
        return

    result = response.json()
    print("✅ Consolidation triggered")
    print(f"Response: {result.get('content', '')[:200]}")

    time.sleep(3)  # Let consolidation complete

    # Phase 4: Check post-consolidation state
    print("\n" + "=" * 70)
    print("📊 PHASE 4: Analyzing post-consolidation state")
    print("-" * 70)

    episodic_after, declarative_after = check_memory_state()
    facts_extracted = len(declarative_after) - len(declarative)

    print(f"\nEpisodic memories: {len(episodic)} → {len(episodic_after)}")
    print(f"Deleted: {len(episodic) - len(episodic_after)}")
    print(f"\nDeclarative memories: {len(declarative)} → {len(declarative_after)}")
    print(f"Facts extracted: {facts_extracted}")

    # Check what was deleted
    stored_after = page_contents(episodic_after)
    stored_after_lookup = set(stored_after)
    deleted = [msg for msg in stored_content if msg not in stored_after_lookup]

    if deleted:
        print(f"\n🗑️  Deleted ({len(deleted)}):")
        for msg in deleted[:10]:
            print(f" - '{msg}'")

    # Check what important stuff remains. One keyword per message in the
    # fact/emotional/preference categories ("horror" covers
    # "I hate horror movies", which is also expected to be kept).
    important_keywords = ["Jennifer", "28", "nurse", "Whiskers", "peanuts",
                          "father", "grad school", "anxiety", "jazz", "purple",
                          "horror"]
    important_kept = [
        msg for msg in stored_after
        if any(kw in msg for kw in important_keywords)
    ]

    print(f"\n✅ Important messages kept ({len(important_kept)}):")
    for msg in important_kept[:8]:
        print(f" - '{msg}'")

    # Phase 5: Test recall
    print("\n" + "=" * 70)
    print("🧠 PHASE 5: Testing recall")
    print("-" * 70)

    test_queries = [
        "What is my name?",
        "Where do I work?",
        "What's my cat's name?",
        "What am I allergic to?",
    ]

    for query in test_queries:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": query, "user_id": TEST_USER},
            timeout=30,
        )

        if response.status_code != 200:
            # Report the failure instead of silently skipping the query.
            print(f"\nQ: {query}")
            print(f"❌ Recall request failed: {response.status_code}")
            continue

        result = response.json()
        answer = result.get('content', '')
        memories = result.get('why', {}).get('memory', {})
        episodic_recalled = len(memories.get('episodic', []))
        declarative_recalled = len(memories.get('declarative', []))

        print(f"\nQ: {query}")
        print(f"A: {answer[:150]}")
        print(f" [Recalled: {episodic_recalled} episodic, {declarative_recalled} declarative]")

    # Final summary
    print("\n" + "=" * 70)
    print("📋 FINAL SUMMARY")
    print("=" * 70)

    print("\n1. Immediate filtering:")
    print(f" ✅ Filtered: {len(junk_filtered)}/{len(pure_junk)} pure junk")
    print(f" 📝 Stored: {len(episodic)} messages")

    print("\n2. Consolidation:")
    print(f" 🗑️  Deleted: {len(deleted)} low-importance")
    print(f" ✅ Kept: {len(episodic_after)} important")
    print(f" 📚 Facts extracted: {facts_extracted}")

    print("\n3. Recall:")
    print(f" Test queries: {len(test_queries)}")
    print(" (Check above for recall accuracy)")

    print("\n" + "=" * 70)


# Run the scenario only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|