add: cheshire-cat configuration, tooling, tests, and documentation
Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
This commit is contained in:
233
cheshire-cat/test_end_to_end.py
Executable file
233
cheshire-cat/test_end_to_end.py
Executable file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
END-TO-END Phase 2 Test
|
||||
|
||||
Tests the complete pipeline:
|
||||
1. Send 18 diverse messages (important + trivial)
|
||||
2. Verify discord_bridge filters pure junk immediately
|
||||
3. Verify rest stored with consolidated=False
|
||||
4. Trigger consolidation
|
||||
5. Verify LLM/heuristic rates and deletes low-importance
|
||||
6. Verify facts extracted to declarative memory
|
||||
7. Test recall of important information
|
||||
|
||||
This is the TRUE test of whether Phase 2 works.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
# Base URL of the Cat HTTP API that every request in this script targets.
CAT_URL = "http://localhost:1865"

# user_id attached to the test messages and to the recall queries.
TEST_USER = "end_to_end_test_user"
|
||||
|
||||
def send_message(text: str):
    """Post *text* to the Cat's /message endpoint as TEST_USER.

    Returns True when the response status is 200 and False otherwise.
    Exceptions raised by ``requests.post`` (connection errors, the
    30-second timeout) are not caught here.
    """
    body = {"text": text, "user_id": TEST_USER}
    reply = requests.post(f"{CAT_URL}/message", json=body, timeout=30)
    return reply.status_code == 200
|
||||
|
||||
def check_memory_state():
    """Snapshot the 'episodic' and 'declarative' Qdrant collections.

    Reads one scroll page (at most 100 points, payloads included, vectors
    omitted) from each collection on localhost:6333 and returns them as an
    ``(episodic, declarative)`` tuple. The next-page offset returned by
    scroll is discarded, so anything past the first page is not seen.
    """
    qdrant = QdrantClient(host='localhost', port=6333, timeout=10, prefer_grpc=False)

    def first_page(collection_name):
        # scroll returns (records, next_offset); only the records are used.
        records, _next_offset = qdrant.scroll(
            collection_name, limit=100, with_payload=True, with_vectors=False
        )
        return records

    # Episodic is fetched first, then declarative, as in the original order.
    episodic_points = first_page('episodic')
    declarative_points = first_page('declarative')
    return episodic_points, declarative_points
|
||||
|
||||
def main():
    """Run the end-to-end Phase 2 scenario and print a report.

    Steps, in order:
      1. Send every message in the table below via ``send_message``.
      2. Snapshot memory via ``check_memory_state`` and report which of the
         "pure junk" messages are absent from episodic memory.
      3. Post "consolidate now" as user "admin" to trigger consolidation.
      4. Snapshot memory again and report what disappeared / what remains.
      5. Send recall queries as TEST_USER and print the answers.

    Nothing is asserted; the output is meant for manual inspection.
    Returns None, and returns early if the consolidation request does not
    answer with HTTP 200.
    """

    def page_content(point):
        # Tolerate a missing payload or a null page_content so the substring
        # and membership checks below always operate on a string.
        return (point.payload or {}).get('page_content') or ''

    print("=" * 70)
    print("END-TO-END PHASE 2 TEST")
    print("=" * 70)

    # The junk category key is named so the junk list and its size can be
    # derived from the table instead of being duplicated by hand.
    junk_category = "PURE JUNK (should be filtered immediately)"
    messages = {
        junk_category: [
            "lol",
            "k",
            "ok",
        ],
        "IMPORTANT FACTS (should be kept + extracted)": [
            "My name is Jennifer Martinez",
            "I'm 28 years old",
            "I work as a nurse at Seattle General Hospital",
            "My cat's name is Whiskers",
            "I'm allergic to peanuts",
        ],
        "EMOTIONAL EVENTS (should be kept)": [
            "My father passed away last month from cancer",
            "I just got accepted into grad school!",
            "I'm struggling with anxiety lately",
        ],
        "MUNDANE CHITCHAT (should be deleted in consolidation)": [
            "What's up?",
            "How are you?",
            "That's interesting",
            "Nice weather today",
        ],
        "PREFERENCES (should be kept + extracted)": [
            "I love jazz music",
            "My favorite color is purple",
            "I hate horror movies",
        ],
    }
    total_messages = sum(len(msgs) for msgs in messages.values())

    # Phase 1: Send diverse messages
    # The banner uses the real table size (the old text hard-coded 20).
    print(f"\n📤 PHASE 1: Sending {total_messages} messages...")
    print("-" * 70)

    all_messages = []
    failed_sends = []
    for category, msgs in messages.items():
        print(f"\n{category}:")
        for msg in msgs:
            print(f" → {msg}")
            # Only count a message as sent when the Cat answered 200.
            if send_message(msg):
                all_messages.append((category, msg))
            else:
                failed_sends.append(msg)
            time.sleep(0.3)

    print(f"\n✅ Sent {len(all_messages)} messages")
    if failed_sends:
        print(f"\n⚠️ Failed to send: {len(failed_sends)}/{total_messages}")
        for msg in failed_sends:
            print(f" - '{msg}'")

    # Phase 2: Check immediate filtering
    print("\n" + "=" * 70)
    print("📊 PHASE 2: Checking immediate filtering (discord_bridge)")
    print("-" * 70)

    time.sleep(2)  # Let storage complete
    episodic, declarative = check_memory_state()

    print(f"\nEpisodic memories stored: {len(episodic)}")
    print(f"Declarative memories: {len(declarative)}")

    # Check what was stored
    stored_content = [page_content(e) for e in episodic]
    stored_lookup = set(stored_content)

    pure_junk = messages[junk_category]
    junk_filtered = [j for j in pure_junk if j not in stored_lookup]
    junk_stored = [j for j in pure_junk if j in stored_lookup]

    print(f"\n✅ Pure junk filtered: {len(junk_filtered)}/{len(pure_junk)}")
    for msg in junk_filtered:
        print(f" - '{msg}'")

    if junk_stored:
        print(f"\n⚠️ Pure junk NOT filtered: {len(junk_stored)}/{len(pure_junk)}")
        for msg in junk_stored:
            print(f" - '{msg}'")

    # Check consolidated flag; a missing flag is treated as consolidated.
    unconsolidated = [
        e for e in episodic
        if not (e.payload or {}).get('metadata', {}).get('consolidated', True)
    ]
    print(f"\n📋 Memories marked consolidated=False: {len(unconsolidated)}")

    # Phase 3: Trigger consolidation
    print("\n" + "=" * 70)
    print("🌙 PHASE 3: Triggering consolidation")
    print("-" * 70)

    response = requests.post(
        f"{CAT_URL}/message",
        json={"text": "consolidate now", "user_id": "admin"},
        timeout=60
    )

    if response.status_code != 200:
        print(f"❌ Consolidation failed: {response.status_code}")
        return

    result = response.json()
    print("✅ Consolidation triggered")
    print(f"Response: {result.get('content', '')[:200]}")

    time.sleep(3)  # Let consolidation complete

    # Phase 4: Check post-consolidation state
    print("\n" + "=" * 70)
    print("📊 PHASE 4: Analyzing post-consolidation state")
    print("-" * 70)

    episodic_after, declarative_after = check_memory_state()
    facts_extracted = len(declarative_after) - len(declarative)

    print(f"\nEpisodic memories: {len(episodic)} → {len(episodic_after)}")
    print(f"Deleted: {len(episodic) - len(episodic_after)}")
    print(f"\nDeclarative memories: {len(declarative)} → {len(declarative_after)}")
    print(f"Facts extracted: {facts_extracted}")

    # Check what was deleted
    stored_after = [page_content(e) for e in episodic_after]
    remaining_lookup = set(stored_after)
    deleted = [msg for msg in stored_content if msg not in remaining_lookup]

    if deleted:
        print(f"\n🗑️ Deleted ({len(deleted)}):")
        for msg in deleted[:10]:
            print(f" - '{msg}'")

    # Check what important stuff remains (case-sensitive substring match)
    important_keywords = ["Jennifer", "28", "nurse", "Whiskers", "peanuts",
                          "father", "grad school", "anxiety", "jazz", "purple"]
    important_kept = [
        msg for msg in stored_after
        if any(kw in msg for kw in important_keywords)
    ]

    print(f"\n✅ Important messages kept ({len(important_kept)}):")
    for msg in important_kept[:8]:
        print(f" - '{msg}'")

    # Phase 5: Test recall
    print("\n" + "=" * 70)
    print("🧠 PHASE 5: Testing recall")
    print("-" * 70)

    test_queries = [
        "What is my name?",
        "Where do I work?",
        "What's my cat's name?",
        "What am I allergic to?",
    ]

    for query in test_queries:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": query, "user_id": TEST_USER},
            timeout=30
        )

        if response.status_code != 200:
            # Surface failed queries instead of silently skipping them.
            print(f"\nQ: {query}")
            print(f"❌ Query failed: {response.status_code}")
            continue

        result = response.json()
        answer = result.get('content', '')
        memories = result.get('why', {}).get('memory', {})
        episodic_recalled = len(memories.get('episodic', []))
        declarative_recalled = len(memories.get('declarative', []))

        print(f"\nQ: {query}")
        print(f"A: {answer[:150]}")
        print(f" [Recalled: {episodic_recalled} episodic, {declarative_recalled} declarative]")

    # Final summary
    print("\n" + "=" * 70)
    print("📋 FINAL SUMMARY")
    print("=" * 70)

    print("\n1. Immediate filtering:")
    print(f" ✅ Filtered: {len(junk_filtered)}/{len(pure_junk)} pure junk")
    print(f" 📝 Stored: {len(episodic)} messages")

    print("\n2. Consolidation:")
    print(f" 🗑️ Deleted: {len(deleted)} low-importance")
    print(f" ✅ Kept: {len(episodic_after)} important")
    print(f" 📚 Facts extracted: {facts_extracted}")

    print("\n3. Recall:")
    print(f" Test queries: {len(test_queries)}")
    print(" (Check above for recall accuracy)")

    print("\n" + "=" * 70)
|
||||
|
||||
# Run the scenario only when this file is executed as a script, so that
# importing the module does not send any requests.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user