Files
miku-discord/cheshire-cat/test_end_to_end.py
koko210Serve ae1e0aa144 add: cheshire-cat configuration, tooling, tests, and documentation
Configuration:
- .env.example, .gitignore, compose.yml (main docker compose)
- docker-compose-amd.yml (ROCm), docker-compose-macos.yml
- start.sh, stop.sh convenience scripts
- LICENSE (Apache 2.0, from upstream Cheshire Cat)

Memory management utilities:
- analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py
- check_memories.py, extract_declarative_facts.py, store_declarative_facts.py
- compare_systems.py (system comparison tool)
- benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py

Test suite:
- quick_test.py, test_setup.py, test_setup_simple.py
- test_consolidation_direct.py, test_declarative_recall.py, test_recall.py
- test_end_to_end.py, test_full_pipeline.py
- test_phase2.py, test_phase2_comprehensive.py

Documentation:
- README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md
- PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md
- POST_OPTIMIZATION_ANALYSIS.md
2026-03-04 00:51:14 +02:00

234 lines
7.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""
END-TO-END Phase 2 Test
Tests the complete pipeline:
1. Send 20 diverse messages (important + trivial)
2. Verify discord_bridge filters pure junk immediately
3. Verify rest stored with consolidated=False
4. Trigger consolidation
5. Verify LLM/heuristic rates and deletes low-importance
6. Verify facts extracted to declarative memory
7. Test recall of important information
This is the TRUE test of whether Phase 2 works.
"""
import requests
import json
import time
from qdrant_client import QdrantClient
# Base URL of the locally running Cheshire Cat server (default compose port).
CAT_URL = "http://localhost:1865"
# Dedicated user id so test memories don't mix with real users' memories.
TEST_USER = "end_to_end_test_user"
def send_message(text: str) -> bool:
    """Send one chat message to the Cat server as the test user.

    Args:
        text: Message content to send.

    Returns:
        True if the server accepted the message (HTTP 200), False otherwise.
    """
    response = requests.post(
        f"{CAT_URL}/message",
        json={"text": text, "user_id": TEST_USER},
        timeout=30,
    )
    # Idiomatic: the comparison already yields the boolean we want,
    # replacing the if/return True/return False ladder.
    return response.status_code == 200
def check_memory_state():
    """Snapshot the current Qdrant memory contents.

    Returns:
        A (episodic, declarative) tuple of point lists, up to 100 points each,
        with payloads included and vectors omitted.
    """
    qdrant = QdrantClient(host='localhost', port=6333, timeout=10, prefer_grpc=False)
    # Pull both collections the same way; scroll() returns (points, next_offset).
    snapshots = []
    for collection in ('episodic', 'declarative'):
        points, _offset = qdrant.scroll(
            collection, limit=100, with_payload=True, with_vectors=False
        )
        snapshots.append(points)
    return tuple(snapshots)
def main():
    """Drive the end-to-end Phase 2 test.

    Phases: send diverse messages, verify immediate junk filtering, trigger
    consolidation, analyze before/after memory state, and probe recall.
    All results are printed; nothing is returned.
    """
    print("=" * 70)
    print("END-TO-END PHASE 2 TEST")
    print("=" * 70)

    # Phase 1: Send diverse messages
    print("\n📤 PHASE 1: Sending 20 messages...")
    print("-" * 70)
    # Categories encode the expected fate of each message so later phases
    # can check filtering/consolidation behavior against intent.
    messages = {
        "PURE JUNK (should be filtered immediately)": [
            "lol",
            "k",
            "ok",
        ],
        "IMPORTANT FACTS (should be kept + extracted)": [
            "My name is Jennifer Martinez",
            "I'm 28 years old",
            "I work as a nurse at Seattle General Hospital",
            "My cat's name is Whiskers",
            "I'm allergic to peanuts",
        ],
        "EMOTIONAL EVENTS (should be kept)": [
            "My father passed away last month from cancer",
            "I just got accepted into grad school!",
            "I'm struggling with anxiety lately",
        ],
        "MUNDANE CHITCHAT (should be deleted in consolidation)": [
            "What's up?",
            "How are you?",
            "That's interesting",
            "Nice weather today",
        ],
        "PREFERENCES (should be kept + extracted)": [
            "I love jazz music",
            "My favorite color is purple",
            "I hate horror movies",
        ],
    }
    all_messages = []
    for category, msgs in messages.items():
        print(f"\n{category}:")
        for msg in msgs:
            print(f"{msg}")
            send_message(msg)
            all_messages.append((category, msg))
            time.sleep(0.3)  # pace requests so the server isn't flooded
    print(f"\n✅ Sent {len(all_messages)} messages")

    # Phase 2: Check immediate filtering
    print("\n" + "=" * 70)
    print("📊 PHASE 2: Checking immediate filtering (discord_bridge)")
    print("-" * 70)
    time.sleep(2)  # Let storage complete
    episodic, declarative = check_memory_state()
    print(f"\nEpisodic memories stored: {len(episodic)}")
    print(f"Declarative memories: {len(declarative)}")
    # Check what was stored
    stored_content = [e.payload.get('page_content', '') for e in episodic]
    pure_junk = ["lol", "k", "ok"]
    junk_filtered = [j for j in pure_junk if j not in stored_content]
    junk_stored = [j for j in pure_junk if j in stored_content]
    print(f"\n✅ Pure junk filtered: {len(junk_filtered)}/3")
    if junk_filtered:
        for msg in junk_filtered:
            print(f" - '{msg}'")
    if junk_stored:
        print(f"\n⚠️ Pure junk NOT filtered: {len(junk_stored)}/3")
        for msg in junk_stored:
            print(f" - '{msg}'")
    # Check consolidated flag. Default True means a memory with no metadata
    # is treated as already consolidated and is NOT counted here.
    unconsolidated = [e for e in episodic if not e.payload.get('metadata', {}).get('consolidated', True)]
    print(f"\n📋 Memories marked consolidated=False: {len(unconsolidated)}")

    # Phase 3: Trigger consolidation (sent as admin, not the test user)
    print("\n" + "=" * 70)
    print("🌙 PHASE 3: Triggering consolidation")
    print("-" * 70)
    response = requests.post(
        f"{CAT_URL}/message",
        json={"text": "consolidate now", "user_id": "admin"},
        timeout=60
    )
    if response.status_code == 200:
        result = response.json()
        print(f"✅ Consolidation triggered")
        print(f"Response: {result.get('content', '')[:200]}")
    else:
        # Without consolidation the remaining phases are meaningless — bail out.
        print(f"❌ Consolidation failed: {response.status_code}")
        return
    time.sleep(3)  # Let consolidation complete

    # Phase 4: Check post-consolidation state
    print("\n" + "=" * 70)
    print("📊 PHASE 4: Analyzing post-consolidation state")
    print("-" * 70)
    episodic_after, declarative_after = check_memory_state()
    # FIX: before/after counts were printed run together with no separator;
    # show them as "before → after".
    print(f"\nEpisodic memories: {len(episodic)} → {len(episodic_after)}")
    print(f"Deleted: {len(episodic) - len(episodic_after)}")
    print(f"\nDeclarative memories: {len(declarative)} → {len(declarative_after)}")
    print(f"Facts extracted: {len(declarative_after) - len(declarative)}")
    # Check what was deleted
    stored_after = [e.payload.get('page_content', '') for e in episodic_after]
    deleted = [msg for msg in stored_content if msg not in stored_after]
    if deleted:
        print(f"\n🗑️ Deleted ({len(deleted)}):")
        for msg in deleted[:10]:
            print(f" - '{msg}'")
    # Check what important stuff remains (keyword match against phase-1 facts)
    important_keywords = ["Jennifer", "28", "nurse", "Whiskers", "peanuts",
                          "father", "grad school", "anxiety", "jazz", "purple"]
    important_kept = [msg for msg in stored_after if any(kw in msg for kw in important_keywords)]
    print(f"\n✅ Important messages kept ({len(important_kept)}):")
    for msg in important_kept[:8]:
        print(f" - '{msg}'")

    # Phase 5: Test recall
    print("\n" + "=" * 70)
    print("🧠 PHASE 5: Testing recall")
    print("-" * 70)
    test_queries = [
        "What is my name?",
        "Where do I work?",
        "What's my cat's name?",
        "What am I allergic to?",
    ]
    for query in test_queries:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": query, "user_id": TEST_USER},
            timeout=30
        )
        if response.status_code == 200:
            result = response.json()
            answer = result.get('content', '')
            # 'why.memory' carries the memories the Cat recalled for this answer
            memories = result.get('why', {}).get('memory', {})
            episodic_recalled = len(memories.get('episodic', []))
            declarative_recalled = len(memories.get('declarative', []))
            print(f"\nQ: {query}")
            print(f"A: {answer[:150]}")
            print(f" [Recalled: {episodic_recalled} episodic, {declarative_recalled} declarative]")

    # Final summary
    print("\n" + "=" * 70)
    print("📋 FINAL SUMMARY")
    print("=" * 70)
    print(f"\n1. Immediate filtering:")
    print(f" ✅ Filtered: {len(junk_filtered)}/3 pure junk")
    print(f" 📝 Stored: {len(episodic)} messages")
    print(f"\n2. Consolidation:")
    print(f" 🗑️ Deleted: {len(deleted)} low-importance")
    print(f" ✅ Kept: {len(episodic_after)} important")
    print(f" 📚 Facts extracted: {len(declarative_after) - len(declarative)}")
    print(f"\n3. Recall:")
    print(f" Test queries: {len(test_queries)}")
    print(f" (Check above for recall accuracy)")
    print("\n" + "=" * 70)
# Script entry point: run the full end-to-end test when executed directly.
if __name__ == "__main__":
    main()