#!/usr/bin/env python3
"""
END-TO-END Phase 2 Test

Tests the complete pipeline:
1. Send a diverse batch of messages (important + trivial)
2. Verify discord_bridge filters pure junk immediately
3. Verify rest stored with consolidated=False
4. Trigger consolidation
5. Verify LLM/heuristic rates and deletes low-importance
6. Verify facts extracted to declarative memory
7. Test recall of important information

This is the TRUE test of whether Phase 2 works.

The script only prints a report; it makes no assertions and always exits
normally, so the output has to be read by a human.
"""

import json
import time

import requests
from qdrant_client import QdrantClient

CAT_URL = "http://localhost:1865"
TEST_USER = "end_to_end_test_user"


def _post_message(text, user_id, timeout):
    """POST one chat message to the Cat's ``/message`` endpoint.

    Returns the ``requests.Response`` on any HTTP answer (whatever its status
    code), or ``None`` when the request itself failed (connection refused,
    timeout, ...). The failure reason is printed so the run can continue.
    """
    try:
        return requests.post(
            f"{CAT_URL}/message",
            json={"text": text, "user_id": user_id},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print(f" ⚠️ Request failed: {exc}")
        return None


def _json_body(response):
    """Return the response body as a dict, or ``{}`` if it is not a JSON object."""
    try:
        body = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid JSON.
        return {}
    return body if isinstance(body, dict) else {}


def _page_contents(points):
    """Extract the ``page_content`` payload field of each point ('' if absent)."""
    return [(point.payload or {}).get('page_content', '') for point in points]


def _is_unconsolidated(point):
    """Return True only when the point's metadata explicitly has a falsy ``consolidated``.

    A missing payload, missing metadata or missing flag is treated as
    "consolidated", which matches the original default of ``True``.
    """
    metadata = (point.payload or {}).get('metadata') or {}
    return not metadata.get('consolidated', True)


def send_message(text: str) -> bool:
    """Send message to Cat as ``TEST_USER``.

    Returns True when the Cat answered with HTTP 200, False on any other
    status code or when the request could not be completed.
    """
    response = _post_message(text, TEST_USER, timeout=30)
    return response is not None and response.status_code == 200


def check_memory_state():
    """Check current memory state.

    Connects to Qdrant on localhost:6333 and scrolls the first page (up to
    100 points, payloads only, no vectors) of the ``episodic`` and
    ``declarative`` collections.

    Returns:
        A ``(episodic, declarative)`` tuple of point lists.

    NOTE(review): the scroll is not filtered by user and stops at 100 points,
    so on a non-empty database the counts include other users' memories and
    may be truncated -- confirm the test is run against a clean instance.
    """
    client = QdrantClient(host='localhost', port=6333, timeout=10, prefer_grpc=False)

    # scroll() returns (points, next_offset); only the first page is used.
    episodic, _ = client.scroll('episodic', limit=100, with_payload=True, with_vectors=False)
    declarative, _ = client.scroll('declarative', limit=100, with_payload=True, with_vectors=False)

    return episodic, declarative


def main():
    """Run the five reporting phases against a live Cat + Qdrant instance.

    Phases: send the fixture messages, inspect what was stored immediately,
    trigger consolidation, compare memory before/after, then ask recall
    questions. Returns early (after printing the reason) if consolidation
    cannot be triggered.
    """
    print("=" * 70)
    print("END-TO-END PHASE 2 TEST")
    print("=" * 70)

    junk_category = "PURE JUNK (should be filtered immediately)"
    messages = {
        junk_category: [
            "lol",
            "k",
            "ok",
        ],
        "IMPORTANT FACTS (should be kept + extracted)": [
            "My name is Jennifer Martinez",
            "I'm 28 years old",
            "I work as a nurse at Seattle General Hospital",
            "My cat's name is Whiskers",
            "I'm allergic to peanuts",
        ],
        "EMOTIONAL EVENTS (should be kept)": [
            "My father passed away last month from cancer",
            "I just got accepted into grad school!",
            "I'm struggling with anxiety lately",
        ],
        "MUNDANE CHITCHAT (should be deleted in consolidation)": [
            "What's up?",
            "How are you?",
            "That's interesting",
            "Nice weather today",
        ],
        "PREFERENCES (should be kept + extracted)": [
            "I love jazz music",
            "My favorite color is purple",
            "I hate horror movies",
        ],
    }
    total_messages = sum(len(msgs) for msgs in messages.values())

    # Phase 1: Send diverse messages
    # The banner reports the real fixture size instead of a hard-coded number.
    print(f"\n📤 PHASE 1: Sending {total_messages} messages...")
    print("-" * 70)

    all_messages = []
    failed_messages = []
    for category, msgs in messages.items():
        print(f"\n{category}:")
        for msg in msgs:
            print(f" → {msg}")
            if send_message(msg):
                all_messages.append((category, msg))
            else:
                failed_messages.append(msg)
            time.sleep(0.3)

    print(f"\n✅ Sent {len(all_messages)} messages")
    if failed_messages:
        print(f"⚠️ Failed to send: {len(failed_messages)}/{total_messages}")
        for msg in failed_messages:
            print(f" - '{msg}'")

    # Phase 2: Check immediate filtering
    print("\n" + "=" * 70)
    print("📊 PHASE 2: Checking immediate filtering (discord_bridge)")
    print("-" * 70)

    time.sleep(2)  # Let storage complete

    episodic, declarative = check_memory_state()

    print(f"\nEpisodic memories stored: {len(episodic)}")
    print(f"Declarative memories: {len(declarative)}")

    # Check what was stored
    stored_content = _page_contents(episodic)
    stored_lookup = set(stored_content)

    # Derived from the fixture so the check cannot drift from what was sent.
    pure_junk = messages[junk_category]
    junk_filtered = [j for j in pure_junk if j not in stored_lookup]
    junk_stored = [j for j in pure_junk if j in stored_lookup]

    print(f"\n✅ Pure junk filtered: {len(junk_filtered)}/{len(pure_junk)}")
    for msg in junk_filtered:
        print(f" - '{msg}'")

    if junk_stored:
        print(f"\n⚠️ Pure junk NOT filtered: {len(junk_stored)}/{len(pure_junk)}")
        for msg in junk_stored:
            print(f" - '{msg}'")

    # Check consolidated flag
    unconsolidated = [e for e in episodic if _is_unconsolidated(e)]
    print(f"\n📋 Memories marked consolidated=False: {len(unconsolidated)}")

    # Phase 3: Trigger consolidation
    print("\n" + "=" * 70)
    print("🌙 PHASE 3: Triggering consolidation")
    print("-" * 70)

    response = _post_message("consolidate now", "admin", timeout=60)
    if response is None:
        print("❌ Consolidation failed: no response from Cat")
        return
    if response.status_code != 200:
        print(f"❌ Consolidation failed: {response.status_code}")
        return

    result = _json_body(response)
    print("✅ Consolidation triggered")
    print(f"Response: {(result.get('content') or '')[:200]}")

    time.sleep(3)  # Let consolidation complete

    # Phase 4: Check post-consolidation state
    print("\n" + "=" * 70)
    print("📊 PHASE 4: Analyzing post-consolidation state")
    print("-" * 70)

    episodic_after, declarative_after = check_memory_state()
    facts_extracted = len(declarative_after) - len(declarative)

    print(f"\nEpisodic memories: {len(episodic)} → {len(episodic_after)}")
    print(f"Deleted: {len(episodic) - len(episodic_after)}")

    print(f"\nDeclarative memories: {len(declarative)} → {len(declarative_after)}")
    print(f"Facts extracted: {facts_extracted}")

    # Check what was deleted (compared by text content, not by point id)
    stored_after = _page_contents(episodic_after)
    remaining_lookup = set(stored_after)
    deleted = [msg for msg in stored_content if msg not in remaining_lookup]

    if deleted:
        print(f"\n🗑️ Deleted ({len(deleted)}):")
        for msg in deleted[:10]:
            print(f" - '{msg}'")

    # Check what important stuff remains; one keyword per message that the
    # fixture expects to survive consolidation.
    important_keywords = [
        "Jennifer", "28", "nurse", "Whiskers", "peanuts",
        "father", "grad school", "anxiety",
        "jazz", "purple", "horror",
    ]
    important_kept = [
        msg for msg in stored_after
        if any(kw in msg for kw in important_keywords)
    ]

    print(f"\n✅ Important messages kept ({len(important_kept)}):")
    for msg in important_kept[:8]:
        print(f" - '{msg}'")

    # Phase 5: Test recall
    print("\n" + "=" * 70)
    print("🧠 PHASE 5: Testing recall")
    print("-" * 70)

    test_queries = [
        "What is my name?",
        "Where do I work?",
        "What's my cat's name?",
        "What am I allergic to?",
    ]

    for query in test_queries:
        response = _post_message(query, TEST_USER, timeout=30)
        if response is None or response.status_code != 200:
            # Report the failure instead of silently skipping the query.
            status = "no response" if response is None else response.status_code
            print(f"\nQ: {query}")
            print(f"❌ Recall request failed: {status}")
            continue

        result = _json_body(response)
        answer = result.get('content') or ''
        memories = (result.get('why') or {}).get('memory') or {}
        episodic_recalled = len(memories.get('episodic') or [])
        declarative_recalled = len(memories.get('declarative') or [])

        print(f"\nQ: {query}")
        print(f"A: {answer[:150]}")
        print(f" [Recalled: {episodic_recalled} episodic, {declarative_recalled} declarative]")

    # Final summary
    print("\n" + "=" * 70)
    print("📋 FINAL SUMMARY")
    print("=" * 70)

    print("\n1. Immediate filtering:")
    print(f" ✅ Filtered: {len(junk_filtered)}/{len(pure_junk)} pure junk")
    print(f" 📝 Stored: {len(episodic)} messages")

    print("\n2. Consolidation:")
    print(f" 🗑️ Deleted: {len(deleted)} low-importance")
    print(f" ✅ Kept: {len(episodic_after)} important")
    print(f" 📚 Facts extracted: {facts_extracted}")

    print("\n3. Recall:")
    print(f" Test queries: {len(test_queries)}")
    print(" (Check above for recall accuracy)")

    print("\n" + "=" * 70)


if __name__ == "__main__":
    main()