#!/usr/bin/env python3
"""
Full pipeline test for Phase 2 memory consolidation with declarative extraction.

Steps:
1. Tell Miku 20 facts (mix of important and trivial)
2. Run consolidation to delete trivial messages
3. Extract facts from consolidated episodic memories
4. Store facts in declarative memory
5. Test recall with factual questions

Extraction and storage (steps 3 and 4 above) are delegated to the helper
scripts ``extract_declarative_facts.py`` and ``store_declarative_facts.py``,
which are looked up relative to the current working directory. The console
output groups them under a single "STEP 3" heading and reports the recall
check as "STEP 4".
"""

import subprocess
import sys
import time
from typing import Optional

import requests

CAT_URL = "http://localhost:1865"
USER_ID = "test_user_pipeline"

# Important facts (should be remembered)
IMPORTANT_MESSAGES = [
    "My name is Sarah Chen.",
    "I'm 28 years old.",
    "I live in Seattle, Washington.",
    "I work as a software engineer at Microsoft.",
    "My favorite color is forest green.",
    "I love playing piano. I've been practicing for 15 years.",
    "I'm learning Japanese! Currently at N3 level.",
    "I have a cat named Luna.",
    "I'm allergic to peanuts.",
    "I prefer cats over dogs, though I like both.",
    "My favorite food is ramen.",
    "I enjoy hiking on weekends.",
    "I graduated from UW in 2018.",
    "My birthday is March 15th.",
]

# Trivial messages (should be deleted during consolidation)
TRIVIAL_MESSAGES = [
    "lol",
    "k",
    "haha",
    "brb",
    "nice",
    "cool",
]

# Test messages to tell Miku: important facts first, then the trivial ones.
TEST_MESSAGES = IMPORTANT_MESSAGES + TRIVIAL_MESSAGES

# Questions to test recall. "expected" is matched as a case-insensitive
# substring of the reply; "fact_type" is descriptive only and is not read
# anywhere in this script.
RECALL_TESTS = [
    {
        "question": "What is my name?",
        "expected": "sarah",
        "fact_type": "name",
    },
    {
        "question": "How old am I?",
        "expected": "28",
        "fact_type": "age",
    },
    {
        "question": "Where do I live?",
        "expected": "seattle",
        "fact_type": "location",
    },
    {
        "question": "What do I do for work?",
        "expected": "software engineer",
        "fact_type": "job",
    },
    {
        "question": "What is my favorite color?",
        "expected": "forest green",
        "fact_type": "favorite_color",
    },
    {
        "question": "What instruments do I play?",
        "expected": "piano",
        "fact_type": "hobby",
    },
    {
        "question": "What language am I learning?",
        "expected": "japanese",
        "fact_type": "hobby",
    },
    {
        "question": "What is my cat's name?",
        "expected": "luna",
        "fact_type": "pet_name",
    },
    {
        "question": "What am I allergic to?",
        "expected": "peanut",
        "fact_type": "allergy",
    },
    {
        "question": "Do I prefer cats or dogs?",
        "expected": "cat",
        "fact_type": "preference",
    },
]


def send_message(text: str) -> Optional[dict]:
    """Send a message to Miku.

    POSTs ``text`` together with ``USER_ID`` as JSON to ``{CAT_URL}/message``
    with a 30 second timeout.

    Returns:
        The decoded JSON body of the reply, or ``None`` when the request
        fails, the server answers with an HTTP error status, or the body is
        not valid JSON. The failure reason is printed, not raised.
    """
    try:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": text, "user_id": USER_ID},
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    # RequestException covers connection, timeout and HTTP-status errors;
    # ValueError covers JSON decoding failures on older requests releases.
    except (requests.RequestException, ValueError) as e:
        print(f" ❌ Error sending message: {e}")
        return None


def trigger_consolidation() -> bool:
    """Trigger memory consolidation.

    Sends the chat message ``"consolidate now"`` through ``send_message``.
    How the server reacts to that message is not visible from this script.

    Returns:
        ``True`` if the message was delivered and a reply was received,
        ``False`` otherwise (``send_message`` has already printed the error).
    """
    if send_message("consolidate now") is None:
        return False
    print(" ✅ Consolidation triggered")
    return True


def _reply_text(response: object) -> str:
    """Return the ``content`` field of a reply as a string ('' if absent)."""
    if not isinstance(response, dict):
        return ""
    content = response.get("content")
    if content is None:
        return ""
    return content if isinstance(content, str) else str(content)


def _run_script(script: str) -> subprocess.CompletedProcess:
    """Run a helper script with the current interpreter, capturing its output."""
    # sys.executable keeps the helper in the same interpreter/virtualenv as
    # this test; fall back to "python3" in the rare case it is unset.
    return subprocess.run(
        [sys.executable or "python3", script],
        capture_output=True,
        text=True,
    )


def _send_test_messages() -> int:
    """Send every entry of TEST_MESSAGES and return how many were delivered."""
    trivial = set(TRIVIAL_MESSAGES)
    total = len(TEST_MESSAGES)
    sent = 0
    for i, message in enumerate(TEST_MESSAGES, 1):
        msg_type = "TRIVIAL" if message in trivial else "IMPORTANT"
        print(f"[{i}/{total}] {msg_type}: {message}")

        if send_message(message) is not None:
            print(" ✅ Sent successfully")
            sent += 1
        else:
            print(" ❌ Failed to send")

        time.sleep(1)  # Brief pause between messages
    return sent


def _extract_and_store_facts() -> None:
    """Run the extraction and storage helper scripts; exit(1) if either fails."""
    print("Running extract_declarative_facts.py...")
    extraction = _run_script("extract_declarative_facts.py")
    if extraction.returncode != 0:
        print(f"❌ Extraction failed: {extraction.stderr[:200]}")
        sys.exit(1)
    # Count extracted facts from the marker the helper prints per fact.
    facts_count = extraction.stdout.count("✅ Extracted from:")
    print(f"✅ Extracted {facts_count} facts")

    print("\nRunning store_declarative_facts.py...")
    storage = _run_script("store_declarative_facts.py")
    if storage.returncode != 0:
        print(f"❌ Storage failed: {storage.stderr[:200]}")
        sys.exit(1)
    # Echo the helper's own summary line when it printed one.
    stored_line = next(
        (
            line
            for line in storage.stdout.split("\n")
            if "Successfully stored:" in line
        ),
        None,
    )
    if stored_line is not None:
        print(f"✅ {stored_line.strip()}")
    else:
        print("✅ Facts stored")


def _run_recall_tests() -> list:
    """Ask every recall question and return one result dict per question.

    Each result has the keys ``question``, ``expected``, ``success`` and
    ``response`` (the reply text, or ``None`` when no reply was received).
    """
    results = []
    total = len(RECALL_TESTS)
    for i, test in enumerate(RECALL_TESTS, 1):
        question = test["question"]
        expected = test["expected"].lower()

        print(f"[{i}/{total}] {question}")
        print(f" Expected: {expected}")

        response = send_message(question)
        if response is not None:
            content = _reply_text(response)
            answer = content.lower()
            # A recall counts as successful when the expected keyword occurs
            # anywhere in the lower-cased reply.
            success = expected in answer

            if success:
                print(" ✅ RECALLED correctly")
            else:
                print(" ❌ NOT recalled")
                print(f" Response: {answer[:100]}...")

            results.append({
                "question": question,
                "expected": expected,
                "success": success,
                "response": content,
            })
        else:
            print(" ❌ ERROR - No response")
            results.append({
                "question": question,
                "expected": expected,
                "success": False,
                "response": None,
            })

        print()
        time.sleep(2)
    return results


def _print_summary(results: list) -> None:
    """Print the recall success rate, per-question details and final verdict."""
    successful_recalls = sum(1 for entry in results if entry["success"])
    total = len(RECALL_TESTS)
    # Guard against an empty RECALL_TESTS list instead of dividing by zero.
    success_rate = (successful_recalls / total) * 100 if total else 0.0

    print("=" * 80)
    print("FINAL RESULTS")
    print("=" * 80)

    print(
        f"\n📊 RECALL SUCCESS RATE: {successful_recalls}/{total} "
        f"({success_rate:.1f}%)\n"
    )

    if success_rate == 100:
        print("🎉 PERFECT! All facts recalled correctly!")
    elif success_rate >= 80:
        print("✅ EXCELLENT! Most facts recalled correctly.")
    elif success_rate >= 50:
        print("⚠️ PARTIAL SUCCESS - Needs improvement.")
    else:
        print("❌ POOR PERFORMANCE - System needs significant fixes.")

    print("\nDetailed results:")
    print("-" * 80)
    for entry in results:
        status = "✅" if entry["success"] else "❌"
        print(f"{status} {entry['question']}")
        if not entry["success"] and entry["response"]:
            print(f" Response: {entry['response'][:150]}...")

    print("\n" + "=" * 80)

    if success_rate == 100:
        print("✅ PHASE 2 COMPLETE AND READY FOR PRODUCTION!")
    elif success_rate >= 80:
        print("✅ PHASE 2 MOSTLY WORKING - Minor refinements needed")
    else:
        print("❌ PHASE 2 NEEDS MORE WORK")

    print("=" * 80)


def main():
    """Run the whole pipeline test and print a report.

    Calls ``sys.exit(1)`` if consolidation cannot be triggered or if either
    helper script returns a non-zero exit status. Recall failures are only
    reported; they do not change the exit status.
    """
    print("=" * 80)
    print("PHASE 2 FULL PIPELINE TEST")
    print("=" * 80)
    print(f"Testing with user: {USER_ID}\n")

    # Step 1: Tell Miku the facts
    print("STEP 1: Telling Miku facts...")
    print("-" * 80)
    successful_sends = _send_test_messages()
    print(
        f"\n✅ Successfully sent {successful_sends}/{len(TEST_MESSAGES)} "
        "messages\n"
    )

    # Step 2: Trigger consolidation
    print("STEP 2: Triggering consolidation...")
    print("-" * 80)
    if not trigger_consolidation():
        print("❌ Failed to trigger consolidation")
        sys.exit(1)

    # Fixed wait: the script has no way to observe when consolidation ends.
    print("⏳ Waiting for consolidation to complete...")
    time.sleep(5)
    print("✅ Consolidation complete\n")

    # Step 3: Extract and store declarative facts
    print("STEP 3: Extracting and storing declarative facts...")
    print("-" * 80)
    _extract_and_store_facts()
    print()

    # Step 4: Test recall
    print("STEP 4: Testing declarative memory recall...")
    print("-" * 80)
    results = _run_recall_tests()

    # Final summary
    _print_summary(results)


if __name__ == "__main__":
    main()