# miku-discord/cheshire-cat/test_phase2.py

#!/usr/bin/env python3
"""
Phase 2 Test Script

Tests the Memory Consolidation plugin:
1. Send multiple messages (some important, some trivial)
2. Manually trigger consolidation
3. Verify important memories kept, trivial deleted
4. Check if facts were extracted to declarative memory
"""

import requests
import json
import time
from datetime import datetime


# Base URL of the Cat HTTP API; the request paths used in this script
# ("/message", "/admin/consolidate") are appended to it.
CAT_URL = "http://localhost:1865"
# Value sent as "user_id" in the payload of every test message.
TEST_USER_ID = "discord_user_phase2_test"


def send_message(text: str, guild_id: str = "test_guild", description: str = ""):
    """Post one chat message to the Cat and report whether it was accepted.

    Prints a divider, the description and the message text, then POSTs the
    message to ``{CAT_URL}/message`` as ``TEST_USER_ID`` with a 30-second
    timeout. On HTTP 200 the first 80 characters of the reply's ``content``
    field are printed; otherwise the status code or the exception is printed.

    Args:
        text: Message body to send.
        guild_id: Stored in the payload metadata under ``guild_id``.
        description: Label printed above the message text.

    Returns:
        True if the server answered HTTP 200 and the reply could be read;
        False for any other status code or if anything raised while sending
        or decoding the reply.
    """
    divider = "=" * 60
    print(f"\n{divider}")
    print(f"📤 {description}")
    print(f"   Message: '{text}'")

    metadata = {"guild_id": guild_id, "channel_id": "test_channel"}
    body = {"text": text, "user_id": TEST_USER_ID, "metadata": metadata}

    try:
        reply = requests.post(f"{CAT_URL}/message", json=body, timeout=30)

        # Guard clause: anything but 200 is reported and counted as a failure.
        if reply.status_code != 200:
            print(f"   ❌ Error: {reply.status_code}")
            return False

        preview = reply.json().get('content', '')[:80]
        print(f"   ✅ Response: {preview}...")
        return True
    except Exception as err:
        # Best-effort script: report the problem and let the caller carry on.
        print(f"   ❌ Exception: {err}")
        return False


def trigger_consolidation():
    """Ask the Cat to run memory consolidation immediately.

    Prints a header, then POSTs to ``{CAT_URL}/admin/consolidate`` with a
    60-second timeout and prints the outcome.

    Returns:
        True if the endpoint answered HTTP 200; False for any other status
        code or if the request raised.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print("🌙 TRIGGERING CONSOLIDATION")
    print(rule)

    # Same hint is shown for a non-200 status and for a failed request.
    endpoint_note = "   (This is expected - no admin endpoint yet)"

    try:
        # Try to trigger via API (if endpoint exists)
        reply = requests.post(f"{CAT_URL}/admin/consolidate", timeout=60)

        if reply.status_code != 200:
            print(f"⚠️  API returned {reply.status_code}")
            print(endpoint_note)
            return False

        print("✅ Consolidation triggered successfully")
        return True
    except Exception as err:
        print(f"⚠️  Could not trigger via API: {err}")
        print(endpoint_note)
        return False


def check_logs():
    """Print the command for inspecting consolidation logs by hand.

    Nothing is executed here: the function only writes a header and a
    suggested ``docker logs`` pipeline to stdout.
    """
    rule = "=" * 60
    output_lines = (
        f"\n{rule}",
        "📋 CHECKING CONSOLIDATION LOGS",
        rule,
        "\nRun this command manually to check:",
        "   docker logs miku_cheshire_cat_test 2>&1 | grep -E '(Consolidation|🌙|✨|💾|🗑️)' | tail -30",
    )
    for line in output_lines:
        print(line)


def _print_banner(title):
    """Print a blank line, then *title* framed above and below by 60 '=' characters."""
    rule = "=" * 60
    print("\n" + rule)
    print(title)
    print(rule)


def main():
    """Drive the Phase 2 memory-consolidation scenario against a running Cat.

    Steps, in order:
      1. Pause 2 seconds, then send a fixed mix of trivial, important and
         critical messages via send_message(), one second apart.
      2. Call trigger_consolidation() and pause 10 seconds.
      3. Print the expected keep/delete/fact-extraction outcome and the
         manual verification steps. This function does not inspect the
         Cat's memory itself; verification is left to the operator.

    The boolean returned by each send_message() call is tallied, so a run in
    which some messages never reached the server says so explicitly instead
    of ending with an unqualified "complete" message.
    """
    print("="*60)
    print("PHASE 2 TEST: Memory Consolidation")
    print("="*60)

    print(f"\n🧪 Testing with user: {TEST_USER_ID}")
    print("   Sending mix of important and trivial messages")

    # Wait for Cat to be ready
    time.sleep(2)

    # Test Suite 1: Send varied messages
    _print_banner("TEST SUITE 1: Varied Message Types")

    messages = [
        # Trivial (should be deleted)
        ("lol", "Trivial - pure reaction"),
        ("k", "Trivial - 1 char"),
        ("okay", "Trivial - acknowledgment"),

        # Important (should be kept)
        ("My name is Alice", "Important - personal info"),
        ("I love playing guitar", "Important - hobby/preference"),
        ("My dog died last month", "Important - emotional event"),
        ("I'm studying computer science at MIT", "Important - education"),

        # Medium (depends on context)
        ("What's the weather like?", "Medium - generic question"),
        ("I had pizza for lunch", "Medium - daily activity"),

        # Very important (should definitely be kept)
        ("I'm getting married next month!", "Critical - major life event"),
        ("I've been diagnosed with depression", "Critical - health/emotional"),
    ]

    # Count successful deliveries: the expected results printed below only
    # make sense if every message actually reached the server.
    delivered = 0
    for text, desc in messages:
        if send_message(text, description=desc):
            delivered += 1
        time.sleep(1)

    failed = len(messages) - delivered
    print(f"\n📬 Delivered {delivered}/{len(messages)} messages")

    # Test Suite 2: Trigger consolidation
    _print_banner("TEST SUITE 2: Consolidation Trigger")

    trigger_consolidation()

    # Wait for consolidation to complete
    print("\n⏳ Waiting 10 seconds for consolidation to complete...")
    time.sleep(10)

    # Test Suite 3: Verify results
    _print_banner("TEST SUITE 3: Verification")

    print("\n✅ EXPECTED RESULTS:")
    print("\n📝 Should be DELETED (trivial):")
    print("   - 'lol' (pure reaction)")
    print("   - 'k' (too short)")
    print("   - 'okay' (acknowledgment)")

    print("\n💾 Should be KEPT (important):")
    print("   - 'My name is Alice' (importance: 7-8)")
    print("   - 'I love playing guitar' (importance: 6-7)")
    print("   - 'My dog died last month' (importance: 9-10)")
    print("   - 'I'm studying CS at MIT' (importance: 7-8)")
    print("   - 'I'm getting married!' (importance: 10)")
    print("   - 'diagnosed with depression' (importance: 10)")

    print("\n📚 Should be extracted as FACTS (declarative memory):")
    print("   - 'User's name is Alice'")
    print("   - 'User plays guitar'")
    print("   - 'User lost their dog recently'")
    print("   - 'User studies CS at MIT'")
    print("   - 'User getting married soon'")
    print("   - 'User has depression'")

    # Check logs
    check_logs()

    # Summary
    _print_banner("MANUAL VERIFICATION STEPS")

    print("""
1. Check Docker logs for consolidation output:
   docker logs miku_cheshire_cat_test 2>&1 | tail -100

2. Look for these indicators:
   🌙 [Consolidation] Starting...
   📊 [Consolidation] Fetching unconsolidated memories
   ✨ [Consolidation] Complete! Stats: ...

3. Verify in next conversation:
   Test if Miku remembers:
   - User's name (Alice)
   - That user plays guitar
   - That user is getting married

   Should NOT remember:
   - 'lol', 'k', 'okay'

4. Test memory recall:
   Send: "What do you know about me?"
   Expected: Mentions name, guitar, upcoming marriage, etc.

5. Check memory stats:
   If stats show:
   - Processed: 11 memories
   - Kept: 6-7 important ones
   - Deleted: 4-5 trivial ones
   - Facts learned: 5-6 facts
   Then Phase 2 is working! ✅
""")

    if failed:
        # Repeat the delivery problem at the end, where it will not be lost
        # in the per-message output scrolled past earlier.
        print(f"\n⚠️  {failed} of {len(messages)} messages failed to send;")
        print("   the expected counts above will not match this run.")

    print("\n✨ Phase 2 testing complete!")
    print("\nNext: Run verification queries to test memory recall")


# Run the scenario only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()