Files
miku-discord/cheshire-cat/analyze_consolidation.py

112 lines
3.2 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
"""
Analyze Phase 2 Consolidation Results
Check what was kept vs deleted for the comprehensive test.
"""
from qdrant_client import QdrantClient
# Connection settings for the Qdrant instance this script queries.
QDRANT_HOST = "localhost"
QDRANT_PORT = 6333
# Name of the collection whose points are scrolled and analyzed below.
COLLECTION_NAME = "episodic"
# NOTE(review): this constant is not referenced anywhere in the visible
# script -- the scroll is not filtered by user. Confirm whether a per-user
# filter was intended.
TEST_USER_ID = "discord_user_comprehensive_test"
# Client is built with gRPC disabled (prefer_grpc=False) and timeout=10
# (presumably seconds -- confirm against the qdrant-client documentation).
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT, timeout=10, prefer_grpc=False)
print("=" * 70)
print("PHASE 2 CONSOLIDATION ANALYSIS")
print("=" * 70)

# Fetch every point in the collection, 300 per page.
#
# ``scroll`` returns a page of points together with the offset of the next
# page. A single call would silently stop at the first 300 points, and any
# trivial message that happened to fall outside that page would later be
# reported as "deleted". Keep requesting pages until the returned offset is
# None, which signals that the collection has been exhausted.
results = []
next_offset = None
while True:
    page, next_offset = client.scroll(
        collection_name=COLLECTION_NAME,
        limit=300,
        offset=next_offset,
        with_payload=True,
        with_vectors=False,
    )
    results.extend(page)
    if next_offset is None:
        break
# Messages that consolidation is expected to have removed. They are compared
# against the full, normalized message text elsewhere in the script.
expected_trivial = [
    "lol",
    "k",
    "ok",
    "lmao",
    "haha",
    "xd",
    "brb",
    "gtg",
]

# Keywords identifying messages that consolidation is expected to have kept.
expected_keep_keywords = [
    "Sarah Chen",
    "24 years old",
    "Seattle",
    "Microsoft",
    "engaged",
    "grandmother",
    "promoted",
    "Luna died",
    "panic attack",
    "ADHD",
    "piano",
    "Japanese",
    "Ghibli",
    "vinyl",
    "marathon",
    "Emma",
    "Jennifer",
    "Alex",
    "David",
    "cilantro",
    "forest green",
    "vegetarian",
    "pineapple",
    "Japan",
    "apartment",
    "insomnia",
    "pottery",
]
# Check what exists
kept_messages = []  # never populated by the visible script; kept so the name stays defined
kept_important = []
deleted_trivial = []

# Lower-case the lookup terms once rather than on every comparison.
_trivial_terms = {word.lower() for word in expected_trivial}
_keep_terms = [keyword.lower() for keyword in expected_keep_keywords]

for point in results:
    # Tolerate points with no payload or no text instead of crashing.
    payload = point.payload or {}
    content = payload.get('page_content') or ''
    normalized = content.lower().strip()

    if normalized in _trivial_terms:
        # The whole message is a trivial filler word - should have been deleted.
        print(f"⚠️ TRIVIAL STILL EXISTS: '{content}'")
    elif any(term in normalized for term in _keep_terms):
        # Matches an important keyword - should be kept.
        # Only the keep-keywords are substring-matched here: matching the
        # trivial words as substrings (e.g. "k", "ok") would classify almost
        # any message as an important kept memory.
        kept_important.append(content)
# A trivial word counts as deleted when no fetched point's text, lower-cased
# and stripped, is exactly that word. ``any`` stops at the first match, just
# like a manual scan with an early break.
deleted_trivial.extend(
    word
    for word in expected_trivial
    if not any(
        record.payload.get('page_content', '').lower().strip() == word
        for record in results
    )
)
# Summarise the outcome.
# The "still present" figures are derived from expected_trivial itself rather
# than a hard-coded 8, so they stay correct if the list is edited.
still_present = [word for word in expected_trivial if word not in deleted_trivial]

print("\n📊 RESULTS:")
print(f"✅ Important messages KEPT: {len(kept_important)}")
print(f"🗑️ Trivial messages DELETED: {len(deleted_trivial)}")
print(f"⚠️ Trivial messages STILL PRESENT: {len(still_present)}")

print("\n🗑️ Successfully deleted:")
for msg in deleted_trivial:
    print(f" - '{msg}'")

if still_present:
    print("\n⚠️ Still present (should have been deleted):")
    for trivial in still_present:
        print(f" - '{trivial}'")

print("\n✅ Sample of important memories kept:")
for msg in kept_important[:10]:
    # Show at most 60 characters; mark with an ellipsis only when text was cut.
    suffix = "..." if len(msg) > 60 else ""
    print(f" - '{msg[:60]}{suffix}'")
print("\n" + "=" * 70)
print("CONSOLIDATED MEMORY CHECK")
print("=" * 70)

# Tally fetched points by the truthiness of the 'consolidated' flag inside
# each payload's 'metadata' mapping; a missing flag counts as unconsolidated.
consolidated_count = sum(
    1
    for record in results
    if record.payload.get('metadata', {}).get('consolidated', False)
)
unconsolidated_count = len(results) - consolidated_count

print(f"✅ Memories marked consolidated: {consolidated_count}")
print(f"⏳ Memories still unconsolidated: {unconsolidated_count}")
print("\n" + "=" * 70)