112 lines
3.2 KiB
Python
112 lines
3.2 KiB
Python
|
|
#!/usr/bin/env python3
"""
Analyze Phase 2 Consolidation Results

Check what was kept vs. deleted for the comprehensive test.
"""
|
||
|
|
|
||
|
|
from qdrant_client import QdrantClient
|
||
|
|
|
||
|
|
# Connection settings for the Qdrant instance being inspected.
QDRANT_HOST = "localhost"
QDRANT_PORT = 6333
COLLECTION_NAME = "episodic"
# NOTE(review): this constant is not referenced anywhere in the visible script;
# the scroll below is not filtered by user, so every point in the collection
# is analysed. Confirm whether a per-user filter was intended.
TEST_USER_ID = "discord_user_comprehensive_test"
# Number of points requested per scroll page.
SCROLL_PAGE_SIZE = 300

client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT, timeout=10, prefer_grpc=False)

print("=" * 70)
print("PHASE 2 CONSOLIDATION ANALYSIS")
print("=" * 70)

# Get all memories. A single scroll call returns at most `limit` points plus
# the offset of the next page, so keep following that offset until it is None;
# otherwise collections larger than one page would be silently truncated and
# the report below would be computed on partial data.
results = []
next_offset = None
while True:
    page, next_offset = client.scroll(
        collection_name=COLLECTION_NAME,
        limit=SCROLL_PAGE_SIZE,
        offset=next_offset,
        with_payload=True,
        with_vectors=False,
    )
    results.extend(page)
    # An empty page is treated as the end as well, to rule out spinning
    # forever on an unexpected response.
    if next_offset is None or not page:
        break
|
||
|
|
|
||
|
|
# Messages the consolidation pass is expected to delete. The checks further
# down compare these against the whole (lower-cased, stripped) message text.
expected_trivial = ["lol", "k", "ok", "lmao", "haha", "xd", "brb", "gtg"]

# Keywords marking messages the consolidation pass is expected to keep.
# They are matched case-insensitively as substrings of the message text.
expected_keep_keywords = [
    "Sarah Chen",
    "24 years old",
    "Seattle",
    "Microsoft",
    "engaged",
    "grandmother",
    "promoted",
    "Luna died",
    "panic attack",
    "ADHD",
    "piano",
    "Japanese",
    "Ghibli",
    "vinyl",
    "marathon",
    "Emma",
    "Jennifer",
    "Alex",
    "David",
    "cilantro",
    "forest green",
    "vegetarian",
    "pineapple",
    "Japan",
    "apartment",
    "insomnia",
    "pottery",
]
|
||
|
|
|
||
|
|
# Classify every fetched point as a surviving trivial message, a kept
# important message, or neither, then print the report.

# Lookup structures built once, outside the loop.
trivial_lookup = set(expected_trivial)
keep_keywords_lower = [keyword.lower() for keyword in expected_keep_keywords]


def _page_content(point):
    """Return the point's ``page_content`` payload value, or '' when absent.

    Tolerates a point without a payload and a missing or None
    ``page_content`` so one malformed record cannot abort the analysis.
    """
    payload = point.payload or {}
    return payload.get('page_content') or ''


kept_important = []
present_trivial = set()

for point in results:
    content = _page_content(point)
    lowered = content.lower()
    normalized = lowered.strip()

    if normalized in trivial_lookup:
        # This is trivial - should have been deleted
        print(f"⚠️ TRIVIAL STILL EXISTS: '{content}'")
        present_trivial.add(normalized)
    elif any(keyword in lowered for keyword in keep_keywords_lower):
        # Important message - should be kept.
        # Trivial tokens are deliberately NOT substring-matched here: "k" or
        # "ok" occur inside almost any sentence and would count unrelated
        # memories as kept test messages.
        kept_important.append(content)

# Trivial messages that no longer appear in the collection, in the order
# they are listed in expected_trivial.
deleted_trivial = [t for t in expected_trivial if t not in present_trivial]
still_present = [t for t in expected_trivial if t in present_trivial]

print("\n📊 RESULTS:")
print(f"✅ Important messages KEPT: {len(kept_important)}")
print(f"🗑️ Trivial messages DELETED: {len(deleted_trivial)}")
# Derived from the list itself rather than a hard-coded count, so the figure
# stays correct when expected_trivial is edited.
print(f"⚠️ Trivial messages STILL PRESENT: {len(still_present)}")

print("\n🗑️ Successfully deleted:")
for msg in deleted_trivial:
    print(f" - '{msg}'")

if still_present:
    print("\n⚠️ Still present (should have been deleted):")
    for trivial in still_present:
        print(f" - '{trivial}'")

print("\n✅ Sample of important memories kept:")
for msg in kept_important[:10]:
    # Only show an ellipsis when the message was actually cut off.
    preview = msg if len(msg) <= 60 else f"{msg[:60]}..."
    print(f" - '{preview}'")
|
||
|
|
|
||
|
|
separator = "=" * 70

print("\n" + separator)
print("CONSOLIDATED MEMORY CHECK")
print(separator)

# One boolean per fetched point: the truthiness of the 'consolidated' flag
# stored under the payload's 'metadata' dict (missing flag counts as False).
consolidated_flags = [
    bool(point.payload.get('metadata', {}).get('consolidated', False))
    for point in results
]
consolidated_count = sum(consolidated_flags)
unconsolidated_count = len(consolidated_flags) - consolidated_count

print(f"✅ Memories marked consolidated: {consolidated_count}")
print(f"⏳ Memories still unconsolidated: {unconsolidated_count}")

print("\n" + separator)
|