151 lines
4.9 KiB
Python
151 lines
4.9 KiB
Python
|
|
#!/usr/bin/env python3
"""
Manual Memory Consolidation Script

Directly connects to Qdrant and performs consolidation logic:
1. Query for all memories with consolidated=False
2. Apply heuristic: delete trivial content ("lol", "k", other chat filler,
   and fragments of at most 3 alphabetic characters)
3. Mark kept memories as consolidated=True
4. Report stats

This bypasses the Cat's plugin system for direct testing.
"""
|
||
|
|
|
||
|
|
from qdrant_client import QdrantClient
|
||
|
|
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
||
|
|
import sys
|
||
|
|
|
||
|
|
# Qdrant connection
QDRANT_HOST: str = "localhost"  # host where the Qdrant server is reachable
QDRANT_PORT: int = 6333  # Qdrant REST port (client below sets prefer_grpc=False)
COLLECTION_NAME: str = "episodic"  # memory collection this script consolidates
|
||
|
|
|
||
|
|
|
||
|
|
def main():
|
||
|
|
print("=" * 70)
|
||
|
|
print("MANUAL MEMORY CONSOLIDATION")
|
||
|
|
print("=" * 70)
|
||
|
|
|
||
|
|
# Connect to Qdrant
|
||
|
|
print(f"\n📡 Connecting to Qdrant at {QDRANT_HOST}:{QDRANT_PORT}...")
|
||
|
|
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT, timeout=10, prefer_grpc=False)
|
||
|
|
|
||
|
|
# Check collection exists
|
||
|
|
try:
|
||
|
|
collection_info = client.get_collection(COLLECTION_NAME)
|
||
|
|
print(f"✅ Connected to collection '{COLLECTION_NAME}'")
|
||
|
|
except Exception as e:
|
||
|
|
print(f"❌ Error: {e}")
|
||
|
|
sys.exit(1)
|
||
|
|
|
||
|
|
# Query for ALL memories (since the field might not exist yet)
|
||
|
|
print(f"\n🔍 Querying for all memories...")
|
||
|
|
|
||
|
|
try:
|
||
|
|
# Get all memories - we'll filter based on metadata presence
|
||
|
|
results, next_offset = client.scroll(
|
||
|
|
collection_name=COLLECTION_NAME,
|
||
|
|
limit=1000,
|
||
|
|
with_payload=True,
|
||
|
|
with_vectors=False
|
||
|
|
)
|
||
|
|
|
||
|
|
print(f"✅ Found {len(results)} total memories")
|
||
|
|
|
||
|
|
# Filter to only unconsolidated ones (those without the field or with False)
|
||
|
|
unconsolidated = []
|
||
|
|
for point in results:
|
||
|
|
metadata = point.payload.get('metadata', {})
|
||
|
|
consolidated = metadata.get('consolidated', False)
|
||
|
|
if not consolidated:
|
||
|
|
unconsolidated.append(point)
|
||
|
|
|
||
|
|
print(f"📊 Unconsolidated: {len(unconsolidated)}")
|
||
|
|
|
||
|
|
if len(unconsolidated) == 0:
|
||
|
|
print("\n⚠️ No unconsolidated memories found!")
|
||
|
|
print("All memories have already been consolidated.")
|
||
|
|
return
|
||
|
|
|
||
|
|
# Use the unconsolidated subset for processing
|
||
|
|
results = unconsolidated
|
||
|
|
|
||
|
|
except Exception as e:
|
||
|
|
print(f"❌ Error querying memories: {e}")
|
||
|
|
import traceback
|
||
|
|
traceback.print_exc()
|
||
|
|
sys.exit(1)
|
||
|
|
|
||
|
|
# Process each memory
|
||
|
|
print(f"\n🔧 Processing memories...")
|
||
|
|
stats = {
|
||
|
|
'total': len(results),
|
||
|
|
'kept': 0,
|
||
|
|
'deleted': 0
|
||
|
|
}
|
||
|
|
|
||
|
|
# Expanded trivial patterns - common reactions and abbreviations
|
||
|
|
trivial_patterns = [
|
||
|
|
'lol', 'k', 'ok', 'okay', 'haha', 'lmao', 'xd', 'rofl', 'lmfao',
|
||
|
|
'brb', 'gtg', 'afk', 'ttyl', 'lmk', 'idk', 'tbh', 'imo', 'imho',
|
||
|
|
'omg', 'wtf', 'fyi', 'btw', 'nvm', 'jk', 'ikr', 'smh',
|
||
|
|
'hehe', 'heh', 'gg', 'wp', 'gz', 'gj', 'ty', 'thx', 'np', 'yw'
|
||
|
|
]
|
||
|
|
|
||
|
|
for point in results:
|
||
|
|
point_id = point.id
|
||
|
|
content = point.payload.get('page_content', '')
|
||
|
|
metadata = point.payload.get('metadata', {})
|
||
|
|
|
||
|
|
# Apply heuristic
|
||
|
|
is_trivial = False
|
||
|
|
|
||
|
|
# Check length (1-3 chars that are just letters/common patterns)
|
||
|
|
if len(content.strip()) <= 3:
|
||
|
|
# Check if it's just letters or in trivial patterns
|
||
|
|
if content.lower().strip() in trivial_patterns or content.strip().isalpha():
|
||
|
|
is_trivial = True
|
||
|
|
|
||
|
|
# Check if it's a common reaction/abbreviation
|
||
|
|
if content.lower().strip() in trivial_patterns:
|
||
|
|
is_trivial = True
|
||
|
|
|
||
|
|
if is_trivial:
|
||
|
|
# DELETE trivial memory
|
||
|
|
try:
|
||
|
|
client.delete(
|
||
|
|
collection_name=COLLECTION_NAME,
|
||
|
|
points_selector=[point_id]
|
||
|
|
)
|
||
|
|
stats['deleted'] += 1
|
||
|
|
print(f" 🗑️ Deleted: '{content[:50]}'")
|
||
|
|
except Exception as e:
|
||
|
|
print(f" ❌ Error deleting {point_id}: {e}")
|
||
|
|
else:
|
||
|
|
# KEEP important memory - mark as consolidated
|
||
|
|
try:
|
||
|
|
metadata['consolidated'] = True
|
||
|
|
client.set_payload(
|
||
|
|
collection_name=COLLECTION_NAME,
|
||
|
|
payload={"metadata": metadata},
|
||
|
|
points=[point_id]
|
||
|
|
)
|
||
|
|
stats['kept'] += 1
|
||
|
|
print(f" ✅ Kept: '{content[:50]}'")
|
||
|
|
except Exception as e:
|
||
|
|
print(f" ❌ Error updating {point_id}: {e}")
|
||
|
|
|
||
|
|
# Report results
|
||
|
|
print("\n" + "=" * 70)
|
||
|
|
print("CONSOLIDATION COMPLETE")
|
||
|
|
print("=" * 70)
|
||
|
|
print(f"📊 Total processed: {stats['total']}")
|
||
|
|
print(f"✅ Kept: {stats['kept']}")
|
||
|
|
print(f"🗑️ Deleted: {stats['deleted']}")
|
||
|
|
print(f"📈 Retention rate: {stats['kept']/stats['total']*100:.1f}%")
|
||
|
|
print("=" * 70)
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Run the consolidation only when executed as a script (not on import).
    main()
|