#!/usr/bin/env python3
"""
Manual Memory Consolidation Script

Directly connects to Qdrant and performs consolidation logic:
1. Query for all memories with consolidated=False
2. Apply heuristic: delete trivial ("lol", "k", <=3 alphabetic chars)
3. Mark kept memories as consolidated=True
4. Report stats

This bypasses the Cat's plugin system for direct testing.
"""
import sys

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

# Qdrant connection
QDRANT_HOST = "localhost"
QDRANT_PORT = 6333
COLLECTION_NAME = "episodic"

# Common reactions/abbreviations considered trivial regardless of length.
# frozenset gives O(1) membership tests and is built once at import time.
TRIVIAL_PATTERNS = frozenset([
    'lol', 'k', 'ok', 'okay', 'haha', 'lmao', 'xd', 'rofl', 'lmfao',
    'brb', 'gtg', 'afk', 'ttyl', 'lmk', 'idk', 'tbh', 'imo', 'imho',
    'omg', 'wtf', 'fyi', 'btw', 'nvm', 'jk', 'ikr', 'smh', 'hehe',
    'heh', 'gg', 'wp', 'gz', 'gj', 'ty', 'thx', 'np', 'yw'
])


def _is_trivial(content: str) -> bool:
    """Return True when *content* is a throwaway message not worth keeping.

    A memory is trivial when its stripped text is a known reaction or
    abbreviation (any length), or a short purely-alphabetic fragment.
    The original code tested pattern membership twice; this collapses the
    heuristic into a single equivalent check.
    """
    stripped = content.strip()
    if stripped.lower() in TRIVIAL_PATTERNS:
        return True
    # NOTE(review): the module docstring originally said "<=2 chars" but the
    # code used <= 3 — keeping 3 to preserve behavior; confirm the threshold.
    return len(stripped) <= 3 and stripped.isalpha()


def _fetch_all_points(client: QdrantClient) -> list:
    """Scroll the whole collection, following pagination until exhausted.

    Fix: the original fetched only the first page (limit=1000) and discarded
    ``next_offset``, silently skipping everything beyond 1000 points. This
    loops until Qdrant reports no further offset.
    """
    points = []
    offset = None
    while True:
        batch, offset = client.scroll(
            collection_name=COLLECTION_NAME,
            limit=1000,
            offset=offset,
            with_payload=True,
            with_vectors=False,
        )
        points.extend(batch)
        if offset is None:
            return points


def main():
    """Run one consolidation pass over the episodic collection.

    Deletes trivial memories, marks the rest consolidated=True, and prints
    a summary report. Exits with status 1 if the collection is missing or
    the initial query fails.
    """
    print("=" * 70)
    print("MANUAL MEMORY CONSOLIDATION")
    print("=" * 70)

    # Connect to Qdrant
    print(f"\nšŸ“” Connecting to Qdrant at {QDRANT_HOST}:{QDRANT_PORT}...")
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT, timeout=10, prefer_grpc=False)

    # Check collection exists (result unused; the call raises if it is missing)
    try:
        client.get_collection(COLLECTION_NAME)
        print(f"āœ… Connected to collection '{COLLECTION_NAME}'")
    except Exception as e:
        print(f"āŒ Error: {e}")
        sys.exit(1)

    # Query for ALL memories (the 'consolidated' field might not exist yet),
    # then filter client-side on metadata presence.
    print(f"\nšŸ” Querying for all memories...")
    try:
        all_points = _fetch_all_points(client)
        print(f"āœ… Found {len(all_points)} total memories")

        # Unconsolidated = field absent, or explicitly False.
        unconsolidated = [
            point for point in all_points
            if not point.payload.get('metadata', {}).get('consolidated', False)
        ]
        print(f"šŸ“Š Unconsolidated: {len(unconsolidated)}")

        if not unconsolidated:
            print("\nāš ļø No unconsolidated memories found!")
            print("All memories have already been consolidated.")
            return

        results = unconsolidated
    except Exception as e:
        print(f"āŒ Error querying memories: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    # Process each memory
    print(f"\nšŸ”§ Processing memories...")
    stats = {
        'total': len(results),
        'kept': 0,
        'deleted': 0
    }

    for point in results:
        point_id = point.id
        content = point.payload.get('page_content', '')
        metadata = point.payload.get('metadata', {})

        if _is_trivial(content):
            # DELETE trivial memory
            try:
                client.delete(
                    collection_name=COLLECTION_NAME,
                    points_selector=[point_id]
                )
                stats['deleted'] += 1
                print(f"  šŸ—‘ļø Deleted: '{content[:50]}'")
            except Exception as e:
                print(f"  āŒ Error deleting {point_id}: {e}")
        else:
            # KEEP important memory - mark as consolidated
            try:
                metadata['consolidated'] = True
                client.set_payload(
                    collection_name=COLLECTION_NAME,
                    payload={"metadata": metadata},
                    points=[point_id]
                )
                stats['kept'] += 1
                print(f"  āœ… Kept: '{content[:50]}'")
            except Exception as e:
                print(f"  āŒ Error updating {point_id}: {e}")

    # Report results ('total' is never 0 here — we return early when the
    # unconsolidated list is empty — so the retention division is safe)
    print("\n" + "=" * 70)
    print("CONSOLIDATION COMPLETE")
    print("=" * 70)
    print(f"šŸ“Š Total processed: {stats['total']}")
    print(f"āœ… Kept: {stats['kept']}")
    print(f"šŸ—‘ļø Deleted: {stats['deleted']}")
    print(f"šŸ“ˆ Retention rate: {stats['kept']/stats['total']*100:.1f}%")
    print("=" * 70)


if __name__ == "__main__":
    main()