Files
miku-discord/cheshire-cat/manual_consolidation.py
koko210Serve ae1e0aa144 add: cheshire-cat configuration, tooling, tests, and documentation
Configuration:
- .env.example, .gitignore, compose.yml (main docker compose)
- docker-compose-amd.yml (ROCm), docker-compose-macos.yml
- start.sh, stop.sh convenience scripts
- LICENSE (Apache 2.0, from upstream Cheshire Cat)

Memory management utilities:
- analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py
- check_memories.py, extract_declarative_facts.py, store_declarative_facts.py
- compare_systems.py (system comparison tool)
- benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py

Test suite:
- quick_test.py, test_setup.py, test_setup_simple.py
- test_consolidation_direct.py, test_declarative_recall.py, test_recall.py
- test_end_to_end.py, test_full_pipeline.py
- test_phase2.py, test_phase2_comprehensive.py

Documentation:
- README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md
- PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md
- POST_OPTIMIZATION_ANALYSIS.md
2026-03-04 00:51:14 +02:00

151 lines
4.9 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Manual Memory Consolidation Script
Directly connects to Qdrant and performs consolidation logic:
1. Query for all memories with consolidated=False
2. Apply heuristic: delete trivial ("lol", "k", ≤2 chars)
3. Mark kept memories as consolidated=True
4. Report stats
This bypasses the Cat's plugin system for direct testing.
"""
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
import sys
# Qdrant connection settings.
QDRANT_HOST = "localhost"  # host where the Qdrant vector store is reachable
QDRANT_PORT = 6333  # Qdrant's default HTTP/REST port
COLLECTION_NAME = "episodic"  # memory collection this script consolidates
# Common chat reactions/abbreviations that carry no memorable content.
_TRIVIAL_PATTERNS = frozenset({
    'lol', 'k', 'ok', 'okay', 'haha', 'lmao', 'xd', 'rofl', 'lmfao',
    'brb', 'gtg', 'afk', 'ttyl', 'lmk', 'idk', 'tbh', 'imo', 'imho',
    'omg', 'wtf', 'fyi', 'btw', 'nvm', 'jk', 'ikr', 'smh',
    'hehe', 'heh', 'gg', 'wp', 'gz', 'gj', 'ty', 'thx', 'np', 'yw',
})


def _is_trivial(content):
    """Return True when *content* is noise not worth keeping.

    A memory is trivial if, after stripping and lowercasing, it is a
    known chat reaction/abbreviation, or a purely alphabetic fragment
    of at most 3 characters (e.g. "hm", "yep").
    """
    text = content.strip().lower()
    return text in _TRIVIAL_PATTERNS or (len(text) <= 3 and text.isalpha())


def _connect():
    """Connect to Qdrant and verify the collection exists; exit(1) on failure."""
    print(f"\n📡 Connecting to Qdrant at {QDRANT_HOST}:{QDRANT_PORT}...")
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT, timeout=10, prefer_grpc=False)
    try:
        client.get_collection(COLLECTION_NAME)
        print(f"✅ Connected to collection '{COLLECTION_NAME}'")
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)
    return client


def _fetch_unconsolidated(client):
    """Return every point whose metadata lacks ``consolidated=True``.

    Pages through the whole collection with repeated ``scroll`` calls —
    a single call caps out at ``limit`` points, so the returned
    next-page offset must be followed until exhausted.  Exits the
    process with status 1 on query errors.
    """
    print(f"\n🔍 Querying for all memories...")
    try:
        points = []
        offset = None
        while True:
            batch, offset = client.scroll(
                collection_name=COLLECTION_NAME,
                limit=1000,
                offset=offset,
                with_payload=True,
                with_vectors=False,
            )
            points.extend(batch)
            if offset is None:  # no more pages
                break
        print(f"✅ Found {len(points)} total memories")
        # A missing 'consolidated' field counts as unconsolidated.
        unconsolidated = [
            p for p in points
            if not p.payload.get('metadata', {}).get('consolidated', False)
        ]
        print(f"📊 Unconsolidated: {len(unconsolidated)}")
        return unconsolidated
    except Exception as e:
        print(f"❌ Error querying memories: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


def main():
    """Run one manual consolidation pass over the episodic collection.

    Connects to Qdrant, fetches every memory not yet marked
    ``consolidated=True``, deletes trivial ones (short fragments and
    common chat reactions) and flags the rest as consolidated, then
    prints summary statistics.  Exits with status 1 on connection or
    query errors; per-point update/delete errors are reported but do
    not abort the run.
    """
    print("=" * 70)
    print("MANUAL MEMORY CONSOLIDATION")
    print("=" * 70)

    client = _connect()
    results = _fetch_unconsolidated(client)
    if not results:
        print("\n⚠️ No unconsolidated memories found!")
        print("All memories have already been consolidated.")
        return

    print(f"\n🔧 Processing memories...")
    stats = {'total': len(results), 'kept': 0, 'deleted': 0}
    for point in results:
        content = point.payload.get('page_content', '')
        if _is_trivial(content):
            # DELETE trivial memory.
            try:
                client.delete(
                    collection_name=COLLECTION_NAME,
                    points_selector=[point.id],
                )
                stats['deleted'] += 1
                print(f"   🗑️ Deleted: '{content[:50]}'")
            except Exception as e:
                print(f"   ❌ Error deleting {point.id}: {e}")
        else:
            # KEEP important memory — mark it as consolidated.
            try:
                metadata = point.payload.get('metadata', {})
                metadata['consolidated'] = True
                client.set_payload(
                    collection_name=COLLECTION_NAME,
                    payload={"metadata": metadata},
                    points=[point.id],
                )
                stats['kept'] += 1
                print(f"   ✅ Kept: '{content[:50]}'")
            except Exception as e:
                print(f"   ❌ Error updating {point.id}: {e}")

    # Final report.
    print("\n" + "=" * 70)
    print("CONSOLIDATION COMPLETE")
    print("=" * 70)
    print(f"📊 Total processed: {stats['total']}")
    print(f"✅ Kept: {stats['kept']}")
    print(f"🗑️ Deleted: {stats['deleted']}")
    # Safe division: the early return above guarantees total > 0 here.
    print(f"📈 Retention rate: {stats['kept']/stats['total']*100:.1f}%")
    print("=" * 70)
# Script entry point: run a single consolidation pass when executed directly.
if __name__ == "__main__":
    main()