add: cheshire-cat configuration, tooling, tests, and documentation
Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
This commit is contained in:
150
cheshire-cat/manual_consolidation.py
Executable file
150
cheshire-cat/manual_consolidation.py
Executable file
@@ -0,0 +1,150 @@
|
||||
#!/usr/bin/env python3
"""
Manual Memory Consolidation Script

Directly connects to Qdrant and performs consolidation logic:
1. Query for all memories with consolidated=False
2. Apply heuristic: delete trivial ("lol", "k", ≤2 chars)
3. Mark kept memories as consolidated=True
4. Report stats

This bypasses the Cat's plugin system for direct testing.
"""

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
import sys

# Qdrant connection settings.
# NOTE(review): assumes Qdrant is reachable on the default REST port of a
# local docker-compose deployment -- confirm against compose.yml if this
# script fails to connect.
QDRANT_HOST = "localhost"
QDRANT_PORT = 6333
# Name of the Qdrant collection holding the Cat's episodic memories.
COLLECTION_NAME = "episodic"
# Common reactions and abbreviations that carry no long-term memory value.
# A frozenset gives O(1) membership tests instead of scanning a list per point.
TRIVIAL_PATTERNS = frozenset([
    'lol', 'k', 'ok', 'okay', 'haha', 'lmao', 'xd', 'rofl', 'lmfao',
    'brb', 'gtg', 'afk', 'ttyl', 'lmk', 'idk', 'tbh', 'imo', 'imho',
    'omg', 'wtf', 'fyi', 'btw', 'nvm', 'jk', 'ikr', 'smh',
    'hehe', 'heh', 'gg', 'wp', 'gz', 'gj', 'ty', 'thx', 'np', 'yw'
])


def _is_trivial(content: str) -> bool:
    """Return True if *content* is a throwaway message not worth keeping.

    A memory is trivial when it is a known reaction/abbreviation (any
    length, case-insensitive) or a very short (<= 3 chars) purely
    alphabetic fragment.  This is the same predicate the original
    inline checks computed, collapsed into one expression.
    """
    stripped = content.strip()
    return stripped.lower() in TRIVIAL_PATTERNS or (
        len(stripped) <= 3 and stripped.isalpha()
    )


def _fetch_all_points(client):
    """Scroll through the entire collection, following pagination.

    The original code issued a single scroll(limit=1000) call and dropped
    ``next_offset``, silently skipping everything past the first 1000
    points.  Here we loop until Qdrant reports no further offset.
    """
    points = []
    offset = None
    while True:
        batch, offset = client.scroll(
            collection_name=COLLECTION_NAME,
            limit=1000,
            offset=offset,
            with_payload=True,
            with_vectors=False,
        )
        points.extend(batch)
        if offset is None:
            return points


def main():
    """Run one manual consolidation pass against the episodic collection.

    Connects to Qdrant, loads every point, deletes trivial memories,
    marks the rest with ``metadata.consolidated = True``, and prints a
    summary.  Exits with status 1 if the collection is unreachable or
    the initial query fails.
    """
    print("=" * 70)
    print("MANUAL MEMORY CONSOLIDATION")
    print("=" * 70)

    # Connect to Qdrant
    print(f"\n📡 Connecting to Qdrant at {QDRANT_HOST}:{QDRANT_PORT}...")
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT, timeout=10, prefer_grpc=False)

    # Fail fast if the collection does not exist / server is down.
    try:
        client.get_collection(COLLECTION_NAME)
        print(f"✅ Connected to collection '{COLLECTION_NAME}'")
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)

    # Query for ALL memories (the 'consolidated' field might not exist yet,
    # so we fetch everything and filter client-side on the payload).
    print(f"\n🔍 Querying for all memories...")

    try:
        all_points = _fetch_all_points(client)
        print(f"✅ Found {len(all_points)} total memories")

        # Keep only unconsolidated points (field absent or falsy).
        unconsolidated = [
            point for point in all_points
            if not point.payload.get('metadata', {}).get('consolidated', False)
        ]
        print(f"📊 Unconsolidated: {len(unconsolidated)}")

        if not unconsolidated:
            print("\n⚠️ No unconsolidated memories found!")
            print("All memories have already been consolidated.")
            return

        results = unconsolidated
    except Exception as e:
        print(f"❌ Error querying memories: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    # Process each memory: delete trivial ones, mark the rest consolidated.
    print(f"\n🔧 Processing memories...")
    stats = {
        'total': len(results),
        'kept': 0,
        'deleted': 0
    }

    for point in results:
        point_id = point.id
        content = point.payload.get('page_content', '')
        metadata = point.payload.get('metadata', {})

        if _is_trivial(content):
            # DELETE trivial memory
            try:
                client.delete(
                    collection_name=COLLECTION_NAME,
                    points_selector=[point_id]
                )
                stats['deleted'] += 1
                print(f"  🗑️  Deleted: '{content[:50]}'")
            except Exception as e:
                print(f"  ❌ Error deleting {point_id}: {e}")
        else:
            # KEEP important memory - mark as consolidated
            try:
                metadata['consolidated'] = True
                client.set_payload(
                    collection_name=COLLECTION_NAME,
                    payload={"metadata": metadata},
                    points=[point_id]
                )
                stats['kept'] += 1
                print(f"  ✅ Kept: '{content[:50]}'")
            except Exception as e:
                print(f"  ❌ Error updating {point_id}: {e}")

    # Report results.  stats['total'] >= 1 here (we returned early on an
    # empty unconsolidated set), so the retention-rate division is safe.
    print("\n" + "=" * 70)
    print("CONSOLIDATION COMPLETE")
    print("=" * 70)
    print(f"📊 Total processed: {stats['total']}")
    print(f"✅ Kept: {stats['kept']}")
    print(f"🗑️  Deleted: {stats['deleted']}")
    print(f"📈 Retention rate: {stats['kept']/stats['total']*100:.1f}%")
    print("=" * 70)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user