fix: Phase 2 integrity review - v2.0.0 rewrite & bugfixes
Memory Consolidation Plugin (828 -> 465 lines):
- Replace SentenceTransformer with cat.embedder.embed_query() for vector consistency
- Fix per-user fact isolation: source=user_id instead of global
- Add duplicate fact detection (_is_duplicate_fact, score_threshold=0.85)
- Remove ~350 lines of dead async run_consolidation() code
- Remove duplicate declarative search in before_cat_sends_message
- Unify trivial patterns into TRIVIAL_PATTERNS frozenset
- Remove all sys.stderr.write debug logging
- Remove sentence-transformers from requirements.txt (no external deps)

Loguru Fix (cheshire-cat/cat/log.py):
- Patch Cat v1.6.2 loguru format to provide default extra fields
- Fixes KeyError: 'original_name' from third-party libs (fastembed)
- Mounted via docker-compose volume

Discord Bridge:
- Copy discord_bridge.py to cat-plugins/ (was empty directory)

Test Results (6/7 pass, 100% fact recall):
- 11 facts extracted, per-user isolation working
- Duplicate detection effective (+2 on 2nd run)
- 5/5 natural language recall queries correct
This commit is contained in:
109
cat-plugins/discord_bridge/discord_bridge.py
Normal file
109
cat-plugins/discord_bridge/discord_bridge.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Discord Bridge Plugin for Cheshire Cat
|
||||
|
||||
This plugin enriches Cat's memory system with Discord context:
|
||||
- Unified user identity across all servers and DMs
|
||||
- Guild/channel metadata for context tracking
|
||||
- Minimal filtering before storage (only skip obvious junk)
|
||||
- Marks memories as unconsolidated for nightly processing
|
||||
|
||||
Phase 1 Implementation
|
||||
"""
|
||||
|
||||
from cat.mad_hatter.decorators import hook
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
|
||||
@hook(priority=100)
def before_cat_reads_message(user_message_json: dict, cat) -> dict:
    """
    Attach Discord context to an incoming message before it is processed.

    Reads ``guild_id`` and ``channel_id`` from ``cat.working_memory`` and
    writes them, together with the current local timestamp (ISO 8601), into
    ``user_message_json['metadata']``. The ``metadata`` entry is created as
    an empty dict when the message does not have one yet.

    Args:
        user_message_json: Incoming message payload; mutated in place.
        cat: Session object exposing ``working_memory``.

    Returns:
        The same ``user_message_json`` object, with metadata filled in.
    """
    # The Discord bot is expected to place these values in working memory
    # when it calls the Cat API.
    memory = cat.working_memory
    origin_guild = memory.get('guild_id')
    origin_channel = memory.get('channel_id')

    # Make sure there is a metadata container to write into.
    if 'metadata' not in user_message_json:
        user_message_json['metadata'] = {}
    meta = user_message_json['metadata']

    # A missing or falsy guild id is recorded as 'dm'.
    meta['guild_id'] = origin_guild if origin_guild else 'dm'
    meta['channel_id'] = origin_channel
    meta['timestamp'] = datetime.now().isoformat()

    return user_message_json
|
||||
|
||||
|
||||
@hook(priority=100)
def before_cat_stores_episodic_memory(doc, cat):
    """
    Drop trivial messages and tag everything else with Discord metadata.

    Phase 1 keeps filtering minimal: only messages that are one or two word
    characters long, a bare reaction word (lol, lmao, ...), or a single
    ``:emoji:`` token are skipped. Every other memory is marked as
    unconsolidated and stamped with the storage time, guild, channel and
    user before being handed back.

    Args:
        doc: Memory document exposing ``page_content`` and ``metadata``;
            its metadata is mutated in place.
        cat: Session object exposing ``working_memory`` and ``user_id``.

    Returns:
        ``None`` for a trivial message, otherwise the enriched ``doc``.
    """
    text = doc.page_content.strip()
    lowered = text.lower()

    # Patterns are matched against the lower-cased, stripped message.
    trivial_patterns = (
        r'^\w{1,2}$',                        # 1-2 character messages: "k", "ok"
        r'^(lol|lmao|haha|hehe|xd|rofl)$',   # Pure reactions
        r'^:[\w_]+:$',                       # Discord emoji only: ":smile:"
    )

    if any(re.match(rx, lowered) for rx in trivial_patterns):
        print(f"🗑️ [Discord Bridge] Skipping trivial message: {text}")
        # NOTE(review): returning None is intended to mean "do not store";
        # whether the framework honours that depends on how it treats a
        # None hook result - confirm against the hook runner.
        return None

    stored_at = datetime.now().isoformat()

    # Discord context is read from working memory.
    memory = cat.working_memory
    guild_id = memory.get('guild_id')
    channel_id = memory.get('channel_id')

    doc.metadata.update({
        'consolidated': False,               # Needs nightly processing
        'stored_at': stored_at,
        'guild_id': guild_id or 'dm',
        'channel_id': channel_id,
        # CRITICAL: Cat filters episodic memories by source=user_id.
        'source': cat.user_id,
        # The literal 'discord' marker is kept under its own key.
        'discord_source': 'discord',
    })

    print(f"💾 [Discord Bridge] Storing memory (unconsolidated): {text[:50]}...")
    print(f" User: {cat.user_id}, Guild: {doc.metadata['guild_id']}, Channel: {channel_id}")

    return doc
|
||||
|
||||
|
||||
@hook(priority=50)
def after_cat_recalls_memories(cat):
    """
    Log a summary of the episodic memories that were just recalled.

    Writes a marker line to stderr on every call. When
    ``cat.working_memory`` holds any ``episodic_memories``, prints how many
    were recalled for ``cat.user_id`` and the distinct ``guild_id`` values
    found in their metadata.

    Args:
        cat: Session object exposing ``working_memory`` and ``user_id``.

    Returns:
        None. The hook only produces log output.
    """
    import sys

    sys.stderr.write("🧠 [Discord Bridge] after_cat_recalls_memories HOOK CALLED!\n")
    sys.stderr.flush()

    episodic_memories = cat.working_memory.get('episodic_memories', [])
    if not episodic_memories:
        return

    print(f"🧠 [Discord Bridge] Recalled {len(episodic_memories)} episodic memories for user {cat.user_id}")

    # Each recalled entry is a sequence whose first item is the document.
    # Index it instead of unpacking a fixed (doc, score) pair, so entries
    # that carry extra fields (presumably vector / id, depending on the Cat
    # version - confirm) do not raise ValueError.
    # Guild ids are coerced to str because str.join rejects non-string
    # items, and sorted so the log line is deterministic.
    guilds = sorted({
        str(entry[0].metadata.get('guild_id', 'unknown'))
        for entry in episodic_memories
    })
    print(f" From guilds: {', '.join(guilds)}")
|
||||
|
||||
|
||||
# Plugin metadata
|
||||
# Version string of this plugin module.
__version__ = "1.0.0"
|
||||
# One-line human-readable summary of what this plugin provides.
__description__ = "Discord bridge with unified user identity and sleep consolidation support"
|
||||
Reference in New Issue
Block a user