# utils/core.py
import asyncio
import aiohttp
import re
import globals
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from utils.logger import get_logger

logger = get_logger('core')

# switch_model() removed - llama-swap handles model switching automatically

# ---------------------------------------------------------------------------
# Name-matching tables. Built ONCE at import time: the previous implementation
# rebuilt ~925 regex variants and searched each of them (x4 positional forms)
# on every single message.
# ---------------------------------------------------------------------------

# Base names for Miku in different scripts
_BASE_NAMES = [
    'miku', 'мику', 'みく', 'ミク', '未来'
]

# Japanese honorifics - all scripts combined
_HONORIFICS = [
    # Latin
    'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
    'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei',
    'senpai', 'jou',
    # Hiragana
    'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん', 'へいか',
    'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの', 'せんせい',
    'せんぱい', 'じょう',
    # Katakana
    'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
    'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ', 'センセイ',
    'センパイ', 'ジョウ',
    # Cyrillic
    'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин', 'хейка', 'хеика',
    'денка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сенсэй', 'сенсеи',
    'сенпай', 'сенпаи', 'джо'
]

# o- prefix variants (Latin, Cyrillic, Hiragana, Katakana)
_O_PREFIXES = ['o-', 'о-', 'お', 'オ']


def _build_name_alternation() -> str:
    """Build a single non-capturing alternation matching every name variant.

    Variants (all lowercased and re.escape'd):
      * base name alone                      -> "miku"
      * base + optional dash/space + honorific -> "miku-chan", "miku chan"
      * o-prefix + optional space + base     -> "o-miku", "おみく"
      * o-prefix + base + honorific          -> "o-miku-chan"

    Regex alternation with backtracking reproduces the old one-pattern-at-a-
    time loop: if a short variant matches but the surrounding punctuation
    requirement fails, the engine retries the longer variants.
    """
    parts = []
    prefixes = [re.escape(p.lower()) for p in _O_PREFIXES]
    honorifics = [re.escape(h.lower()) for h in _HONORIFICS]
    for base in _BASE_NAMES:
        base_escaped = re.escape(base.lower())
        # Base name alone
        parts.append(base_escaped)
        # With honorifics (allows optional dash/space between)
        for honorific in honorifics:
            parts.append(base_escaped + r'[\-\s]*' + honorific)
        # With o- prefix, optionally followed by an honorific
        for prefix in prefixes:
            parts.append(prefix + r'\s*' + base_escaped)
            for honorific in honorifics:
                parts.append(prefix + r'\s*' + base_escaped + r'[\-\s]*' + honorific)
    return '(?:' + '|'.join(parts) + ')'


# Punctuation class used as the "addressed" separator (ASCII + fullwidth).
_PUNCT = r'[,,、!!??.。\s]'
_NAME = _build_name_alternation()

# The four positional forms, precompiled. She must be "addressed", not just
# mentioned somewhere in the middle of a sentence:
#   1. start of message:  "Miku, ..." / "みく!" / "ミクちゃん、..."
#   2. end of message:    "..., Miku" / "...みく" / "...ミクちゃん!"
#   3. middle, surrounded by punctuation: "..., Miku, ..." / "...、ミク、..."
#   4. the name alone:    "Miku" / "みく!" / "ミクちゃん"
# IGNORECASE kept for Unicode casefolding parity with the old implementation
# (input and patterns are both lowercased already).
_ADDRESSED_PATTERNS = [
    re.compile(r'^' + _NAME + r'(?:' + _PUNCT + r'+|$)', re.IGNORECASE),
    re.compile(r'(?:' + _PUNCT + r'+|^)' + _NAME + r'[!!??.。\s]*$', re.IGNORECASE),
    re.compile(_PUNCT + r'+' + _NAME + _PUNCT + r'+', re.IGNORECASE),
    re.compile(r'^\s*' + _NAME + r'[!!??.。]*\s*$', re.IGNORECASE),
]


async def is_miku_addressed(message) -> bool:
    """Return True when a Discord message is addressed to Miku.

    A message counts as addressed when any of the following hold:
      * it is a DM (``message.guild is None``) — always respond;
      * the bot user (``message.guild.me``) is pinged in ``message.mentions``;
      * the message is a reply to one of the bot's own messages;
      * the text names Miku at a sentence boundary (start/end/punctuation-
        delimited/alone) in any supported script, with optional honorific
        and/or o- prefix.

    Args:
        message: a discord.py ``Message``-like object. Only ``guild``,
            ``author``, ``mentions``, ``reference``, ``channel`` and
            ``content`` are accessed.

    Returns:
        bool: True if Miku should respond to this message.
    """
    # Check if this is a DM (no guild): in DMs, always respond.
    if message.guild is None:
        return True

    # Safety check: ensure guild and guild.me exist
    if not message.guild or not message.guild.me:
        logger.warning(f"Invalid guild or guild.me in message from {message.author}")
        return False

    # If message contains a ping for Miku, return true
    if message.guild.me in message.mentions:
        return True

    # If message is a reply, check the referenced message author.
    # fetch_message can fail (deleted message, missing permissions) — treat
    # that as "not a reply to Miku" rather than crashing.
    if message.reference:
        try:
            referenced_msg = await message.channel.fetch_message(message.reference.message_id)
            if referenced_msg.author == message.guild.me:
                return True
        except Exception as e:
            logger.warning(f"Could not fetch referenced message: {e}")

    cleaned_lower = message.content.strip().lower()

    # Patterns are precompiled at import time; per-message work is just the
    # four searches.
    return any(pattern.search(cleaned_lower) for pattern in _ADDRESSED_PATTERNS)


# Vectorstore functionality disabled - not needed with current structured context approach
# If you need embeddings in the future, you can use a different embedding provider
# For now, the bot uses structured prompts from context_manager.py

# def load_miku_knowledge():
#     with open("miku_lore.txt", "r", encoding="utf-8") as f:
#         text = f.read()
#
#     from langchain_text_splitters import RecursiveCharacterTextSplitter
#
#     text_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=520,
#         chunk_overlap=50,
#         separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
#     )
#
#     docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(text)]
#
#     vectorstore = FAISS.from_documents(docs, embeddings)
#     return vectorstore
#
# def load_miku_lyrics():
#     with open("miku_lyrics.txt", "r", encoding="utf-8") as f:
#         lyrics_text = f.read()
#
#     text_splitter = CharacterTextSplitter(chunk_size=520, chunk_overlap=50)
#     docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(lyrics_text)]
#
#     vectorstore = FAISS.from_documents(docs, embeddings)
#     return vectorstore
#
# miku_vectorstore = load_miku_knowledge()
# miku_lyrics_vectorstore = load_miku_lyrics()