2025-12-07 17:15:09 +02:00
|
|
|
|
# utils/core.py
|
|
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
|
import aiohttp
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
import globals
|
|
|
|
|
|
from langchain_community.vectorstores import FAISS
|
|
|
|
|
|
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
|
|
|
|
|
|
from langchain_core.documents import Document
|
feat: Implement comprehensive non-hierarchical logging system
- Created new logging infrastructure with per-component filtering
- Added 6 log levels: DEBUG, INFO, API, WARNING, ERROR, CRITICAL
- Implemented non-hierarchical level control (any combination can be enabled)
- Migrated 917 print() statements across 31 files to structured logging
- Created web UI (system.html) for runtime configuration with dark theme
- Added global level controls to enable/disable levels across all components
- Added timestamp format control (off/time/date/datetime options)
- Implemented log rotation (10MB per file, 5 backups)
- Added API endpoints for dynamic log configuration
- Configured HTTP request logging with filtering via api.requests component
- Intercepted APScheduler logs with proper formatting
- Fixed persistence paths to use /app/memory for Docker volume compatibility
- Fixed checkbox display bug in web UI (enabled_levels now properly shown)
- Changed System Settings button to open in same tab instead of new window
Components: bot, api, api.requests, autonomous, persona, vision, llm,
conversation, mood, dm, scheduled, gpu, media, server, commands,
sentiment, core, apscheduler
All settings persist across container restarts via JSON config.
2026-01-10 20:46:19 +02:00
|
|
|
|
from utils.logger import get_logger
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level logger obtained via get_logger for the 'core' component.
logger = get_logger('core')
|
2025-12-07 17:15:09 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# switch_model() removed - llama-swap handles model switching automatically
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Base names for Miku in different scripts.
_MIKU_BASE_NAMES = (
    'miku', 'мику', 'みく', 'ミク', '未来',
)

# Japanese honorifics - all scripts combined.
_MIKU_HONORIFICS = (
    # Latin
    'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
    'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei', 'senpai', 'jou',
    # Hiragana
    'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん', 'へいか',
    'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの', 'せんせい', 'せんぱい', 'じょう',
    # Katakana
    'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
    'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ', 'センセイ', 'センパイ', 'ジョウ',
    # Cyrillic
    'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин', 'хейка', 'хеика',
    'денка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо',
)

# o- prefix variants: Latin "o-", Cyrillic "о-" (written as an escape so it is
# visibly distinct from the Latin one), hiragana and katakana.
_MIKU_O_PREFIXES = ('o-', '\u043e-', 'お', 'オ')

# Characters that may separate the name from the rest of the message.
# Includes ASCII and fullwidth comma / exclamation mark / question mark
# (U+FF0C, U+FF01, U+FF1F), the ideographic comma and full stop, and any
# whitespace.
_MIKU_SEPARATOR = r'[,\uff0c、!\uff01?\uff1f.。\s]'


def _miku_alternation(words):
    """Return a non-capturing regex group matching any of *words* literally."""
    return '(?:' + '|'.join(re.escape(word.lower()) for word in words) + ')'


def _build_miku_address_regex():
    """Compile the single regex used by is_miku_addressed().

    The name part is: optional o-prefix (+ optional whitespace), a base name,
    then an optional honorific preceded by any run of dashes/whitespace.
    Because regex matching backtracks over every optional part, this accepts
    exactly the union of the individual prefix/base/honorific combinations.

    The name must be delimited on each side by a separator character or by
    the start/end of the message.  This one form covers the "start of
    message", "end of message", "middle of message" and "name alone" cases.
    """
    name = (
        '(?:' + _miku_alternation(_MIKU_O_PREFIXES) + r'\s*)?'
        + _miku_alternation(_MIKU_BASE_NAMES)
        + r'(?:[\-\s]*' + _miku_alternation(_MIKU_HONORIFICS) + ')?'
    )
    return re.compile(
        '(?:^|' + _MIKU_SEPARATOR + ')' + name + '(?:' + _MIKU_SEPARATOR + '|$)',
        re.IGNORECASE,
    )


# Compiled once at import time instead of rebuilding thousands of patterns
# for every incoming message.
_MIKU_ADDRESS_RE = _build_miku_address_regex()


async def is_miku_addressed(message) -> bool:
    """Decide whether *message* is directed at Miku.

    Returns True when any of the following holds, checked in this order:

    1. The message has no guild (it is a DM).
    2. The bot's own member (``message.guild.me``) is in ``message.mentions``.
    3. The message is a reply and the referenced message, fetched from the
       channel, was authored by ``message.guild.me``.
    4. The stripped, lower-cased text contains one of Miku's names
       (optionally with an o-prefix and/or honorific) delimited by
       punctuation, whitespace, or the start/end of the message.

    Returns False if ``message.guild.me`` is missing (a warning is logged) or
    if none of the checks match.  A failure to fetch the referenced message
    is logged as a warning and the function falls through to the name check.

    Note: whitespace counts as a delimiter, so the name appearing as a
    standalone word anywhere in the message is treated as addressing her.
    """
    # Check if this is a DM (no guild): in DMs, always respond.
    if message.guild is None:
        return True

    # Safety check: ensure guild and guild.me exist
    if not message.guild or not message.guild.me:
        logger.warning(f"Invalid guild or guild.me in message from {message.author}")
        return False

    # If message contains a ping for Miku, return true
    if message.guild.me in message.mentions:
        return True

    # If message is a reply, check the referenced message author
    if message.reference:
        try:
            referenced_msg = await message.channel.fetch_message(message.reference.message_id)
            if referenced_msg.author == message.guild.me:
                return True
        except Exception as e:
            # Best effort: a failed lookup must not block the name check below.
            logger.warning(f"Could not fetch referenced message: {e}")

    # Treat missing content as empty text so the name check simply fails.
    cleaned_lower = (message.content or "").strip().lower()

    return _MIKU_ADDRESS_RE.search(cleaned_lower) is not None
|
2025-12-07 17:15:09 +02:00
|
|
|
|
|
|
|
|
|
|
# Vectorstore functionality disabled - not needed with current structured context approach
|
|
|
|
|
|
# If you need embeddings in the future, you can use a different embedding provider
|
|
|
|
|
|
# For now, the bot uses structured prompts from context_manager.py
|
|
|
|
|
|
|
|
|
|
|
|
# def load_miku_knowledge():
|
|
|
|
|
|
# with open("miku_lore.txt", "r", encoding="utf-8") as f:
|
|
|
|
|
|
# text = f.read()
|
|
|
|
|
|
#
|
|
|
|
|
|
# from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
|
|
|
|
#
|
|
|
|
|
|
# text_splitter = RecursiveCharacterTextSplitter(
|
|
|
|
|
|
# chunk_size=520,
|
|
|
|
|
|
# chunk_overlap=50,
|
|
|
|
|
|
# separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
|
|
|
|
|
|
# )
|
|
|
|
|
|
#
|
|
|
|
|
|
# docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(text)]
|
|
|
|
|
|
#
|
|
|
|
|
|
# vectorstore = FAISS.from_documents(docs, embeddings)
|
|
|
|
|
|
# return vectorstore
|
|
|
|
|
|
#
|
|
|
|
|
|
# def load_miku_lyrics():
|
|
|
|
|
|
# with open("miku_lyrics.txt", "r", encoding="utf-8") as f:
|
|
|
|
|
|
# lyrics_text = f.read()
|
|
|
|
|
|
#
|
|
|
|
|
|
# text_splitter = CharacterTextSplitter(chunk_size=520, chunk_overlap=50)
|
|
|
|
|
|
# docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(lyrics_text)]
|
|
|
|
|
|
#
|
|
|
|
|
|
# vectorstore = FAISS.from_documents(docs, embeddings)
|
|
|
|
|
|
# return vectorstore
|
|
|
|
|
|
#
|
|
|
|
|
|
# miku_vectorstore = load_miku_knowledge()
|
|
|
|
|
|
# miku_lyrics_vectorstore = load_miku_lyrics()
|