Files
miku-discord/bot/utils/core.py

166 lines
6.4 KiB
Python
Raw Normal View History

2025-12-07 17:15:09 +02:00
# utils/core.py
#
# Detects whether a Discord message is **addressed to** Miku
# (as opposed to merely mentioning her).
2025-12-07 17:15:09 +02:00
import re
from utils.logger import get_logger
logger = get_logger('core')
2025-12-07 17:15:09 +02:00
# ────────────────────────────────────────────────────────────────────
# Pre-compiled Miku addressing patterns
# Built once at module load; is_miku_addressed() then runs at most 4 .search() calls per message.
# ────────────────────────────────────────────────────────────────────
def _build_name_variants(bases, honorifics, prefixes, connector, prefix_connector):
    """Return regex fragments for every name+honorific+prefix combo
    within a single script family.

    Args:
        bases: Literal spellings of the name; each is passed through
            ``re.escape``.
        honorifics: Literal suffixes that may follow a base; escaped.
        prefixes: Literal prefixes that may precede a base; escaped.
        connector: Regex fragment (inserted verbatim, NOT escaped) placed
            between a base and an honorific.
        prefix_connector: Regex fragment (inserted verbatim, NOT escaped)
            placed between a prefix and a base.

    Returns:
        A list of unique regex fragments in first-seen order. For each base
        the order is: bare base, base+honorifics, then for every prefix
        prefix+base followed by prefix+base+honorifics.

    Empty strings in any of the three literal lists are ignored: an empty
    honorific or prefix contributes no text and would only yield degenerate
    or duplicate alternatives, and an empty base would match everywhere.
    """
    def _unique_escaped(items):
        # dict.fromkeys keeps insertion order while dropping repeats.
        return list(dict.fromkeys(re.escape(item) for item in items if item))

    suffixes = _unique_escaped(honorifics)
    # '' stands for "no prefix", so the un-prefixed forms come first.
    lead_ins = [''] + [pe + prefix_connector for pe in _unique_escaped(prefixes)]

    ordered = {}  # used as an insertion-ordered set
    for be in _unique_escaped(bases):
        for lead in lead_ins:
            stem = lead + be
            ordered[stem] = None
            for he in suffixes:
                ordered[stem + connector + he] = None
    return list(ordered)
def _compile_addressing_patterns():
    """Compile the four addressing regexes.

    START   name at the beginning, followed by punctuation
            "Miku, how are you?"   "みく!聞いて"
    END     comma then name at the end
            "how are you, Miku?"   "教えて、ミク"
    MIDDLE  name surrounded by commas (vocative)
            "On the contrary, Miku, I think…"
    ALONE   name is the entire message
            "Miku"   "みく!"   "ミクちゃん"

    Punctuation classes accept both ASCII and full-width (Japanese IME)
    forms, so "ミク!" and "教えて,ミク" are recognised as well.

    Returns:
        Tuple ``(start_re, end_re, middle_re, alone_re)`` of compiled,
        case-insensitive patterns.
    """
    latin = _build_name_variants(
        bases=['miku'],
        honorifics=[
            'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
            'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei',
            'senpai', 'jou',
        ],
        prefixes=['o-'],
        connector=r'[\-\s]?',
        prefix_connector=r'\s?',
    )
    cyrillic = _build_name_variants(
        bases=['мику'],
        honorifics=[
            'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин',
            'хейка', 'хеика', 'денка', 'какка', 'си', 'чама', 'кюн',
            'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо',
        ],
        prefixes=['о-'],
        connector=r'[\-\s]?',
        prefix_connector=r'\s?',
    )
    # NOTE(review): the empty-string honorifics and prefixes below sit in
    # the slots where the Latin list has 'shi' / 'o-'; they look like
    # characters that were lost somewhere — confirm the intended values.
    # The single-character class in `connector` may be affected the same way.
    japanese = _build_name_variants(
        bases=['みく', 'ミク', '未来'],
        honorifics=[
            # Hiragana
            'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん',
            'へいか', 'でんか', 'かっか', '', 'ちゃま', 'きゅん', 'どの',
            'せんせい', 'せんぱい', 'じょう',
            # Katakana
            'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン',
            'ヘイカ', 'デンカ', 'カッカ', '', 'チャマ', 'キュン', 'ドノ',
            'センセイ', 'センパイ', 'ジョウ',
        ],
        prefixes=['', ''],
        connector=r'[-]?',
        prefix_connector=r'',
    )

    # Longest-first so the regex engine prefers the most specific match
    all_v = sorted(latin + cyrillic + japanese, key=len, reverse=True)
    alts = '|'.join(all_v)
    NAME = rf'\b(?:{alts})\b'

    # Full-width forms are written as \uXXXX escapes (interpreted by `re`)
    # so they survive editors/tools that normalise them to ASCII:
    #   \uFF0C ,   \uFF1A :   \uFF01 !   \uFF1F ?   \uFF5E ~   \u301C 〜
    PUNCT = r'[,\uFF0C、:\uFF1A!\uFF01?\uFF1F.。]'    # addressing punctuation after name
    COMMA = r'[,\uFF0C、]'                            # comma variants (before name / vocative)
    ETRAIL = r'[!\uFF01?\uFF1F.。~\uFF5E\u301C]*'     # optional trailing at end
    ATRAIL = r'[!\uFF01?\uFF1F.。~\uFF5E\u301C♪♡❤]*'  # optional trailing for name-only messages

    start_re = re.compile(rf'^\s*{NAME}\s*{PUNCT}', re.IGNORECASE)
    end_re = re.compile(rf'{COMMA}\s*{NAME}\s*{ETRAIL}\s*$', re.IGNORECASE)
    middle_re = re.compile(rf'{COMMA}\s*{NAME}\s*{COMMA}', re.IGNORECASE)
    alone_re = re.compile(rf'^\s*{NAME}\s*{ATRAIL}\s*$', re.IGNORECASE)

    logger.info(f"Miku addressing: {len(all_v)} name variants compiled into 4 patterns")
    return start_re, end_re, middle_re, alone_re
# Default every pattern to None so a failed compile leaves the module
# importable; is_miku_addressed() checks for None before matching.
_START_RE = _END_RE = _MIDDLE_RE = _ALONE_RE = None
try:
    _START_RE, _END_RE, _MIDDLE_RE, _ALONE_RE = _compile_addressing_patterns()
except Exception as exc:
    logger.error(f"Failed to compile addressing patterns: {exc}")
2025-12-07 17:15:09 +02:00
# ────────────────────────────────────────────────────────────────────
2025-12-07 17:15:09 +02:00
async def is_miku_addressed(message) -> bool:
    """Return True only when the message is directed *at* Miku,
    not merely mentioning her.

    Always responds to: DMs, @mentions, replies to Miku's messages.

    For normal messages checks whether Miku's name (in any supported
    script / honorific combination) appears in an "addressing" position:

        Start   "Miku, how are you?"
        End     "how are you, Miku?"
        Middle  "On the contrary, Miku, I think…"
        Alone   "Miku!" / "ミクちゃん"

    Does NOT trigger on mere mentions:
        "I like Miku" / "Miku is cool" / "told miku about it"

    Args:
        message: The incoming message object. This function reads its
            ``guild``, ``author``, ``mentions``, ``reference``, ``channel``
            and ``content`` attributes.

    Returns:
        True if the bot should treat the message as addressed to it.
        Returns False when ``guild.me`` is unavailable or the addressing
        patterns failed to compile.
    """
    # DMs always respond
    guild = message.guild
    if guild is None:
        return True

    me = guild.me
    if not me:
        logger.warning(f"Invalid guild/guild.me for message from {message.author}")
        return False

    # @mention
    if me in message.mentions:
        return True

    # Reply to Miku
    reference = message.reference
    if reference:
        # Prefer the already-resolved referenced message, when the library
        # provides one, to avoid an API round-trip on every reply.
        resolved = getattr(reference, "resolved", None)
        if resolved is not None:
            # A resolved placeholder without an author (e.g. a deleted
            # message) simply does not count as a reply to Miku.
            if getattr(resolved, "author", None) == me:
                return True
        elif reference.message_id is not None:
            try:
                ref = await message.channel.fetch_message(reference.message_id)
                if ref.author == me:
                    return True
            except Exception as e:
                # Best effort: fall through to the text-based check.
                logger.warning(f"Could not fetch referenced message: {e}")

    # Regex addressing (4 pre-compiled patterns)
    if _START_RE is None:
        logger.error("Addressing patterns not compiled  skipping pattern check")
        return False

    text = message.content.strip()
    return any(
        pattern.search(text)
        for pattern in (_START_RE, _END_RE, _MIDDLE_RE, _ALONE_RE)
    )