- Pre-compile 393 name variants into 4 regex patterns at module load (was 7,300+ raw re.search() calls per message)
- Add strict addressing detection using punctuation context:
  - START: name at beginning + punctuation (Miku, ... / みく!...)
  - END: comma + name at end (..., Miku / ...、ミク)
  - MIDDLE: commas on both sides - vocative (..., Miku, ...)
  - ALONE: name is the entire message (Miku! / ミクちゃん)
- Reject mere mentions: 'I like Miku' / 'Miku is cool' no longer trigger
- Use script-family-aware pattern generation (Latin, Cyrillic, Japanese) to eliminate nonsensical cross-script combos (e.g. o-みく)
- Enforce word boundaries to prevent substring matches (mikumiku)
- Fix regex 'unbalanced parenthesis' errors from the old implementation
- Add comprehensive test suite (94 cases, all passing)
166 lines
6.4 KiB
Python
166 lines
6.4 KiB
Python
# utils/core.py
|
||
#
|
||
# Detects whether a Discord message is **addressed to** Miku
|
||
# (as opposed to merely mentioning her).
|
||
|
||
import re
|
||
from utils.logger import get_logger
|
||
|
||
# Module-level logger for this file, obtained from the project's
# get_logger helper under the name 'core'.
logger = get_logger('core')
|
||
|
||
|
||
# ────────────────────────────────────────────────────────────────────
|
||
# Pre-compiled Miku addressing patterns
|
||
# Built once at module load; is_miku_addressed() runs at most 4 .search() calls per message.
|
||
# ────────────────────────────────────────────────────────────────────
|
||
|
||
def _build_name_variants(bases, honorifics, prefixes, connector, prefix_connector):
    """Return regex fragments for every name/honorific/prefix combination
    within a single script family.

    Args:
        bases: Iterable of literal base names (e.g. ``'miku'``); each one is
            regex-escaped before use.
        honorifics: Iterable of literal suffixes (e.g. ``'chan'``); each one
            is regex-escaped before use.
        prefixes: Iterable of literal prefixes (e.g. ``'o-'``); each one is
            regex-escaped before use.
        connector: Raw regex fragment inserted between a base and an
            honorific. It is NOT escaped.
        prefix_connector: Raw regex fragment inserted between a prefix and a
            base. It is NOT escaped.

    Returns:
        A list of regex fragments. For each base, in input order: the bare
        base, then base+honorific for every honorific, then for each prefix
        the prefixed base followed by prefixed base+honorific for every
        honorific.
    """
    # Escape honorifics and prefixes exactly once, up front.  Besides avoiding
    # repeated re.escape() calls inside the nested loops, materialising them
    # as lists means one-shot iterables (generators, iterators) are not
    # exhausted after the first base/prefix.
    suffixes = [connector + re.escape(h) for h in honorifics]
    heads = [re.escape(p) + prefix_connector for p in prefixes]

    variants = []
    for base in bases:
        be = re.escape(base)
        # Bare name, then name + each honorific.
        variants.append(be)
        variants.extend(be + suffix for suffix in suffixes)
        # Same again for every prefixed form of the name.
        for head in heads:
            stem = head + be
            variants.append(stem)
            variants.extend(stem + suffix for suffix in suffixes)
    return variants
|
||
|
||
|
||
def _compile_addressing_patterns():
    """Build and compile the four case-insensitive addressing regexes.

    The name alternation is assembled from three script families (Latin,
    Cyrillic, Japanese), each expanded by ``_build_name_variants`` so that
    honorifics and prefixes are only combined within their own script.

    Returns:
        A 4-tuple ``(start_re, end_re, middle_re, alone_re)``:

        start_re  – name at the beginning, followed by punctuation
                    "Miku, how are you?"   "みく!聞いて"
        end_re    – comma, then the name at the end
                    "how are you, Miku?"   "教えて、ミク"
        middle_re – name with a comma on each side (vocative)
                    "On the contrary, Miku, I think…"
        alone_re  – name is the entire message
                    "Miku"   "みく!"   "ミクちゃん"
    """
    # Latin and Cyrillic share the same joining rules: an optional hyphen or
    # whitespace before an honorific, optional whitespace after a prefix.
    spaced_joiners = {'connector': r'[\-\s]?', 'prefix_connector': r'\s?'}

    latin = _build_name_variants(
        bases=['miku'],
        honorifics=[
            'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
            'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei',
            'senpai', 'jou',
        ],
        prefixes=['o-'],
        **spaced_joiners,
    )

    cyrillic = _build_name_variants(
        bases=['мику'],
        honorifics=[
            'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин',
            'хейка', 'хеика', 'денка', 'какка', 'си', 'чама', 'кюн',
            'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо',
        ],
        prefixes=['о-'],
        **spaced_joiners,
    )

    hiragana_honorifics = [
        'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん',
        'へいか', 'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの',
        'せんせい', 'せんぱい', 'じょう',
    ]
    katakana_honorifics = [
        'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン',
        'ヘイカ', 'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ',
        'センセイ', 'センパイ', 'ジョウ',
    ]
    # Japanese: at most an optional hyphen before the honorific, and the
    # prefix attaches directly to the name.
    japanese = _build_name_variants(
        bases=['みく', 'ミク', '未来'],
        honorifics=hiragana_honorifics + katakana_honorifics,
        prefixes=['お', 'オ'],
        connector=r'[-]?',
        prefix_connector=r'',
    )

    # Longest fragment first, so the most specific alternative is tried
    # before its shorter prefixes (stable sort keeps family order on ties).
    all_v = [*latin, *cyrillic, *japanese]
    all_v.sort(key=len, reverse=True)

    # \b on both sides keeps the name from matching inside a longer word.
    name = r'\b(?:' + '|'.join(all_v) + r')\b'
    punct = r'[,,、::!!??.。]'          # addressing punctuation after the name
    comma = r'[,,、]'                      # comma variants (before name / vocative)
    end_trail = r'[!!??.。~~]*'         # optional trailing marks at the end
    alone_trail = r'[!!??.。~~♪♡❤]*'    # optional trailing marks for name-only messages

    sources = (
        r'^\s*' + name + r'\s*' + punct,                      # START
        comma + r'\s*' + name + r'\s*' + end_trail + r'\s*$',  # END
        comma + r'\s*' + name + r'\s*' + comma,                # MIDDLE
        r'^\s*' + name + r'\s*' + alone_trail + r'\s*$',       # ALONE
    )
    start_re, end_re, middle_re, alone_re = (
        re.compile(source, re.IGNORECASE) for source in sources
    )

    logger.info(f"Miku addressing: {len(all_v)} name variants compiled into 4 patterns")
    return start_re, end_re, middle_re, alone_re
|
||
|
||
|
||
# Compile the addressing patterns once at import time.  The four globals
# default to None and are only overwritten when compilation succeeds, so a
# failure is logged rather than aborting the module import;
# is_miku_addressed() checks for None before using them.
_START_RE = _END_RE = _MIDDLE_RE = _ALONE_RE = None
try:
    _START_RE, _END_RE, _MIDDLE_RE, _ALONE_RE = _compile_addressing_patterns()
except Exception as e:
    logger.error(f"Failed to compile addressing patterns: {e}")
|
||
|
||
|
||
# ────────────────────────────────────────────────────────────────────
|
||
|
||
async def is_miku_addressed(message) -> bool:
    """Return True only when the message is directed *at* Miku,
    not merely mentioning her.

    Always responds to: DMs, @mentions, replies to Miku's messages.

    For normal messages checks whether Miku's name (in any supported
    script / honorific combination) appears in an "addressing" position:
        • Start  – "Miku, how are you?"
        • End    – "how are you, Miku?"
        • Middle – "On the contrary, Miku, I think…"
        • Alone  – "Miku!" / "ミクちゃん"

    Does NOT trigger on mere mentions:
        • "I like Miku" / "Miku is cool" / "told miku about it"

    Args:
        message: The incoming message object.  This function reads its
            ``guild``, ``author``, ``mentions``, ``reference``, ``channel``
            and ``content`` attributes.

    Returns:
        True if the message is a DM, mentions the bot member, replies to a
        message authored by the bot member, or matches one of the four
        pre-compiled addressing patterns; False otherwise (including when
        the patterns failed to compile at import time).
    """
    guild = message.guild

    # DMs – always respond
    if guild is None:
        return True

    if not guild or not guild.me:
        logger.warning(f"Invalid guild/guild.me for message from {message.author}")
        return False

    me = guild.me

    # @mention
    if me in message.mentions:
        return True

    # Reply to Miku
    reference = message.reference
    if reference:
        # NOTE(review): assumes a discord.py-style reference whose optional
        # `resolved` attribute holds the parent message when it was delivered
        # together with the reply — confirm against the library version in
        # use.  When it is present we can decide without an extra HTTP fetch.
        resolved_author = getattr(
            getattr(reference, "resolved", None), "author", None
        )
        if resolved_author is not None:
            if resolved_author == me:
                return True
        else:
            # Nothing usable attached (missing, or an object without an
            # author): fall back to fetching the parent.  Best-effort – a
            # failed fetch is logged and we continue with the pattern check.
            try:
                ref = await message.channel.fetch_message(reference.message_id)
                if ref.author == me:
                    return True
            except Exception as e:
                logger.warning(f"Could not fetch referenced message: {e}")

    # Regex addressing (4 pre-compiled patterns)
    if _START_RE is None:
        logger.error("Addressing patterns not compiled – skipping pattern check")
        return False

    # Treat missing content as empty text rather than raising.
    text = (message.content or "").strip()
    return any(
        pattern.search(text)
        for pattern in (_START_RE, _END_RE, _MIDDLE_RE, _ALONE_RE)
    )
|