# miku-discord/bot/utils/core.py

# utils/core.py
#
# Detects whether a Discord message is **addressed to** Miku
# (as opposed to merely mentioning her).

import re
from utils.logger import get_logger

# Module-level logger shared by everything in this file; requested from the
# project's get_logger helper with the tag 'core'.
logger = get_logger('core')


# ────────────────────────────────────────────────────────────────────
# Pre-compiled Miku addressing patterns
# Built once at module load, so is_miku_addressed() performs at most four
# .search() calls per message.
# ────────────────────────────────────────────────────────────────────

def _build_name_variants(bases, honorifics, prefixes, connector, prefix_connector):
    """Expand one script family's names into a list of regex fragments.

    For every base name the result contains, in this order:
      1. the bare name,
      2. the name followed by each honorific,
      3. for each prefix: the prefixed name, then the prefixed name
         followed by each honorific.

    ``bases``, ``honorifics`` and ``prefixes`` are literal strings and are
    passed through ``re.escape``.  ``connector`` (between name and
    honorific) and ``prefix_connector`` (between prefix and name) are
    regex fragments and are inserted verbatim.
    """
    # Escape the literal pieces once; the connectors are glued on here so the
    # main loop only has to concatenate ready-made fragments.
    suffixes = [connector + re.escape(title) for title in honorifics]
    leads = [re.escape(lead) + prefix_connector for lead in prefixes]

    fragments = []
    for name in map(re.escape, bases):
        # The bare name comes first, followed by each prefixed form.
        for stem in [name, *(lead + name for lead in leads)]:
            fragments.append(stem)
            fragments.extend(stem + suffix for suffix in suffixes)
    return fragments


def _compile_addressing_patterns():
    """Build the four regexes that recognise Miku being spoken *to*.

    Returns a 4-tuple ``(start, end, middle, alone)`` of compiled,
    case-insensitive patterns:

    start  – message opens with the name, then addressing punctuation
             "Miku, how are you?"  "みく！聞いて"
    end    – a comma, then the name closing the message
             "how are you, Miku?"  "教えて、ミク"
    middle – the name set off by a comma on each side (vocative)
             "On the contrary, Miku, I think…"
    alone  – the name is the whole message (trailing marks allowed)
             "Miku"  "みく！"  "ミクちゃん"
    """
    # Latin and Cyrillic spellings share the same joiners: an optional hyphen
    # or whitespace before the honorific, optional whitespace after the prefix.
    spaced = dict(connector=r'[\-\s]?', prefix_connector=r'\s?')

    hiragana_titles = [
        'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん',
        'へいか', 'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの',
        'せんせい', 'せんぱい', 'じょう',
    ]
    katakana_titles = [
        'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン',
        'ヘイカ', 'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ',
        'センセイ', 'センパイ', 'ジョウ',
    ]

    families = (
        # Latin script
        _build_name_variants(
            bases=['miku'],
            honorifics=[
                'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
                'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei',
                'senpai', 'jou',
            ],
            prefixes=['o-'],
            **spaced,
        ),
        # Cyrillic script
        _build_name_variants(
            bases=['мику'],
            honorifics=[
                'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин',
                'хейка', 'хеика', 'денка', 'какка', 'си', 'чама', 'кюн',
                'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо',
            ],
            prefixes=['о-'],
            **spaced,
        ),
        # Japanese scripts: no spaces between parts, only an optional hyphen
        # before the honorific.
        _build_name_variants(
            bases=['みく', 'ミク', '未来'],
            honorifics=hiragana_titles + katakana_titles,
            prefixes=['お', 'オ'],
            connector=r'[-]?',
            prefix_connector=r'',
        ),
    )

    # Flatten, then order longest fragment first so the alternation tries the
    # most specific spelling before its shorter prefixes.
    candidates = [fragment for family in families for fragment in family]
    candidates.sort(key=len, reverse=True)

    name = r'\b(?:' + '|'.join(candidates) + r')\b'
    after_name = r'[,，、:：!！?？.。]'      # punctuation that may follow a leading name
    comma = r'[,，、]'                       # comma variants framing the name
    end_marks = r'[!！?？.。~～]*'           # optional marks after a closing name
    solo_marks = r'[!！?？.。~～♪♡❤]*'       # optional marks after a name-only message

    sources = (
        rf'^\s*{name}\s*{after_name}',              # start
        rf'{comma}\s*{name}\s*{end_marks}\s*$',     # end
        rf'{comma}\s*{name}\s*{comma}',             # middle
        rf'^\s*{name}\s*{solo_marks}\s*$',          # alone
    )
    compiled = tuple(re.compile(source, re.IGNORECASE) for source in sources)

    logger.info(f"Miku addressing: {len(candidates)} name variants compiled into 4 patterns")
    return compiled


# Compile once at import time.  The four globals default to None and stay
# that way if compilation fails, which callers use as the "patterns
# unavailable" signal.
_START_RE = _END_RE = _MIDDLE_RE = _ALONE_RE = None
try:
    _START_RE, _END_RE, _MIDDLE_RE, _ALONE_RE = _compile_addressing_patterns()
except Exception as exc:
    logger.error(f"Failed to compile addressing patterns: {exc}")


# ────────────────────────────────────────────────────────────────────

async def is_miku_addressed(message) -> bool:
    """Decide whether ``message`` is spoken *to* Miku rather than about her.

    Returns True unconditionally for:
      • messages without a guild (DMs),
      • messages whose mentions include the bot's guild member,
      • replies whose referenced message was authored by the bot.

    Otherwise the stripped message text is tested against the four
    pre-compiled addressing patterns:
      • Start  – "Miku, how are you?"
      • End    – "how are you, Miku?"
      • Middle – "On the contrary, Miku, I think…"
      • Alone  – "Miku!" / "ミクちゃん"

    Plain mentions such as "I like Miku", "Miku is cool" or
    "told miku about it" do not count.

    Returns False when the guild or its ``me`` member is unusable, or when
    the patterns failed to compile at import time.
    """
    guild = message.guild

    # No guild at all: a direct message, always answered.
    if guild is None:
        return True

    if not guild or not guild.me:
        logger.warning(f"Invalid guild/guild.me for message from {message.author}")
        return False

    bot_member = guild.me

    # Explicit @mention of the bot.
    if bot_member in message.mentions:
        return True

    # Reply to one of the bot's own messages.  Any failure while fetching or
    # inspecting the referenced message is logged and treated as "not a reply
    # to Miku"; the text patterns below still get their chance.
    reference = message.reference
    if reference:
        try:
            replied_to = await message.channel.fetch_message(reference.message_id)
            if replied_to.author == bot_member:
                return True
        except Exception as exc:
            logger.warning(f"Could not fetch referenced message: {exc}")

    # Text-based addressing (the patterns are None if compilation failed).
    if _START_RE is None:
        logger.error("Addressing patterns not compiled – skipping pattern check")
        return False

    content = message.content.strip()
    return any(
        pattern.search(content) is not None
        for pattern in (_START_RE, _END_RE, _MIDDLE_RE, _ALONE_RE)
    )