Rewrite is_miku_addressed() to only trigger when addressed, not mentioned

- Pre-compile 393 name variants into 4 regex patterns at module load
  (was 7,300+ raw re.search() calls per message)
- Strict addressing detection using punctuation context:
    START:  name at beginning + punctuation (Miku, ... / みく！...)
    END:    comma + name at end (..., Miku / ...、ミク)
    MIDDLE: commas on both sides - vocative (..., Miku, ...)
    ALONE:  name is the entire message (Miku! / ミクちゃん)
- Rejects mere mentions: 'I like Miku' / 'Miku is cool' no longer trigger
- Script-family-aware pattern generation (Latin, Cyrillic, Japanese)
  eliminates nonsensical cross-script combos (e.g. o-みく)
- Word boundary enforcement prevents substring matches (mikumiku)
- Fixes regex 'unbalanced parenthesis' errors from old implementation
- Add comprehensive test suite (94 cases, all passing)
2026-03-03 12:42:33 +02:00
parent 892edf5564
commit a226bc41df
2 changed files with 389 additions and 142 deletions
--- a/bot/utils/core.py
+++ b/bot/utils/core.py
@@ -1,167 +1,165 @@
 # utils/core.py
+#
+# Detects whether a Discord message is **addressed to** Miku
+# (as opposed to merely mentioning her).

-import asyncio
-import aiohttp
 import re
-
-import globals
-# Langchain imports below are only used in commented-out code
-# from langchain_community.vectorstores import FAISS
-# from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
-# from langchain_core.documents import Document
 from utils.logger import get_logger

 logger = get_logger('core')


-# switch_model() removed - llama-swap handles model switching automatically
+# ────────────────────────────────────────────────────────────────────
+# Pre-compiled Miku addressing patterns
+# Built once at module load; is_miku_addressed() runs at most 4 .search() calls
+# ────────────────────────────────────────────────────────────────────

+def _build_name_variants(bases, honorifics, prefixes, connector, prefix_connector):
+    """Return regex fragments for every name+honorific+prefix combo
+    within a single script family."""
+    variants = []
+    for base in bases:
+        be = re.escape(base)
+        variants.append(be)
+        for h in honorifics:
+            he = re.escape(h)
+            variants.append(be + connector + he)
+        for p in prefixes:
+            pe = re.escape(p)
+            variants.append(pe + prefix_connector + be)
+            for h in honorifics:
+                he = re.escape(h)
+                variants.append(pe + prefix_connector + be + connector + he)
+    return variants
+
+
+def _compile_addressing_patterns():
+    """Compile the four addressing regexes.
+
+    START  – name at the beginning, followed by punctuation
+             "Miku, how are you?"  "みく！聞いて"
+    END    – comma then name at the end
+             "how are you, Miku?"  "教えて、ミク"
+    MIDDLE – name surrounded by commas (vocative)
+             "On the contrary, Miku, I think…"
+    ALONE  – name is the entire message
+             "Miku"  "みく！"  "ミクちゃん"
+    """
+    latin = _build_name_variants(
+        bases=['miku'],
+        honorifics=[
+            'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
+            'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei',
+            'senpai', 'jou',
+        ],
+        prefixes=['o-'],
+        connector=r'[\-\s]?',
+        prefix_connector=r'\s?',
+    )
+
+    cyrillic = _build_name_variants(
+        bases=['мику'],
+        honorifics=[
+            'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин',
+            'хейка', 'хеика', 'денка', 'какка', 'си', 'чама', 'кюн',
+            'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо',
+        ],
+        prefixes=['о-'],
+        connector=r'[\-\s]?',
+        prefix_connector=r'\s?',
+    )
+
+    japanese = _build_name_variants(
+        bases=['みく', 'ミク', '未来'],
+        honorifics=[
+            # Hiragana
+            'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん',
+            'へいか', 'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの',
+            'せんせい', 'せんぱい', 'じょう',
+            # Katakana
+            'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン',
+            'ヘイカ', 'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ',
+            'センセイ', 'センパイ', 'ジョウ',
+        ],
+        prefixes=['お', 'オ'],
+        connector=r'[-]?',
+        prefix_connector=r'',
+    )
+
+    # Longest regex fragment first, so the alternation tries the most specific variants before bare names
+    all_v = sorted(latin + cyrillic + japanese, key=len, reverse=True)
+    alts = '|'.join(all_v)
+
+    NAME   = rf'\b(?:{alts})\b'
+    PUNCT  = r'[,，、:：!！?？.。]'        # addressing punctuation after name
+    COMMA  = r'[,，、]'                      # comma variants (before name / vocative)
+    ETRAIL = r'[!！?？.。~～]*'             # optional trailing at end
+    ATRAIL = r'[!！?？.。~～♪♡❤]*'         # optional trailing for name-only messages
+
+    start_re  = re.compile(rf'^\s*{NAME}\s*{PUNCT}',            re.IGNORECASE)
+    end_re    = re.compile(rf'{COMMA}\s*{NAME}\s*{ETRAIL}\s*$', re.IGNORECASE)
+    middle_re = re.compile(rf'{COMMA}\s*{NAME}\s*{COMMA}',      re.IGNORECASE)
+    alone_re  = re.compile(rf'^\s*{NAME}\s*{ATRAIL}\s*$',       re.IGNORECASE)
+
+    logger.info(f"Miku addressing: {len(all_v)} name variants compiled into 4 patterns")
+    return start_re, end_re, middle_re, alone_re
+
+
+try:
+    _START_RE, _END_RE, _MIDDLE_RE, _ALONE_RE = _compile_addressing_patterns()
+except Exception as e:
+    logger.error(f"Failed to compile addressing patterns: {e}")
+    _START_RE = _END_RE = _MIDDLE_RE = _ALONE_RE = None
+
+
+# ────────────────────────────────────────────────────────────────────

 async def is_miku_addressed(message) -> bool:
-    # Check if this is a DM (no guild)
+    """Return True only when the message is directed *at* Miku,
+    not merely mentioning her.
+
+    Always responds to:  DMs, @mentions, replies to Miku's messages.
+
+    For normal messages checks whether Miku's name (in any supported
+    script / honorific combination) appears in an "addressing" position:
+      • Start  – "Miku, how are you?"
+      • End    – "how are you, Miku?"
+      • Middle – "On the contrary, Miku, I think…"
+      • Alone  – "Miku!" / "ミクちゃん"
+
+    Does NOT trigger on mere mentions:
+      • "I like Miku" / "Miku is cool" / "told miku about it"
+    """
+    # DMs – always respond
    if message.guild is None:
-        # In DMs, always respond to every message
        return True

-    # Safety check: ensure guild and guild.me exist
    if not message.guild or not message.guild.me:
-        logger.warning(f"Invalid guild or guild.me in message from {message.author}")
+        logger.warning(f"Invalid guild/guild.me for message from {message.author}")
        return False

-    # If message contains a ping for Miku, return true
+    # @mention
    if message.guild.me in message.mentions:
        return True

-    # If message is a reply, check the referenced message author
+    # Reply to Miku
    if message.reference:
        try:
-            referenced_msg = await message.channel.fetch_message(message.reference.message_id)
-            if referenced_msg.author == message.guild.me:
+            ref = await message.channel.fetch_message(message.reference.message_id)
+            if ref.author == message.guild.me:
                return True
        except Exception as e:
            logger.warning(f"Could not fetch referenced message: {e}")

-    cleaned = message.content.strip()
-    cleaned_lower = cleaned.lower()
+    # Regex addressing (4 pre-compiled patterns)
+    if _START_RE is None:
+        logger.error("Addressing patterns not compiled – skipping pattern check")
+        return False

-    # Base names for Miku in different scripts
-    base_names = [
-        'miku', 'мику', 'みく', 'ミク', '未来'
-    ]
-    
-    # Japanese honorifics - all scripts combined
-    honorifics = [
-        # Latin
-        'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika', 
-        'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei', 'senpai', 'jou',
-        # Hiragana
-        'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん', 'へいか',
-        'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの', 'せんせい', 'せんぱい', 'じょう',
-        # Katakana
-        'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
-        'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ', 'センセイ', 'センパイ', 'ジョウ',
-        # Cyrillic
-        'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин', 'хейка', 'хеика',
-        'денка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо'
-    ]
-    
-    # o- prefix variants
-    o_prefixes = ['o-', 'о-', 'お', 'オ']
-    
-    # Build all possible name variations to check
-    name_patterns = []
-    
-    for base in base_names:
-        base_lower = base.lower()
-        base_escaped = re.escape(base_lower)
-        
-        # Base name alone
-        name_patterns.append(base_escaped)
-        
-        # With honorifics (allows optional dash/space between)
-        for honorific in honorifics:
-            honorific_lower = honorific.lower()
-            honorific_escaped = re.escape(honorific_lower)
-            # Build pattern: base + optional [dash or space] + honorific
-            name_patterns.append(base_escaped + r'[\-\s]*' + honorific_escaped)
-        
-        # With o- prefix
-        for prefix in o_prefixes:
-            prefix_lower = prefix.lower()
-            prefix_escaped = re.escape(prefix_lower)
-            # o-prefix + optional space + base
-            name_patterns.append(prefix_escaped + r'\s*' + base_escaped)
-            
-            # With o- prefix + honorific
-            for honorific in honorifics:
-                honorific_lower = honorific.lower()
-                honorific_escaped = re.escape(honorific_lower)
-                # o-prefix + space + base + dash/space + honorific
-                name_patterns.append(prefix_escaped + r'\s*' + base_escaped + r'[\-\s]*' + honorific_escaped)
-    
-    # Check all patterns - she must be "addressed" not just mentioned
-    for pattern in name_patterns:
-        try:
-            # Pattern 1: Start of message + punctuation/end
-            # "Miku, ..." or "みく！" or "ミクちゃん、..."
-            start_p = r'^' + pattern + r'(?:[,，、!！?？.。\s]+|$)'
-            if re.search(start_p, cleaned_lower, re.IGNORECASE):
-                return True
-            
-            # Pattern 2: End of message (optionally preceded by punctuation)
-            # "..., Miku" or "...みく" or "...ミクちゃん！"
-            end_p = r'(?:[,，、!！?？.。\s]+|^)' + pattern + r'[!！?？.。\s]*$'
-            if re.search(end_p, cleaned_lower, re.IGNORECASE):
-                return True
-            
-            # Pattern 3: Middle (surrounded by punctuation)
-            # "..., Miku, ..." or "...、ミク、..."
-            middle_p = r'[,，、!！?？.。\s]+' + pattern + r'[,，、!！?？.。\s]+'
-            if re.search(middle_p, cleaned_lower, re.IGNORECASE):
-                return True
-            
-            # Pattern 4: Just the name alone
-            # "Miku" or "みく！" or "ミクちゃん"
-            alone_p = r'^\s*' + pattern + r'[!！?？.。]*\s*$'
-            if re.search(alone_p, cleaned_lower, re.IGNORECASE):
-                return True
-        except re.error as e:
-            # Log the problematic pattern and skip it
-            logger.error(f"REGEX ERROR - Pattern: '{pattern}' | Start regex: '{start_p}' | Error: {e}")
-            continue
-    
-    return False
-
-# Vectorstore functionality disabled - not needed with current structured context approach
-# If you need embeddings in the future, you can use a different embedding provider
-# For now, the bot uses structured prompts from context_manager.py
-
-# def load_miku_knowledge():
-#     with open("miku_lore.txt", "r", encoding="utf-8") as f:
-#         text = f.read()
-#     
-#     from langchain_text_splitters import RecursiveCharacterTextSplitter
-#     
-#     text_splitter = RecursiveCharacterTextSplitter(
-#         chunk_size=520,
-#         chunk_overlap=50,
-#         separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
-#     )
-#     
-#     docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(text)]
-#     
-#     vectorstore = FAISS.from_documents(docs, embeddings)
-#     return vectorstore
-# 
-# def load_miku_lyrics():
-#     with open("miku_lyrics.txt", "r", encoding="utf-8") as f:
-#         lyrics_text = f.read()
-#     
-#     text_splitter = CharacterTextSplitter(chunk_size=520, chunk_overlap=50)
-#     docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(lyrics_text)]
-#     
-#     vectorstore = FAISS.from_documents(docs, embeddings)
-#     return vectorstore
-# 
-# miku_vectorstore = load_miku_knowledge()
-# miku_lyrics_vectorstore = load_miku_lyrics()
+    text = message.content.strip()
+    return bool(
+        _START_RE.search(text)
+        or _END_RE.search(text)
+        or _MIDDLE_RE.search(text)
+        or _ALONE_RE.search(text)
+    )
--- a/test_addressing.py
+++ b/test_addressing.py
@@ -0,0 +1,249 @@
+#!/usr/bin/env python3
+"""Comprehensive test for Miku addressing detection patterns.
+
+Tests the pre-compiled regex patterns from bot/utils/core.py to verify
+that Miku is only triggered when *addressed*, not merely *mentioned*.
+"""
+
+import re
+import sys
+
+# ── Replicate the pattern-building logic from core.py (a copy, not an import: keep both in sync) ──
+
+def _build_name_variants(bases, honorifics, prefixes, connector, prefix_connector):
+    variants = []
+    for base in bases:
+        be = re.escape(base)
+        variants.append(be)
+        for h in honorifics:
+            he = re.escape(h)
+            variants.append(be + connector + he)
+        for p in prefixes:
+            pe = re.escape(p)
+            variants.append(pe + prefix_connector + be)
+            for h in honorifics:
+                he = re.escape(h)
+                variants.append(pe + prefix_connector + be + connector + he)
+    return variants
+
+
+latin = _build_name_variants(
+    bases=['miku'],
+    honorifics=[
+        'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
+        'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei',
+        'senpai', 'jou',
+    ],
+    prefixes=['o-'],
+    connector=r'[\-\s]?',
+    prefix_connector=r'\s?',
+)
+
+cyrillic = _build_name_variants(
+    bases=['мику'],
+    honorifics=[
+        'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин',
+        'хейка', 'хеика', 'денка', 'какка', 'си', 'чама', 'кюн',
+        'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо',
+    ],
+    prefixes=['о-'],
+    connector=r'[\-\s]?',
+    prefix_connector=r'\s?',
+)
+
+japanese = _build_name_variants(
+    bases=['みく', 'ミク', '未来'],
+    honorifics=[
+        'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん',
+        'へいか', 'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの',
+        'せんせい', 'せんぱい', 'じょう',
+        'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン',
+        'ヘイカ', 'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ',
+        'センセイ', 'センパイ', 'ジョウ',
+    ],
+    prefixes=['お', 'オ'],
+    connector=r'[-]?',
+    prefix_connector=r'',
+)
+
+all_v = sorted(latin + cyrillic + japanese, key=len, reverse=True)
+alts = '|'.join(all_v)
+
+NAME   = rf'\b(?:{alts})\b'
+PUNCT  = r'[,，、:：!！?？.。]'
+COMMA  = r'[,，、]'
+ETRAIL = r'[!！?？.。~～]*'
+ATRAIL = r'[!！?？.。~～♪♡❤]*'
+
+START_RE  = re.compile(rf'^\s*{NAME}\s*{PUNCT}',            re.IGNORECASE)
+END_RE    = re.compile(rf'{COMMA}\s*{NAME}\s*{ETRAIL}\s*$', re.IGNORECASE)
+MIDDLE_RE = re.compile(rf'{COMMA}\s*{NAME}\s*{COMMA}',      re.IGNORECASE)
+ALONE_RE  = re.compile(rf'^\s*{NAME}\s*{ATRAIL}\s*$',       re.IGNORECASE)
+
+
+def is_addressed(text: str) -> bool:
+    text = text.strip()
+    return bool(
+        START_RE.search(text)
+        or END_RE.search(text)
+        or MIDDLE_RE.search(text)
+        or ALONE_RE.search(text)
+    )
+
+
+def which_pattern(text: str) -> str:
+    """Return which pattern matched (for debugging)."""
+    text = text.strip()
+    matched = []
+    if START_RE.search(text):
+        matched.append("START")
+    if END_RE.search(text):
+        matched.append("END")
+    if MIDDLE_RE.search(text):
+        matched.append("MIDDLE")
+    if ALONE_RE.search(text):
+        matched.append("ALONE")
+    return ', '.join(matched) if matched else 'NONE'
+
+
+# ── Test cases ──
+# (message, expected, description)
+TESTS = [
+    # ═══ START pattern (name at beginning + punctuation) ═══
+    ("Miku, how are you?",            True,  "START: Latin + comma"),
+    ("miku, hello!",                   True,  "START: lowercase Latin"),
+    ("MIKU! listen to me",            True,  "START: uppercase + excl"),
+    ("Miku: can you help?",           True,  "START: colon"),
+    ("Miku. Please help.",            True,  "START: period"),
+    ("みく、元気？",                   True,  "START: Hiragana + JP comma"),
+    ("ミク！聞いて",                   True,  "START: Katakana + JP excl"),
+    ("未来、教えて",                   True,  "START: Kanji + JP comma"),
+    ("мику, привет!",                 True,  "START: Cyrillic + comma"),
+    ("МИКУ! слушай",                  True,  "START: Cyrillic upper + excl"),
+    ("Miku-chan, how are you?",       True,  "START: honorific-dash + comma"),
+    ("miku chan, hello!",             True,  "START: honorific-space + comma"),
+    ("mikuchan! listen!",             True,  "START: honorific-joined + excl"),
+    ("ミクちゃん、聞いて",             True,  "START: JP name+honorific + comma"),
+    ("ミクちゃん！元気？",             True,  "START: JP name+honorific + excl"),
+    ("みくさん, 教えて",               True,  "START: Hiragana + hon + comma"),
+    ("мику-сан, скажи",              True,  "START: Cyrillic + hon + comma"),
+    ("o-miku, hello",                 True,  "START: o-prefix Latin"),
+    ("おみく、ねえ",                   True,  "START: o-prefix Japanese"),
+    ("  Miku, hello  ",              True,  "START: whitespace padded"),
+
+    # ═══ END pattern (comma + name at end) ═══
+    ("how are you, Miku?",            True,  "END: comma + Latin + ?"),
+    ("how are you, Miku!",            True,  "END: comma + Latin + !"),
+    ("how are you, Miku",             True,  "END: comma + Latin no trail"),
+    ("tell me, miku.",                True,  "END: comma + lowercase + period"),
+    ("元気, ミク",                     True,  "END: comma + Katakana"),
+    ("教えて、みく！",                 True,  "END: JP comma + Hiragana + !"),
+    ("教えて、未来",                   True,  "END: JP comma + Kanji"),
+    ("скажи, мику!",                  True,  "END: Cyrillic comma + name"),
+    ("hello, Miku-chan!",             True,  "END: comma + honorific"),
+    ("hello, miku-san?",             True,  "END: comma + honorific + ?"),
+    ("元気、ミクちゃん",               True,  "END: JP comma + JP honorific"),
+    ("hello, o-miku",                 True,  "END: comma + o-prefix"),
+
+    # ═══ MIDDLE pattern (vocative — commas on both sides) ═══
+    ("On the contrary, Miku, I think you're wrong",     True, "MIDDLE: vocative Latin"),
+    ("I am very happy, Miku, you are so fun",           True, "MIDDLE: vocative Latin 2"),
+    ("well, Miku-chan, I think so",                      True, "MIDDLE: vocative + honorific"),
+    ("しかし、みく、それは違う",                          True, "MIDDLE: vocative Japanese"),
+    ("でも、ミクちゃん、聞いて",                          True, "MIDDLE: vocative JP + honorific"),
+    ("но, мику, я думаю",                               True, "MIDDLE: vocative Cyrillic"),
+    ("hey, miku, what do you think?",                    True, "MIDDLE: vocative casual"),
+    ("you know, Miku, that's not right",                 True, "MIDDLE: vocative mid-sentence"),
+
+    # ═══ ALONE pattern (name is the entire message) ═══
+    ("Miku",                          True,  "ALONE: bare Latin"),
+    ("miku",                          True,  "ALONE: lowercase"),
+    ("MIKU",                          True,  "ALONE: uppercase"),
+    ("Miku!",                         True,  "ALONE: + excl"),
+    ("Miku?",                         True,  "ALONE: + question"),
+    ("Miku!!",                        True,  "ALONE: + multi excl"),
+    ("みく",                           True,  "ALONE: Hiragana"),
+    ("ミク！",                         True,  "ALONE: Katakana + excl"),
+    ("未来",                           True,  "ALONE: Kanji"),
+    ("мику",                           True,  "ALONE: Cyrillic"),
+    ("Miku-chan",                      True,  "ALONE: Latin + honorific"),
+    ("miku chan!",                     True,  "ALONE: space honorific + excl"),
+    ("ミクちゃん",                     True,  "ALONE: JP honorific"),
+    ("ミクさん！",                     True,  "ALONE: JP honorific + excl"),
+    ("みくせんせい",                   True,  "ALONE: Hiragana + sensei"),
+    ("o-miku!",                       True,  "ALONE: o-prefix"),
+    ("おみく",                         True,  "ALONE: JP o-prefix"),
+    ("オミク",                         True,  "ALONE: Katakana o-prefix"),
+    ("  Miku  ",                      True,  "ALONE: whitespace"),
+    ("Miku~",                         True,  "ALONE: tilde"),
+    ("Miku♪",                         True,  "ALONE: music note"),
+    ("Miku❤",                         True,  "ALONE: heart"),
+    ("мику-чан",                      True,  "ALONE: Cyrillic + honorific"),
+    ("мику сан",                      True,  "ALONE: Cyrillic + space hon"),
+    ("未来さん",                       True,  "ALONE: Kanji + honorific"),
+
+    # ═══ Should NOT match (mere mentions / not addressing) ═══
+    ("I like Miku",                    False, "REJECT: object of sentence"),
+    ("Miku is cool",                   False, "REJECT: subject + is"),
+    ("Miku is my favorite vocaloid",   False, "REJECT: subject + statement"),
+    ("I saw Miku at a concert",        False, "REJECT: middle of sentence"),
+    ("told miku about it",             False, "REJECT: informal mention"),
+    ("hatsune miku concert",           False, "REJECT: event name"),
+    ("Do you know Miku?",             False, "REJECT: asking about her"),
+    ("I love Miku!",                   False, "REJECT: exclamation about her"),
+    ("I love Miku so much",           False, "REJECT: longer statement"),
+    ("ミクは元気だよ",                 False, "REJECT: Japanese 'Miku is well'"),
+    ("ミクが好き",                     False, "REJECT: Japanese 'I like Miku'"),
+    ("ミクのことが好き",               False, "REJECT: Japanese 'I like Miku (thing)'"),
+    ("мику была там",                  False, "REJECT: Cyrillic 'Miku was there'"),
+    ("мику такая красивая",            False, "REJECT: Cyrillic 'Miku is pretty'"),
+    ("the Miku concert was great",     False, "REJECT: event discussion"),
+    ("My favorite is Miku for sure",   False, "REJECT: no comma before name at end"),
+    ("yeah miku is pretty cool right", False, "REJECT: casual mention"),
+    ("have you seen miku today",       False, "REJECT: asking about her"),
+    ("miku and I went shopping",       False, "REJECT: subject of sentence"),
+    ("I met miku yesterday",           False, "REJECT: object mid-sentence"),
+    ("mikumiku fan",                   False, "REJECT: compound word (\\b boundary)"),
+    ("hatsune miku is singing",        False, "REJECT: full name as subject"),
+
+    # ═══ Edge cases ═══
+    ("",                               False, "EDGE: empty message"),
+    ("hello",                          False, "EDGE: no name at all"),
+    ("hello!",                         False, "EDGE: exclamation, no name"),
+    ("??",                             False, "EDGE: just punctuation"),
+    ("   ",                            False, "EDGE: just whitespace"),
+    ("chan",                            False, "EDGE: just an honorific"),
+    ("o-",                             False, "EDGE: just a prefix"),
+]
+
+
+def main():
+    print(f"Generated {len(all_v)} name variants")
+    print(f"Running {len(TESTS)} test cases...\n")
+
+    passed = 0
+    failed = 0
+
+    for msg, expected, desc in TESTS:
+        result = is_addressed(msg)
+        ok = result == expected
+        if ok:
+            passed += 1
+        else:
+            failed += 1
+            pattern = which_pattern(msg)
+            exp_str = "ADDR" if expected else "SKIP"
+            got_str = "ADDR" if result else "SKIP"
+            print(f"  FAIL  expected={exp_str}  got={got_str}  matched={pattern}")
+            print(f"        {desc}")
+            print(f"        message: \"{msg}\"\n")
+
+    print(f"\n{'='*50}")
+    print(f"  {passed}/{len(TESTS)} passed, {failed} failed")
+    print(f"{'='*50}")
+
+    return 0 if failed == 0 else 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())