#!/usr/bin/env python3
"""Comprehensive test for Miku addressing detection patterns.

Tests the pre-compiled regex patterns from bot/utils/core.py to verify that
Miku is only triggered when *addressed*, not merely *mentioned*.

Run directly; the process exit status is 0 when every case in ``TESTS``
passes and 1 otherwise.
"""

import re
import sys
from typing import List, Sequence, Tuple


# ── Replicate the pattern-building logic from core.py ──

def _build_name_variants(
    bases: Sequence[str],
    honorifics: Sequence[str],
    prefixes: Sequence[str],
    connector: str,
    prefix_connector: str,
) -> List[str]:
    """Return regex fragments for every spelling of the name.

    For each base name the result contains, in this order: the bare name,
    the name followed by each honorific, and then, for each prefix, the
    prefixed name and the prefixed name followed by each honorific.

    Args:
        bases: Literal base names; each is passed through ``re.escape``.
        honorifics: Literal honorific suffixes; each is escaped.
        prefixes: Literal prefixes; each is escaped.
        connector: Regex fragment (NOT escaped) placed between a name and
            an honorific.
        prefix_connector: Regex fragment (NOT escaped) placed between a
            prefix and a name.

    Returns:
        A list of regex fragments, one per variant. Empty when ``bases``
        is empty.
    """
    # Escape once up front instead of once per (base, prefix) combination.
    escaped_honorifics = [re.escape(h) for h in honorifics]
    escaped_prefixes = [re.escape(p) for p in prefixes]

    variants: List[str] = []
    for base in bases:
        stem = re.escape(base)
        # The bare name first, then one "head" per prefix.
        heads = [stem] + [p + prefix_connector + stem for p in escaped_prefixes]
        for head in heads:
            variants.append(head)
            variants.extend(head + connector + h for h in escaped_honorifics)
    return variants


latin = _build_name_variants(
    bases=['miku'],
    honorifics=[
        'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
        'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei',
        'senpai', 'jou',
    ],
    prefixes=['o-'],
    connector=r'[\-\s]?',
    prefix_connector=r'\s?',
)

cyrillic = _build_name_variants(
    bases=['мику'],
    honorifics=[
        'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин', 'хейка', 'хеика',
        'денка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сенсэй', 'сенсеи',
        'сенпай', 'сенпаи', 'джо',
    ],
    prefixes=['о-'],
    connector=r'[\-\s]?',
    prefix_connector=r'\s?',
)

japanese = _build_name_variants(
    bases=['みく', 'ミク', '未来'],
    honorifics=[
        'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん', 'へいか',
        'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの', 'せんせい',
        'せんぱい', 'じょう',
        'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
        'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ', 'センセイ',
        'センパイ', 'ジョウ',
    ],
    prefixes=['お', 'オ'],
    connector=r'[-]?',
    prefix_connector=r'',
)

# Longest fragments first so the alternation tries the most specific
# spelling (name + honorific) before the bare name.
all_v = sorted(latin + cyrillic + japanese, key=len, reverse=True)
alts = '|'.join(all_v)
NAME = rf'\b(?:{alts})\b'

# Punctuation classes.
# The fullwidth forms are written as \uXXXX escapes (which the ``re`` module
# resolves itself) so they stay distinct from their ASCII twins even if this
# file passes through a tool that applies Unicode compatibility folding:
#   \uFF0C fullwidth comma      \uFF1A fullwidth colon
#   \uFF01 fullwidth excl.      \uFF1F fullwidth question mark
#   \uFF5E fullwidth tilde
# NOTE(review): the previous classes listed each of these ASCII characters
# twice, which looks like fullwidth forms that had been folded to ASCII.
# Confirm the classes in bot/utils/core.py contain the same characters.
PUNCT = r'[,\uFF0C、:\uFF1A!\uFF01?\uFF1F.。]'
COMMA = r'[,\uFF0C、]'
ETRAIL = r'[!\uFF01?\uFF1F.。~\uFF5E]*'
ATRAIL = r'[!\uFF01?\uFF1F.。~\uFF5E♪♡❤]*'

# Name opens the message and is followed by punctuation.
START_RE = re.compile(rf'^\s*{NAME}\s*{PUNCT}', re.IGNORECASE)
# Comma, then the name closing the message (optional trailing punctuation).
END_RE = re.compile(rf'{COMMA}\s*{NAME}\s*{ETRAIL}\s*$', re.IGNORECASE)
# Name set off by commas on both sides.
MIDDLE_RE = re.compile(rf'{COMMA}\s*{NAME}\s*{COMMA}', re.IGNORECASE)
# Name is the entire message (optional trailing punctuation / symbols).
ALONE_RE = re.compile(rf'^\s*{NAME}\s*{ATRAIL}\s*$', re.IGNORECASE)

# Single source of truth for both is_addressed() and which_pattern(), so the
# two helpers cannot drift apart when a pattern is added or removed.
_PATTERNS = (
    ("START", START_RE),
    ("END", END_RE),
    ("MIDDLE", MIDDLE_RE),
    ("ALONE", ALONE_RE),
)


def is_addressed(text: str) -> bool:
    """Return True when *text* matches any of the four addressing patterns.

    Leading and trailing whitespace is stripped before matching.
    """
    text = text.strip()
    return any(regex.search(text) for _, regex in _PATTERNS)


def which_pattern(text: str) -> str:
    """Return which pattern matched (for debugging).

    The result is a ``', '``-joined list of the labels START, END, MIDDLE
    and ALONE (in that order) for every pattern that matches the stripped
    text, or ``'NONE'`` when nothing matches.
    """
    text = text.strip()
    matched = [label for label, regex in _PATTERNS if regex.search(text)]
    return ', '.join(matched) if matched else 'NONE'


# ── Test cases ──
# (message, expected, description)
TESTS: List[Tuple[str, bool, str]] = [
    # ═══ START pattern (name at beginning + punctuation) ═══
    ("Miku, how are you?", True, "START: Latin + comma"),
    ("miku, hello!", True, "START: lowercase Latin"),
    ("MIKU! listen to me", True, "START: uppercase + excl"),
    ("Miku: can you help?", True, "START: colon"),
    ("Miku. Please help.", True, "START: period"),
    ("みく、元気?", True, "START: Hiragana + JP comma"),
    ("ミク!聞いて", True, "START: Katakana + JP excl"),
    ("未来、教えて", True, "START: Kanji + JP comma"),
    ("мику, привет!", True, "START: Cyrillic + comma"),
    ("МИКУ! слушай", True, "START: Cyrillic upper + excl"),
    ("Miku-chan, how are you?", True, "START: honorific-dash + comma"),
    ("miku chan, hello!", True, "START: honorific-space + comma"),
    ("mikuchan! listen!", True, "START: honorific-joined + excl"),
    ("ミクちゃん、聞いて", True, "START: JP name+honorific + comma"),
    ("ミクちゃん!元気?", True, "START: JP name+honorific + excl"),
    ("みくさん, 教えて", True, "START: Hiragana + hon + comma"),
    ("мику-сан, скажи", True, "START: Cyrillic + hon + comma"),
    ("o-miku, hello", True, "START: o-prefix Latin"),
    ("おみく、ねえ", True, "START: o-prefix Japanese"),
    (" Miku, hello ", True, "START: whitespace padded"),

    # ═══ END pattern (comma + name at end) ═══
    ("how are you, Miku?", True, "END: comma + Latin + ?"),
    ("how are you, Miku!", True, "END: comma + Latin + !"),
    ("how are you, Miku", True, "END: comma + Latin no trail"),
    ("tell me, miku.", True, "END: comma + lowercase + period"),
    ("元気, ミク", True, "END: comma + Katakana"),
    ("教えて、みく!", True, "END: JP comma + Hiragana + !"),
    ("教えて、未来", True, "END: JP comma + Kanji"),
    ("скажи, мику!", True, "END: Cyrillic comma + name"),
    ("hello, Miku-chan!", True, "END: comma + honorific"),
    ("hello, miku-san?", True, "END: comma + honorific + ?"),
    ("元気、ミクちゃん", True, "END: JP comma + JP honorific"),
    ("hello, o-miku", True, "END: comma + o-prefix"),

    # ═══ MIDDLE pattern (vocative — commas on both sides) ═══
    ("On the contrary, Miku, I think you're wrong", True, "MIDDLE: vocative Latin"),
    ("I am very happy, Miku, you are so fun", True, "MIDDLE: vocative Latin 2"),
    ("well, Miku-chan, I think so", True, "MIDDLE: vocative + honorific"),
    ("しかし、みく、それは違う", True, "MIDDLE: vocative Japanese"),
    ("でも、ミクちゃん、聞いて", True, "MIDDLE: vocative JP + honorific"),
    ("но, мику, я думаю", True, "MIDDLE: vocative Cyrillic"),
    ("hey, miku, what do you think?", True, "MIDDLE: vocative casual"),
    ("you know, Miku, that's not right", True, "MIDDLE: vocative mid-sentence"),

    # ═══ ALONE pattern (name is the entire message) ═══
    ("Miku", True, "ALONE: bare Latin"),
    ("miku", True, "ALONE: lowercase"),
    ("MIKU", True, "ALONE: uppercase"),
    ("Miku!", True, "ALONE: + excl"),
    ("Miku?", True, "ALONE: + question"),
    ("Miku!!", True, "ALONE: + multi excl"),
    ("みく", True, "ALONE: Hiragana"),
    ("ミク!", True, "ALONE: Katakana + excl"),
    ("未来", True, "ALONE: Kanji"),
    ("мику", True, "ALONE: Cyrillic"),
    ("Miku-chan", True, "ALONE: Latin + honorific"),
    ("miku chan!", True, "ALONE: space honorific + excl"),
    ("ミクちゃん", True, "ALONE: JP honorific"),
    ("ミクさん!", True, "ALONE: JP honorific + excl"),
    ("みくせんせい", True, "ALONE: Hiragana + sensei"),
    ("o-miku!", True, "ALONE: o-prefix"),
    ("おみく", True, "ALONE: JP o-prefix"),
    ("オミク", True, "ALONE: Katakana o-prefix"),
    (" Miku ", True, "ALONE: whitespace"),
    ("Miku~", True, "ALONE: tilde"),
    ("Miku♪", True, "ALONE: music note"),
    ("Miku❤", True, "ALONE: heart"),
    ("мику-чан", True, "ALONE: Cyrillic + honorific"),
    ("мику сан", True, "ALONE: Cyrillic + space hon"),
    ("未来さん", True, "ALONE: Kanji + honorific"),

    # ═══ Fullwidth punctuation (written as escapes so it cannot be folded) ═══
    ("ミク\uFF01聞いて", True, "FULLWIDTH: START + fullwidth excl"),
    ("Miku\uFF1A help", True, "FULLWIDTH: START + fullwidth colon"),
    ("元気\uFF0Cミク\uFF1F", True, "FULLWIDTH: END + fullwidth comma and ?"),
    ("でも\uFF0Cミク\uFF0C聞いて", True, "FULLWIDTH: MIDDLE + fullwidth commas"),
    ("ミク\uFF5E", True, "FULLWIDTH: ALONE + fullwidth tilde"),

    # ═══ Should NOT match (mere mentions / not addressing) ═══
    ("I like Miku", False, "REJECT: object of sentence"),
    ("Miku is cool", False, "REJECT: subject + is"),
    ("Miku is my favorite vocaloid", False, "REJECT: subject + statement"),
    ("I saw Miku at a concert", False, "REJECT: middle of sentence"),
    ("told miku about it", False, "REJECT: informal mention"),
    ("hatsune miku concert", False, "REJECT: event name"),
    ("Do you know Miku?", False, "REJECT: asking about her"),
    ("I love Miku!", False, "REJECT: exclamation about her"),
    ("I love Miku so much", False, "REJECT: longer statement"),
    ("ミクは元気だよ", False, "REJECT: Japanese 'Miku is well'"),
    ("ミクが好き", False, "REJECT: Japanese 'I like Miku'"),
    ("ミクのことが好き", False, "REJECT: Japanese 'I like Miku (thing)'"),
    ("мику была там", False, "REJECT: Cyrillic 'Miku was there'"),
    ("мику такая красивая", False, "REJECT: Cyrillic 'Miku is pretty'"),
    ("the Miku concert was great", False, "REJECT: event discussion"),
    ("My favorite is Miku for sure", False, "REJECT: no comma before name at end"),
    ("yeah miku is pretty cool right", False, "REJECT: casual mention"),
    ("have you seen miku today", False, "REJECT: asking about her"),
    ("miku and I went shopping", False, "REJECT: subject of sentence"),
    ("I met miku yesterday", False, "REJECT: object mid-sentence"),
    ("mikumiku fan", False, "REJECT: compound word (\\b boundary)"),
    ("hatsune miku is singing", False, "REJECT: full name as subject"),

    # ═══ Edge cases ═══
    ("", False, "EDGE: empty message"),
    ("hello", False, "EDGE: no name at all"),
    ("hello!", False, "EDGE: exclamation, no name"),
    ("??", False, "EDGE: just punctuation"),
    (" ", False, "EDGE: just whitespace"),
    ("chan", False, "EDGE: just an honorific"),
    ("o-", False, "EDGE: just a prefix"),
]


def main() -> int:
    """Run every case in ``TESTS`` and print a report to stdout.

    Each failing case is reported with the expected and actual verdict and
    the names of the patterns that matched.

    Returns:
        0 when every case passes, 1 otherwise (suitable for ``sys.exit``).
    """
    print(f"Generated {len(all_v)} name variants")
    print(f"Running {len(TESTS)} test cases...\n")

    failed = 0
    for msg, expected, desc in TESTS:
        result = is_addressed(msg)
        if result == expected:
            continue
        failed += 1
        pattern = which_pattern(msg)
        exp_str = "ADDR" if expected else "SKIP"
        got_str = "ADDR" if result else "SKIP"
        print(f" FAIL expected={exp_str} got={got_str} matched={pattern}")
        print(f" {desc}")
        print(f" message: \"{msg}\"\n")
    passed = len(TESTS) - failed

    print(f"\n{'='*50}")
    print(f" {passed}/{len(TESTS)} passed, {failed} failed")
    print(f"{'='*50}")
    return 0 if failed == 0 else 1


if __name__ == '__main__':
    sys.exit(main())