Files
miku-discord/tests/test_addressing.py
koko210Serve fdde12c03d reorganize: move all test scripts to tests/ directory
- Moved 8 root-level test scripts + 2 from bot/ to tests/
- Moved run_rocinante_test.sh runner script to tests/
- Added tests/README.md documenting each test's purpose, type, and requirements
- Added test_pfp_context.py and test_rocinante_comparison.py (previously untracked)
2026-03-04 00:18:21 +02:00

250 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Comprehensive test for Miku addressing detection patterns.
Tests the pre-compiled regex patterns from bot/utils/core.py to verify
that Miku is only triggered when *addressed*, not merely *mentioned*.
"""
import re
import sys
# ── Replicate the pattern-building logic from core.py ──
def _build_name_variants(bases, honorifics, prefixes, connector, prefix_connector):
variants = []
for base in bases:
be = re.escape(base)
variants.append(be)
for h in honorifics:
he = re.escape(h)
variants.append(be + connector + he)
for p in prefixes:
pe = re.escape(p)
variants.append(pe + prefix_connector + be)
for h in honorifics:
he = re.escape(h)
variants.append(pe + prefix_connector + be + connector + he)
return variants
# Romanised spellings: bare "miku", "miku" + honorific (joined, hyphenated
# or space-separated) and the same forms behind an "o-" prefix.
latin = _build_name_variants(
    bases=['miku'],
    honorifics=[
        'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
        'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei',
        'senpai', 'jou',
    ],
    prefixes=['o-'],
    connector=r'[\-\s]?',
    prefix_connector=r'\s?',
)

# Cyrillic transliterations, using the same connector rules as the Latin set.
cyrillic = _build_name_variants(
    bases=['мику'],
    honorifics=[
        'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин',
        'хейка', 'хеика', 'денка', 'какка', 'си', 'чама', 'кюн',
        'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо',
    ],
    prefixes=['о-'],
    connector=r'[\-\s]?',
    prefix_connector=r'\s?',
)

# Japanese spellings (hiragana, katakana, kanji). The honorifics are listed
# first in hiragana and then in katakana, both in the same order as the
# Latin list above; the 11th entry of each row is therefore "shi".
japanese = _build_name_variants(
    bases=['みく', 'ミク', '未来'],
    honorifics=[
        'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん',
        'へいか', 'でんか', 'かっか', 'し', 'ちゃま', 'きゅん', 'どの',
        'せんせい', 'せんぱい', 'じょう',
        'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン',
        'ヘイカ', 'デンカ', 'カッカ', 'シ', 'チャマ', 'キュン', 'ドノ',
        'センセイ', 'センパイ', 'ジョウ',
    ],
    # Honorific "o" prefix in hiragana and katakana; the "おみく" and
    # "オミク" cases in TESTS can only match when these are present.
    prefixes=['お', 'オ'],
    # NOTE(review): only an ASCII hyphen is accepted here; if the reference
    # pattern also allows the long-vowel mark, confirm against bot/utils/core.py.
    connector=r'[-]?',
    prefix_connector=r'',
)

# Longest pattern strings first, so the alternation tries the more specific
# spellings (name + honorific) before the bare name.
all_v = sorted(latin + cyrillic + japanese, key=len, reverse=True)
alts = '|'.join(all_v)

# \b on both sides stops the name from matching inside a longer word
# (e.g. "mikumiku").
NAME = rf'\b(?:{alts})\b'

# Punctuation accepted directly after a name that opens the message.
# NOTE(review): the doubled ',' here and in COMMA looks like a full-width
# comma that was flattened to ASCII — confirm against bot/utils/core.py.
PUNCT = r'[,,、:!?.。]'
# Comma-like separators that set the name off from the rest of the sentence.
COMMA = r'[,,、]'
# Trailing characters allowed after a name that closes the message.
ETRAIL = r'[!?.。~]*'
# Trailing characters allowed when the name is the whole message.
# A doubled '~' inside a character class makes `re` emit a FutureWarning
# ("possible set symmetric difference"); a single '~' matches the same set.
ATRAIL = r'[!?.。~♪♡❤]*'

# "Miku, ..."   — name first, followed by punctuation.
START_RE = re.compile(rf'^\s*{NAME}\s*{PUNCT}', re.IGNORECASE)
# "..., Miku!"  — comma, then the name at the very end.
END_RE = re.compile(rf'{COMMA}\s*{NAME}\s*{ETRAIL}\s*$', re.IGNORECASE)
# "..., Miku, ..." — name enclosed by commas mid-sentence.
MIDDLE_RE = re.compile(rf'{COMMA}\s*{NAME}\s*{COMMA}', re.IGNORECASE)
# "Miku~"       — the name (plus decoration) is the entire message.
ALONE_RE = re.compile(rf'^\s*{NAME}\s*{ATRAIL}\s*$', re.IGNORECASE)
def is_addressed(text: str) -> bool:
    """Return True when *text* addresses Miku rather than merely mentioning her.

    The message is stripped of surrounding whitespace and checked against
    the START, END, MIDDLE and ALONE patterns, in that order; the first
    hit decides.
    """
    candidate = text.strip()
    patterns = (START_RE, END_RE, MIDDLE_RE, ALONE_RE)
    return any(rx.search(candidate) for rx in patterns)
def which_pattern(text: str) -> str:
    """Name every addressing pattern that matches *text* (debugging aid).

    Returns the matching labels joined by ", " in the fixed order
    START, END, MIDDLE, ALONE, or the string 'NONE' when nothing matches.
    """
    stripped = text.strip()
    labelled = (
        ("START", START_RE),
        ("END", END_RE),
        ("MIDDLE", MIDDLE_RE),
        ("ALONE", ALONE_RE),
    )
    hits = [label for label, rx in labelled if rx.search(stripped)]
    return ', '.join(hits) or 'NONE'
# ── Test cases ──
# (message, expected, description)
# Table of cases consumed by main(). Each entry is a 3-tuple:
#   message     - the text passed to is_addressed()
#   expected    - the boolean is_addressed(message) must return
#   description - label printed by main() when the case fails
TESTS = [
    # ═══ START pattern (name at beginning + punctuation) ═══
    ("Miku, how are you?", True, "START: Latin + comma"),
    ("miku, hello!", True, "START: lowercase Latin"),
    ("MIKU! listen to me", True, "START: uppercase + excl"),
    ("Miku: can you help?", True, "START: colon"),
    ("Miku. Please help.", True, "START: period"),
    ("みく、元気?", True, "START: Hiragana + JP comma"),
    ("ミク!聞いて", True, "START: Katakana + JP excl"),
    ("未来、教えて", True, "START: Kanji + JP comma"),
    ("мику, привет!", True, "START: Cyrillic + comma"),
    ("МИКУ! слушай", True, "START: Cyrillic upper + excl"),
    ("Miku-chan, how are you?", True, "START: honorific-dash + comma"),
    ("miku chan, hello!", True, "START: honorific-space + comma"),
    ("mikuchan! listen!", True, "START: honorific-joined + excl"),
    ("ミクちゃん、聞いて", True, "START: JP name+honorific + comma"),
    ("ミクちゃん!元気?", True, "START: JP name+honorific + excl"),
    ("みくさん, 教えて", True, "START: Hiragana + hon + comma"),
    ("мику-сан, скажи", True, "START: Cyrillic + hon + comma"),
    ("o-miku, hello", True, "START: o-prefix Latin"),
    ("おみく、ねえ", True, "START: o-prefix Japanese"),
    (" Miku, hello ", True, "START: whitespace padded"),
    # ═══ END pattern (comma + name at end) ═══
    ("how are you, Miku?", True, "END: comma + Latin + ?"),
    ("how are you, Miku!", True, "END: comma + Latin + !"),
    ("how are you, Miku", True, "END: comma + Latin no trail"),
    ("tell me, miku.", True, "END: comma + lowercase + period"),
    ("元気, ミク", True, "END: comma + Katakana"),
    ("教えて、みく!", True, "END: JP comma + Hiragana + !"),
    ("教えて、未来", True, "END: JP comma + Kanji"),
    ("скажи, мику!", True, "END: Cyrillic comma + name"),
    ("hello, Miku-chan!", True, "END: comma + honorific"),
    ("hello, miku-san?", True, "END: comma + honorific + ?"),
    ("元気、ミクちゃん", True, "END: JP comma + JP honorific"),
    ("hello, o-miku", True, "END: comma + o-prefix"),
    # ═══ MIDDLE pattern (vocative — commas on both sides) ═══
    ("On the contrary, Miku, I think you're wrong", True, "MIDDLE: vocative Latin"),
    ("I am very happy, Miku, you are so fun", True, "MIDDLE: vocative Latin 2"),
    ("well, Miku-chan, I think so", True, "MIDDLE: vocative + honorific"),
    ("しかし、みく、それは違う", True, "MIDDLE: vocative Japanese"),
    ("でも、ミクちゃん、聞いて", True, "MIDDLE: vocative JP + honorific"),
    ("но, мику, я думаю", True, "MIDDLE: vocative Cyrillic"),
    ("hey, miku, what do you think?", True, "MIDDLE: vocative casual"),
    ("you know, Miku, that's not right", True, "MIDDLE: vocative mid-sentence"),
    # ═══ ALONE pattern (name is the entire message) ═══
    ("Miku", True, "ALONE: bare Latin"),
    ("miku", True, "ALONE: lowercase"),
    ("MIKU", True, "ALONE: uppercase"),
    ("Miku!", True, "ALONE: + excl"),
    ("Miku?", True, "ALONE: + question"),
    ("Miku!!", True, "ALONE: + multi excl"),
    ("みく", True, "ALONE: Hiragana"),
    ("ミク!", True, "ALONE: Katakana + excl"),
    ("未来", True, "ALONE: Kanji"),
    ("мику", True, "ALONE: Cyrillic"),
    ("Miku-chan", True, "ALONE: Latin + honorific"),
    ("miku chan!", True, "ALONE: space honorific + excl"),
    ("ミクちゃん", True, "ALONE: JP honorific"),
    ("ミクさん!", True, "ALONE: JP honorific + excl"),
    ("みくせんせい", True, "ALONE: Hiragana + sensei"),
    ("o-miku!", True, "ALONE: o-prefix"),
    ("おみく", True, "ALONE: JP o-prefix"),
    ("オミク", True, "ALONE: Katakana o-prefix"),
    (" Miku ", True, "ALONE: whitespace"),
    ("Miku~", True, "ALONE: tilde"),
    ("Miku♪", True, "ALONE: music note"),
    ("Miku❤", True, "ALONE: heart"),
    ("мику-чан", True, "ALONE: Cyrillic + honorific"),
    ("мику сан", True, "ALONE: Cyrillic + space hon"),
    ("未来さん", True, "ALONE: Kanji + honorific"),
    # ═══ Should NOT match (mere mentions / not addressing) ═══
    ("I like Miku", False, "REJECT: object of sentence"),
    ("Miku is cool", False, "REJECT: subject + is"),
    ("Miku is my favorite vocaloid", False, "REJECT: subject + statement"),
    ("I saw Miku at a concert", False, "REJECT: middle of sentence"),
    ("told miku about it", False, "REJECT: informal mention"),
    ("hatsune miku concert", False, "REJECT: event name"),
    ("Do you know Miku?", False, "REJECT: asking about her"),
    ("I love Miku!", False, "REJECT: exclamation about her"),
    ("I love Miku so much", False, "REJECT: longer statement"),
    ("ミクは元気だよ", False, "REJECT: Japanese 'Miku is well'"),
    ("ミクが好き", False, "REJECT: Japanese 'I like Miku'"),
    ("ミクのことが好き", False, "REJECT: Japanese 'I like Miku (thing)'"),
    ("мику была там", False, "REJECT: Cyrillic 'Miku was there'"),
    ("мику такая красивая", False, "REJECT: Cyrillic 'Miku is pretty'"),
    ("the Miku concert was great", False, "REJECT: event discussion"),
    ("My favorite is Miku for sure", False, "REJECT: no comma before name at end"),
    ("yeah miku is pretty cool right", False, "REJECT: casual mention"),
    ("have you seen miku today", False, "REJECT: asking about her"),
    ("miku and I went shopping", False, "REJECT: subject of sentence"),
    ("I met miku yesterday", False, "REJECT: object mid-sentence"),
    ("mikumiku fan", False, "REJECT: compound word (\\b boundary)"),
    ("hatsune miku is singing", False, "REJECT: full name as subject"),
    # ═══ Edge cases ═══
    ("", False, "EDGE: empty message"),
    ("hello", False, "EDGE: no name at all"),
    ("hello!", False, "EDGE: exclamation, no name"),
    ("??", False, "EDGE: just punctuation"),
    (" ", False, "EDGE: just whitespace"),
    ("chan", False, "EDGE: just an honorific"),
    ("o-", False, "EDGE: just a prefix"),
]
def main():
    """Run every case in TESTS and report the outcome on stdout.

    Each failing case is printed with the expected and actual verdict,
    the patterns that matched, its description and the message itself.
    A pass/fail summary follows.

    Returns:
        0 when every case passed, 1 otherwise (suitable as an exit code).
    """
    print(f"Generated {len(all_v)} name variants")
    print(f"Running {len(TESTS)} test cases...\n")

    n_pass = 0
    n_fail = 0
    for message, want, label in TESTS:
        got = is_addressed(message)
        if got == want:
            n_pass += 1
            continue

        # Mismatch: show enough detail to see which pattern misfired.
        n_fail += 1
        hit = which_pattern(message)
        want_tag = "ADDR" if want else "SKIP"
        got_tag = "ADDR" if got else "SKIP"
        print(f" FAIL expected={want_tag} got={got_tag} matched={hit}")
        print(f" {label}")
        print(f" message: \"{message}\"\n")

    rule = '=' * 50
    print(f"\n{rule}")
    print(f" {n_pass}/{len(TESTS)} passed, {n_fail} failed")
    print(rule)
    return 1 if n_fail else 0
# Script entry point: hand main()'s status to the interpreter as the exit code.
if __name__ == '__main__':
    raise SystemExit(main())