add: cheshire-cat configuration, tooling, tests, and documentation
Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
This commit is contained in:
212
cheshire-cat/compare_systems.py
Executable file
212
cheshire-cat/compare_systems.py
Executable file
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env python3
"""
Comparison Benchmark: Current System vs Cheshire Cat
Measures the difference in performance between the two approaches
"""

import sys
import os
# Make the parent directory importable so the existing `bot` package
# (the "current system" side of the comparison) can be found.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

import requests
import time
import statistics
from typing import List, Dict
import asyncio
|
||||
|
||||
# Base URL of the locally running Cheshire Cat HTTP API.
CAT_URL = "http://localhost:1865"

# Import your current LLM function
# Optional: if the parent `bot` package is not importable (e.g. script run
# standalone), fall back to benchmarking only the Cheshire Cat side.
try:
    from bot.utils import llm
    from bot import globals as bot_globals
    HAS_BOT_CODE = True
except ImportError:
    print("⚠️ Could not import bot code - will skip direct comparison")
    HAS_BOT_CODE = False

# Fixed prompt set sent to BOTH systems so the latency numbers are comparable.
TEST_QUERIES = [
    "What is your favorite food?",
    "Tell me about your friends",
    "What's the song World is Mine about?",
    "Hello Miku!",
    "Do you like to sing?",
    "Who created you?",
    "What color is your hair?",
    "Tell me about green onions",
    "What do you do for fun?",
    "Are you a Vocaloid?"
]
|
||||
|
||||
def test_cat_query(query: str, timeout: int = 60) -> Dict:
    """Send a single query to the Cheshire Cat HTTP API and time the round trip.

    Args:
        query: User message to send to the Cat.
        timeout: Per-request timeout in seconds.

    Returns:
        A result dict with keys "success", "latency_ms", "method", and either
        "response" (on success) or "error" (on failure). Never raises: any
        network/HTTP exception is captured into the result so the benchmark
        loop keeps going.
    """
    start_time = time.time()

    try:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": query},
            headers={"Content-Type": "application/json"},
            timeout=timeout
        )

        latency_ms = (time.time() - start_time) * 1000

        if response.status_code == 200:
            data = response.json()
            content = data.get("content", "")

            # Filter out tool calls: the Cat sometimes answers with a raw JSON
            # tool invocation instead of natural text. A leading '{' already
            # covers the '{"name":' case, so one startswith check suffices
            # (the original had a redundant second condition).
            # NOTE: empty content also falls into the error branch below.
            if content and not content.startswith('{'):
                return {
                    "success": True,
                    "latency_ms": latency_ms,
                    "response": content,
                    "method": "cheshire_cat"
                }
            else:
                return {
                    "success": False,
                    "latency_ms": latency_ms,
                    "error": "Got tool call instead of text",
                    "method": "cheshire_cat"
                }
        else:
            return {
                "success": False,
                "latency_ms": latency_ms,
                "error": f"HTTP {response.status_code}",
                "method": "cheshire_cat"
            }
    except Exception as e:
        # Broad catch is deliberate: a benchmark run must survive connection
        # refusals, timeouts, and malformed JSON alike.
        return {
            "success": False,
            "latency_ms": (time.time() - start_time) * 1000,
            "error": str(e),
            "method": "cheshire_cat"
        }
|
||||
|
||||
async def test_current_query(query: str) -> Dict:
    """Test query using current Miku bot system"""
    if not HAS_BOT_CODE:
        return {"success": False, "error": "Bot code not available", "method": "current"}

    started = time.time()

    try:
        # Use your existing query_llama function
        answer = await llm.query_llama(
            user_prompt=query,
            user_id="benchmark_test",
            guild_id=None,
            response_type="dm_response"
        )
        elapsed_ms = (time.time() - started) * 1000
    except Exception as exc:
        # Report the failure instead of propagating it, so the benchmark
        # loop can continue with the remaining queries.
        return {
            "success": False,
            "latency_ms": (time.time() - started) * 1000,
            "error": str(exc),
            "method": "current"
        }

    return {
        "success": True,
        "latency_ms": elapsed_ms,
        "response": answer,
        "method": "current"
    }
|
||||
|
||||
def _print_latency_stats(label: str, times: List[float]) -> None:
    """Print the latency summary (mean/median/min/max + success rate) for one system.

    Extracted because the original printed the identical block twice, once
    per system, differing only in the label and the list of samples.
    """
    print(f"\n{label}:")
    print(f"   Mean latency: {statistics.mean(times):.0f} ms")
    print(f"   Median latency: {statistics.median(times):.0f} ms")
    print(f"   Min latency: {min(times):.0f} ms")
    print(f"   Max latency: {max(times):.0f} ms")
    print(f"   Success rate: {len(times)}/{len(TEST_QUERIES)} ({len(times)/len(TEST_QUERIES)*100:.0f}%)")


async def run_comparison():
    """Run comparison between both systems.

    Sends every query in TEST_QUERIES first to the Cheshire Cat HTTP API and
    then (if the bot package imported) to the current system, collecting
    latencies for whichever calls succeed, and prints a side-by-side summary.
    """
    print("=" * 70)
    print("⚖️ COMPARISON: Current System vs Cheshire Cat")
    print("=" * 70)

    cat_times: List[float] = []
    current_times: List[float] = []

    for i, query in enumerate(TEST_QUERIES):
        print(f"\n[{i+1}/{len(TEST_QUERIES)}] Query: '{query}'")
        print("-" * 70)

        # Test Cheshire Cat
        cat_result = test_cat_query(query)
        if cat_result["success"]:
            cat_times.append(cat_result["latency_ms"])
            print(f" 🐱 Cheshire Cat: {cat_result['latency_ms']:.0f}ms")
            print(f" Response: {cat_result['response'][:80]}...")
        else:
            print(f" 🐱 Cheshire Cat: ❌ {cat_result.get('error', 'Failed')}")

        # Small delay between tests so one system's load doesn't skew the other's timing
        await asyncio.sleep(1)

        # Test current system
        if HAS_BOT_CODE:
            current_result = await test_current_query(query)
            if current_result["success"]:
                current_times.append(current_result["latency_ms"])
                print(f" 📦 Current System: {current_result['latency_ms']:.0f}ms")
                print(f" Response: {current_result['response'][:80]}...")
            else:
                print(f" 📦 Current System: ❌ {current_result.get('error', 'Failed')}")

            await asyncio.sleep(1)

    # Print comparison statistics
    print("\n" + "=" * 70)
    print("📊 COMPARISON RESULTS")
    print("=" * 70)

    if cat_times:
        _print_latency_stats("🐱 Cheshire Cat", cat_times)

    if current_times:
        _print_latency_stats("📦 Current System", current_times)

    if cat_times and current_times:
        print(f"\n⚖️ Comparison:")
        cat_mean = statistics.mean(cat_times)
        current_mean = statistics.mean(current_times)
        diff = cat_mean - current_mean
        diff_pct = (diff / current_mean) * 100

        if diff > 0:
            print(f" Cheshire Cat is {diff:.0f}ms SLOWER ({diff_pct:+.1f}%)")
        else:
            print(f" Cheshire Cat is {abs(diff):.0f}ms FASTER ({diff_pct:+.1f}%)")

        # Voice chat assessment — thresholds are rough latency budgets for
        # real-time voice interaction (sub-1.5s feels responsive).
        print(f"\n🎤 Voice Chat Viability:")
        if cat_mean < 1500:
            print(f" ✅ Both systems suitable for voice chat")
        elif cat_mean < 2000 and current_mean < 1500:
            print(f" ⚠️ Cheshire Cat slower but still usable")
        else:
            print(f" ❌ Cheshire Cat may be too slow for real-time voice")

    print("\n" + "=" * 70)
|
||||
|
||||
def main():
    """Entry point: warn when the bot package is unavailable, then run the benchmark."""
    if not HAS_BOT_CODE:
        # Same guidance as before, emitted line by line.
        for message_line in (
            "\n⚠️ Running in Cat-only mode (bot code not available)",
            " To run full comparison:",
            " 1. Make sure you're running this from the cheshire-cat directory",
            " 2. Ensure the parent 'bot' directory is accessible\n",
        ):
            print(message_line)

    asyncio.run(run_comparison())
|
||||
|
||||
# Script entry point: only run the benchmark when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user