add: cheshire-cat configuration, tooling, tests, and documentation
Configuration: - .env.example, .gitignore, compose.yml (main docker compose) - docker-compose-amd.yml (ROCm), docker-compose-macos.yml - start.sh, stop.sh convenience scripts - LICENSE (Apache 2.0, from upstream Cheshire Cat) Memory management utilities: - analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py - check_memories.py, extract_declarative_facts.py, store_declarative_facts.py - compare_systems.py (system comparison tool) - benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py Test suite: - quick_test.py, test_setup.py, test_setup_simple.py - test_consolidation_direct.py, test_declarative_recall.py, test_recall.py - test_end_to_end.py, test_full_pipeline.py - test_phase2.py, test_phase2_comprehensive.py Documentation: - README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md - PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md - POST_OPTIMIZATION_ANALYSIS.md
This commit is contained in:
212
cheshire-cat/compare_systems.py
Executable file
212
cheshire-cat/compare_systems.py
Executable file
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env python3
"""
Comparison Benchmark: Current System vs Cheshire Cat
Measures the difference in performance between the two approaches
"""

import sys
import os
# Make the parent directory importable so the existing `bot` package
# (the "current system" side of the comparison) can be found.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

import requests
import time
import statistics
from typing import List, Dict
import asyncio
|
||||
|
||||
# Base URL of the locally running Cheshire Cat HTTP API.
CAT_URL = "http://localhost:1865"

# Import your current LLM function
# Optional: if the parent `bot` package is not importable (e.g. script run
# standalone), fall back to benchmarking only the Cheshire Cat side.
try:
    from bot.utils import llm
    from bot import globals as bot_globals
    HAS_BOT_CODE = True
except ImportError:
    print("⚠️ Could not import bot code - will skip direct comparison")
    HAS_BOT_CODE = False

# Fixed prompt set sent to BOTH systems so the latency numbers are comparable.
TEST_QUERIES = [
    "What is your favorite food?",
    "Tell me about your friends",
    "What's the song World is Mine about?",
    "Hello Miku!",
    "Do you like to sing?",
    "Who created you?",
    "What color is your hair?",
    "Tell me about green onions",
    "What do you do for fun?",
    "Are you a Vocaloid?"
]
|
||||
|
||||
def test_cat_query(query: str, timeout: int = 60) -> Dict:
    """Send a single query to the Cheshire Cat HTTP API and time the round trip.

    Args:
        query: User message to send to the Cat.
        timeout: Per-request timeout in seconds.

    Returns:
        A result dict with keys "success", "latency_ms", "method", and either
        "response" (on success) or "error" (on failure). Never raises: any
        network/HTTP exception is captured into the result so the benchmark
        loop keeps going.
    """
    start_time = time.time()

    try:
        response = requests.post(
            f"{CAT_URL}/message",
            json={"text": query},
            headers={"Content-Type": "application/json"},
            timeout=timeout
        )

        latency_ms = (time.time() - start_time) * 1000

        if response.status_code == 200:
            data = response.json()
            content = data.get("content", "")

            # Filter out tool calls: the Cat sometimes answers with a raw JSON
            # tool invocation instead of natural text. A leading '{' already
            # covers the '{"name":' case, so one startswith check suffices
            # (the original had a redundant second condition).
            # NOTE: empty content also falls into the error branch below.
            if content and not content.startswith('{'):
                return {
                    "success": True,
                    "latency_ms": latency_ms,
                    "response": content,
                    "method": "cheshire_cat"
                }
            else:
                return {
                    "success": False,
                    "latency_ms": latency_ms,
                    "error": "Got tool call instead of text",
                    "method": "cheshire_cat"
                }
        else:
            return {
                "success": False,
                "latency_ms": latency_ms,
                "error": f"HTTP {response.status_code}",
                "method": "cheshire_cat"
            }
    except Exception as e:
        # Broad catch is deliberate: a benchmark run must survive connection
        # refusals, timeouts, and malformed JSON alike.
        return {
            "success": False,
            "latency_ms": (time.time() - start_time) * 1000,
            "error": str(e),
            "method": "cheshire_cat"
        }
|
||||
|
||||
async def test_current_query(query: str) -> Dict:
    """Test query using current Miku bot system"""
    if not HAS_BOT_CODE:
        return {"success": False, "error": "Bot code not available", "method": "current"}

    started = time.time()

    try:
        # Use your existing query_llama function
        answer = await llm.query_llama(
            user_prompt=query,
            user_id="benchmark_test",
            guild_id=None,
            response_type="dm_response"
        )
        elapsed_ms = (time.time() - started) * 1000
    except Exception as exc:
        # Report the failure instead of propagating it, so the benchmark
        # loop can continue with the remaining queries.
        return {
            "success": False,
            "latency_ms": (time.time() - started) * 1000,
            "error": str(exc),
            "method": "current"
        }

    return {
        "success": True,
        "latency_ms": elapsed_ms,
        "response": answer,
        "method": "current"
    }
|
||||
|
||||
def _print_latency_stats(label: str, times: List[float]) -> None:
    """Print the latency summary (mean/median/min/max + success rate) for one system.

    Extracted because the original printed the identical block twice, once
    per system, differing only in the label and the list of samples.
    """
    print(f"\n{label}:")
    print(f"   Mean latency: {statistics.mean(times):.0f} ms")
    print(f"   Median latency: {statistics.median(times):.0f} ms")
    print(f"   Min latency: {min(times):.0f} ms")
    print(f"   Max latency: {max(times):.0f} ms")
    print(f"   Success rate: {len(times)}/{len(TEST_QUERIES)} ({len(times)/len(TEST_QUERIES)*100:.0f}%)")


async def run_comparison():
    """Run comparison between both systems.

    Sends every query in TEST_QUERIES first to the Cheshire Cat HTTP API and
    then (if the bot package imported) to the current system, collecting
    latencies for whichever calls succeed, and prints a side-by-side summary.
    """
    print("=" * 70)
    print("⚖️ COMPARISON: Current System vs Cheshire Cat")
    print("=" * 70)

    cat_times: List[float] = []
    current_times: List[float] = []

    for i, query in enumerate(TEST_QUERIES):
        print(f"\n[{i+1}/{len(TEST_QUERIES)}] Query: '{query}'")
        print("-" * 70)

        # Test Cheshire Cat
        cat_result = test_cat_query(query)
        if cat_result["success"]:
            cat_times.append(cat_result["latency_ms"])
            print(f" 🐱 Cheshire Cat: {cat_result['latency_ms']:.0f}ms")
            print(f" Response: {cat_result['response'][:80]}...")
        else:
            print(f" 🐱 Cheshire Cat: ❌ {cat_result.get('error', 'Failed')}")

        # Small delay between tests so one system's load doesn't skew the other's timing
        await asyncio.sleep(1)

        # Test current system
        if HAS_BOT_CODE:
            current_result = await test_current_query(query)
            if current_result["success"]:
                current_times.append(current_result["latency_ms"])
                print(f" 📦 Current System: {current_result['latency_ms']:.0f}ms")
                print(f" Response: {current_result['response'][:80]}...")
            else:
                print(f" 📦 Current System: ❌ {current_result.get('error', 'Failed')}")

            await asyncio.sleep(1)

    # Print comparison statistics
    print("\n" + "=" * 70)
    print("📊 COMPARISON RESULTS")
    print("=" * 70)

    if cat_times:
        _print_latency_stats("🐱 Cheshire Cat", cat_times)

    if current_times:
        _print_latency_stats("📦 Current System", current_times)

    if cat_times and current_times:
        print(f"\n⚖️ Comparison:")
        cat_mean = statistics.mean(cat_times)
        current_mean = statistics.mean(current_times)
        diff = cat_mean - current_mean
        diff_pct = (diff / current_mean) * 100

        if diff > 0:
            print(f" Cheshire Cat is {diff:.0f}ms SLOWER ({diff_pct:+.1f}%)")
        else:
            print(f" Cheshire Cat is {abs(diff):.0f}ms FASTER ({diff_pct:+.1f}%)")

        # Voice chat assessment — thresholds are rough latency budgets for
        # real-time voice interaction (sub-1.5s feels responsive).
        print(f"\n🎤 Voice Chat Viability:")
        if cat_mean < 1500:
            print(f" ✅ Both systems suitable for voice chat")
        elif cat_mean < 2000 and current_mean < 1500:
            print(f" ⚠️ Cheshire Cat slower but still usable")
        else:
            print(f" ❌ Cheshire Cat may be too slow for real-time voice")

    print("\n" + "=" * 70)
|
||||
|
||||
def main():
    """Entry point: warn when the bot package is unavailable, then run the benchmark."""
    if not HAS_BOT_CODE:
        # Same guidance as before, emitted line by line.
        for message_line in (
            "\n⚠️ Running in Cat-only mode (bot code not available)",
            " To run full comparison:",
            " 1. Make sure you're running this from the cheshire-cat directory",
            " 2. Ensure the parent 'bot' directory is accessible\n",
        ):
            print(message_line)

    asyncio.run(run_comparison())
|
||||
|
||||
# Script entry point: only run the benchmark when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user