#!/usr/bin/env python3
"""
Cheshire Cat Test Setup Script for Miku Bot
Sets up Cat to use llama-swap instead of Ollama
"""
import requests
import time
import json
import sys

# Configuration
# Base URL of the Cheshire Cat HTTP API as exposed on the host.
CAT_URL = "http://localhost:1865"
# llama-swap's OpenAI-compatible endpoint, reached over the internal Docker network.
LLAMA_SWAP_URL = "http://llama-swap:8080/v1"  # Internal Docker network
# LLAMA_SWAP_URL = "http://host.docker.internal:8080/v1"  # Alternative if network doesn't work
# GGUF model name that llama-swap should load for text generation.
TEXT_MODEL = "Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf"  # Your default text model
def wait_for_cat():
    """Poll Cat's root endpoint until it responds; True if it came up in time."""
    print("Waiting for Cheshire Cat to start...")
    max_attempts = 30

    for attempt in range(1, max_attempts + 1):
        try:
            # Any 200 from the root route means the service is up.
            if requests.get(f"{CAT_URL}/", timeout=5).status_code == 200:
                print("✅ Cheshire Cat is ready!")
                return True
        except requests.exceptions.RequestException:
            # Not reachable yet — fall through to the retry below.
            pass

        print(f" Attempt {attempt}/{max_attempts}...")
        time.sleep(2)

    print("❌ Cheshire Cat failed to start")
    return False
def configure_llm():
    """Configure Cat to use llama-swap instead of Ollama.

    Points Cat's OpenAI-compatible LLM adapter at the llama-swap endpoint
    using the module-level ``LLAMA_SWAP_URL`` and ``TEXT_MODEL``.

    Returns:
        bool: True if the settings PUT succeeded, False otherwise.
    """
    print("\n🔧 Configuring LLM to use llama-swap...")

    # Cat's settings API endpoint
    settings_url = f"{CAT_URL}/settings"

    # OpenAI-compatible configuration for llama-swap
    llm_config = {
        "name": "LLMOpenAIConfig",
        "value": {
            "openai_api_key": "dummy",  # llama-swap doesn't need this
            "model_name": TEXT_MODEL,
            "openai_api_base": LLAMA_SWAP_URL
        }
    }

    try:
        # Get current settings (sanity check that the settings API responds).
        # Fix: a timeout is required so an unresponsive Cat can't hang the
        # setup script indefinitely.
        response = requests.get(settings_url, timeout=10)
        if response.status_code == 200:
            print(" Current settings retrieved")

        # Update LLM settings
        response = requests.put(
            f"{settings_url}/llm",
            json=llm_config,
            headers={"Content-Type": "application/json"},
            timeout=30,  # same rationale: never block forever
        )

        if response.status_code == 200:
            print(f"✅ LLM configured to use llama-swap at {LLAMA_SWAP_URL}")
            print(f" Model: {TEXT_MODEL}")
            return True

        print(f"❌ Failed to configure LLM: {response.status_code}")
        print(f" Response: {response.text}")
        return False

    except Exception as e:
        # Broad catch is deliberate: any failure here is reported and the
        # caller decides whether to continue the setup.
        print(f"❌ Error configuring LLM: {e}")
        return False
def configure_embedder():
    """Configure embedder (use CPU for now, can switch to GPU later).

    Non-critical step: always returns True so the overall setup continues
    even when the embedder config request fails.
    """
    print("\n🧮 Configuring embedder...")

    # Use default embedder (sentence-transformers on CPU)
    # We'll test this first, then potentially switch to GPU
    embedder_config = {
        "name": "EmbedderDumbConfig",  # Fast, low-quality for testing
        "value": {}
    }

    # For production, use this instead:
    # embedder_config = {
    #     "name": "EmbedderQdrantFastEmbedConfig",
    #     "value": {
    #         "model_name": "sentence-transformers/all-MiniLM-L6-v2"  # Lightweight model
    #     }
    # }

    try:
        response = requests.put(
            f"{CAT_URL}/settings/embedder",
            json=embedder_config,
            headers={"Content-Type": "application/json"},
            # Fix: without a timeout an unresponsive Cat would hang the
            # whole setup script here.
            timeout=30,
        )

        if response.status_code == 200:
            print("✅ Embedder configured (CPU-based for testing)")
        else:
            print(f"⚠️ Embedder config returned: {response.status_code}")
            print(f" Using default embedder")
        return True  # Not critical either way

    except Exception as e:
        print(f"⚠️ Error configuring embedder: {e}")
        print(" Will use default embedder")
        return True  # Not critical
def upload_knowledge_base():
    """Upload Miku's knowledge files to Cat via the rabbithole endpoint.

    Returns:
        bool: True if at least one file was uploaded successfully.
    """
    print("\n📚 Uploading Miku knowledge base...")

    files_to_upload = [
        "../bot/persona/miku/miku_lore.txt",
        "../bot/persona/miku/miku_prompt.txt",
        "../bot/persona/miku/miku_lyrics.txt"
    ]

    uploaded_count = 0

    for filepath in files_to_upload:
        # Compute the bare filename first so the error handlers below can
        # reference it too.
        filename = filepath.split('/')[-1]
        try:
            # Fix: the progress/error messages printed a garbled "(unknown)"
            # literal instead of the filename that was already computed.
            print(f" Uploading {filename}...")

            with open(filepath, 'rb') as f:
                files = {'file': (filename, f, 'text/plain')}
                response = requests.post(
                    f"{CAT_URL}/rabbithole/",
                    files=files,
                    timeout=60  # Chunking and embedding takes time
                )

            if response.status_code == 200:
                print(f" ✅ {filename} uploaded and processed")
                uploaded_count += 1
            else:
                print(f" ❌ Failed to upload {filename}: {response.status_code}")
                print(f" {response.text[:200]}")

        except FileNotFoundError:
            print(f" ⚠️ File not found: {filepath}")
        except Exception as e:
            print(f" ❌ Error uploading {filename}: {e}")

    print(f"\n📊 Uploaded {uploaded_count}/{len(files_to_upload)} files")
    return uploaded_count > 0
def test_query():
    """Fire a few sample messages at Cat to verify the end-to-end pipeline."""
    print("\n🧪 Testing query...")

    sample_prompts = [
        "What is your favorite food?",
        "Who are your friends?",
        "Tell me about the song World is Mine"
    ]

    for prompt in sample_prompts:
        print(f"\n Query: '{prompt}'")
        try:
            resp = requests.post(
                f"{CAT_URL}/message",
                json={"text": prompt},
                headers={"Content-Type": "application/json"},
                timeout=30
            )

            if resp.status_code != 200:
                # Report the failure and the first chunk of the body.
                print(f" ❌ Query failed: {resp.status_code}")
                print(f" {resp.text[:200]}")
            else:
                payload = resp.json()
                answer = payload.get("content", "No response")
                print(f" ✅ Response: {answer[:150]}...")

        except Exception as e:
            print(f" ❌ Error: {e}")

        time.sleep(1)  # Brief pause between queries
def main():
    """Run the full setup sequence: wait, configure, upload, smoke-test."""
    banner = "=" * 60
    print(banner)
    print("🐱 Cheshire Cat Test Setup for Miku Bot")
    print(banner)

    # Step 1: the Cat service must be reachable before anything else.
    if not wait_for_cat():
        print("\n❌ Setup failed: Cat didn't start")
        sys.exit(1)

    # Step 2: point the LLM at llama-swap (non-fatal on failure).
    if not configure_llm():
        print("\n⚠️ LLM configuration failed, but continuing...")

    # Step 3: embedder setup (also non-fatal).
    if not configure_embedder():
        print("\n⚠️ Embedder configuration failed, but continuing...")

    # Step 4: feed the persona files into the rabbithole.
    time.sleep(2)  # Give Cat a moment to apply settings
    if not upload_knowledge_base():
        print("\n⚠️ Knowledge upload failed")

    # Step 5: end-to-end smoke test.
    time.sleep(5)  # Give Cat time to process uploaded files
    test_query()

    print("\n" + banner)
    print("✅ Setup complete!")
    print(banner)
    print("\nNext steps:")
    print(" 1. Run benchmarks: python3 benchmark_cat.py")
    print(" 2. Admin panel: http://localhost:1865/admin")
    print(" 3. API docs: http://localhost:1865/docs")


if __name__ == "__main__":
    main()