#!/usr/bin/env python3
"""
Cheshire Cat Test Setup Script for Miku Bot
Sets up Cat to use llama-swap instead of Ollama
"""

import requests
import time
import json
import sys

# Configuration
CAT_URL = "http://localhost:1865"
LLAMA_SWAP_URL = "http://llama-swap:8080/v1"  # Internal Docker network
# LLAMA_SWAP_URL = "http://host.docker.internal:8080/v1"  # Alternative if network doesn't work
TEXT_MODEL = "Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf"  # Your default text model


def wait_for_cat():
    """Poll Cat's root endpoint until it answers 200, or give up.

    Returns:
        bool: True once Cat responds, False after all attempts fail.
    """
    print("Waiting for Cheshire Cat to start...")
    max_attempts = 30
    for i in range(max_attempts):
        try:
            response = requests.get(f"{CAT_URL}/", timeout=5)
            if response.status_code == 200:
                print("✅ Cheshire Cat is ready!")
                return True
        except requests.exceptions.RequestException:
            # Cat not up yet (connection refused / timeout) — keep polling.
            pass
        print(f"   Attempt {i+1}/{max_attempts}...")
        time.sleep(2)
    print("❌ Cheshire Cat failed to start")
    return False


def configure_llm():
    """Point Cat's LLM at llama-swap via its OpenAI-compatible endpoint.

    Returns:
        bool: True if the settings PUT succeeded, False otherwise.
    """
    print("\n🔧 Configuring LLM to use llama-swap...")

    # Cat's settings API endpoint
    settings_url = f"{CAT_URL}/settings"

    # OpenAI-compatible configuration for llama-swap
    llm_config = {
        "name": "LLMOpenAIConfig",
        "value": {
            "openai_api_key": "dummy",  # llama-swap doesn't need this
            "model_name": TEXT_MODEL,
            "openai_api_base": LLAMA_SWAP_URL
        }
    }

    try:
        # Get current settings (informational only — the PUT below proceeds
        # regardless, so a failed GET can't leave us without a return value).
        response = requests.get(settings_url, timeout=10)
        if response.status_code == 200:
            print("   Current settings retrieved")

        # Update LLM settings
        response = requests.put(
            f"{settings_url}/llm",
            json=llm_config,
            headers={"Content-Type": "application/json"},
            timeout=30
        )

        if response.status_code == 200:
            print(f"✅ LLM configured to use llama-swap at {LLAMA_SWAP_URL}")
            print(f"   Model: {TEXT_MODEL}")
            return True
        else:
            print(f"❌ Failed to configure LLM: {response.status_code}")
            print(f"   Response: {response.text}")
            return False
    except Exception as e:
        print(f"❌ Error configuring LLM: {e}")
        return False


def configure_embedder():
    """Configure embedder (use CPU for now, can switch to GPU later).

    Returns:
        bool: always True — embedder config is non-critical; Cat falls back
        to its default embedder on failure.
    """
    print("\n🧮 Configuring embedder...")

    # Use default embedder (sentence-transformers on CPU)
    # We'll test this first, then potentially switch to GPU
    embedder_config = {
        "name": "EmbedderDumbConfig",  # Fast, low-quality for testing
        "value": {}
    }

    # For production, use this instead:
    # embedder_config = {
    #     "name": "EmbedderQdrantFastEmbedConfig",
    #     "value": {
    #         "model_name": "sentence-transformers/all-MiniLM-L6-v2"  # Lightweight model
    #     }
    # }

    try:
        response = requests.put(
            f"{CAT_URL}/settings/embedder",
            json=embedder_config,
            headers={"Content-Type": "application/json"},
            timeout=30
        )

        if response.status_code == 200:
            print("✅ Embedder configured (CPU-based for testing)")
            return True
        else:
            print(f"⚠️ Embedder config returned: {response.status_code}")
            print(f"   Using default embedder")
            return True  # Not critical
    except Exception as e:
        print(f"⚠️ Error configuring embedder: {e}")
        print("   Will use default embedder")
        return True  # Not critical


def upload_knowledge_base():
    """Upload Miku's knowledge files to Cat's rabbithole for ingestion.

    Returns:
        bool: True if at least one file was uploaded successfully.
    """
    print("\n📚 Uploading Miku knowledge base...")

    files_to_upload = [
        "../bot/persona/miku/miku_lore.txt",
        "../bot/persona/miku/miku_prompt.txt",
        "../bot/persona/miku/miku_lyrics.txt"
    ]

    uploaded_count = 0
    for filepath in files_to_upload:
        try:
            filename = filepath.split('/')[-1]
            # Interpolate the actual filename (the placeholders had been
            # lost, printing a literal "(unknown)").
            print(f"   Uploading {filename}...")

            with open(filepath, 'rb') as f:
                files = {'file': (filename, f, 'text/plain')}
                response = requests.post(
                    f"{CAT_URL}/rabbithole/",
                    files=files,
                    timeout=60  # Chunking and embedding takes time
                )

            if response.status_code == 200:
                print(f"   ✅ {filename} uploaded and processed")
                uploaded_count += 1
            else:
                print(f"   ❌ Failed to upload {filename}: {response.status_code}")
                print(f"      {response.text[:200]}")
        except FileNotFoundError:
            print(f"   ⚠️ File not found: {filepath}")
        except Exception as e:
            print(f"   ❌ Error uploading {filepath}: {e}")

    print(f"\n📊 Uploaded {uploaded_count}/{len(files_to_upload)} files")
    return uploaded_count > 0
def test_query():
    """Send a few sample messages to Cat and print the replies.

    Purely diagnostic — prints success/failure per query, returns None.
    """
    print("\n🧪 Testing query...")

    test_messages = [
        "What is your favorite food?",
        "Who are your friends?",
        "Tell me about the song World is Mine"
    ]

    for message in test_messages:
        print(f"\n   Query: '{message}'")
        try:
            response = requests.post(
                f"{CAT_URL}/message",
                json={"text": message},
                headers={"Content-Type": "application/json"},
                timeout=30
            )

            if response.status_code == 200:
                data = response.json()
                reply = data.get("content", "No response")
                print(f"   ✅ Response: {reply[:150]}...")
            else:
                print(f"   ❌ Query failed: {response.status_code}")
                print(f"      {response.text[:200]}")
        except Exception as e:
            print(f"   ❌ Error: {e}")

        time.sleep(1)  # Brief pause between queries


def main():
    """Run the full setup sequence: wait, configure, upload, smoke-test."""
    print("=" * 60)
    print("🐱 Cheshire Cat Test Setup for Miku Bot")
    print("=" * 60)

    # Step 1: Wait for Cat to start
    if not wait_for_cat():
        print("\n❌ Setup failed: Cat didn't start")
        sys.exit(1)

    # Step 2: Configure LLM
    if not configure_llm():
        print("\n⚠️ LLM configuration failed, but continuing...")

    # Step 3: Configure embedder
    if not configure_embedder():
        print("\n⚠️ Embedder configuration failed, but continuing...")

    # Step 4: Upload knowledge base
    time.sleep(2)  # Give Cat a moment to apply settings
    if not upload_knowledge_base():
        print("\n⚠️ Knowledge upload failed")

    # Step 5: Test queries
    time.sleep(5)  # Give Cat time to process uploaded files
    test_query()

    print("\n" + "=" * 60)
    print("✅ Setup complete!")
    print("=" * 60)
    print("\nNext steps:")
    print("  1. Run benchmarks: python3 benchmark_cat.py")
    print("  2. Admin panel: http://localhost:1865/admin")
    print("  3. API docs: http://localhost:1865/docs")


if __name__ == "__main__":
    main()