#!/usr/bin/env python3
"""
Cheshire Cat Test Setup Script for Miku Bot
Sets up Cat to use llama-swap instead of Ollama
"""
import requests
import time
import json
import sys

# Configuration
# Base URL of the Cheshire Cat HTTP API as exposed on the host.
CAT_URL = "http://localhost:1865"
# llama-swap's OpenAI-compatible endpoint, reached over the internal Docker network.
LLAMA_SWAP_URL = "http://llama-swap:8080/v1"  # Internal Docker network
# LLAMA_SWAP_URL = "http://host.docker.internal:8080/v1"  # Alternative if network doesn't work
# GGUF model name that llama-swap should load for text generation.
TEXT_MODEL = "Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf"  # Your default text model
def wait_for_cat():
    """Poll Cat's root endpoint until it responds; True if it came up in time."""
    print("Waiting for Cheshire Cat to start...")
    max_attempts = 30

    for attempt in range(1, max_attempts + 1):
        try:
            # Any 200 from the root route means the service is up.
            if requests.get(f"{CAT_URL}/", timeout=5).status_code == 200:
                print("✅ Cheshire Cat is ready!")
                return True
        except requests.exceptions.RequestException:
            # Not reachable yet — fall through to the retry below.
            pass

        print(f" Attempt {attempt}/{max_attempts}...")
        time.sleep(2)

    print("❌ Cheshire Cat failed to start")
    return False
def configure_llm():
    """Configure Cat to use llama-swap instead of Ollama.

    Points Cat's OpenAI-compatible LLM adapter at the llama-swap endpoint
    using the module-level ``LLAMA_SWAP_URL`` and ``TEXT_MODEL``.

    Returns:
        bool: True if the settings PUT succeeded, False otherwise.
    """
    print("\n🔧 Configuring LLM to use llama-swap...")

    # Cat's settings API endpoint
    settings_url = f"{CAT_URL}/settings"

    # OpenAI-compatible configuration for llama-swap
    llm_config = {
        "name": "LLMOpenAIConfig",
        "value": {
            "openai_api_key": "dummy",  # llama-swap doesn't need this
            "model_name": TEXT_MODEL,
            "openai_api_base": LLAMA_SWAP_URL
        }
    }

    try:
        # Get current settings (sanity check that the settings API responds).
        # Fix: a timeout is required so an unresponsive Cat can't hang the
        # setup script indefinitely.
        response = requests.get(settings_url, timeout=10)
        if response.status_code == 200:
            print(" Current settings retrieved")

        # Update LLM settings
        response = requests.put(
            f"{settings_url}/llm",
            json=llm_config,
            headers={"Content-Type": "application/json"},
            timeout=30,  # same rationale: never block forever
        )

        if response.status_code == 200:
            print(f"✅ LLM configured to use llama-swap at {LLAMA_SWAP_URL}")
            print(f" Model: {TEXT_MODEL}")
            return True

        print(f"❌ Failed to configure LLM: {response.status_code}")
        print(f" Response: {response.text}")
        return False

    except Exception as e:
        # Broad catch is deliberate: any failure here is reported and the
        # caller decides whether to continue the setup.
        print(f"❌ Error configuring LLM: {e}")
        return False
def configure_embedder():
    """Configure embedder (use CPU for now, can switch to GPU later).

    Non-critical step: always returns True so the overall setup continues
    even when the embedder config request fails.
    """
    print("\n🧮 Configuring embedder...")

    # Use default embedder (sentence-transformers on CPU)
    # We'll test this first, then potentially switch to GPU
    embedder_config = {
        "name": "EmbedderDumbConfig",  # Fast, low-quality for testing
        "value": {}
    }

    # For production, use this instead:
    # embedder_config = {
    #     "name": "EmbedderQdrantFastEmbedConfig",
    #     "value": {
    #         "model_name": "sentence-transformers/all-MiniLM-L6-v2"  # Lightweight model
    #     }
    # }

    try:
        response = requests.put(
            f"{CAT_URL}/settings/embedder",
            json=embedder_config,
            headers={"Content-Type": "application/json"},
            # Fix: without a timeout an unresponsive Cat would hang the
            # whole setup script here.
            timeout=30,
        )

        if response.status_code == 200:
            print("✅ Embedder configured (CPU-based for testing)")
        else:
            print(f"⚠️ Embedder config returned: {response.status_code}")
            print(f" Using default embedder")
        return True  # Not critical either way

    except Exception as e:
        print(f"⚠️ Error configuring embedder: {e}")
        print(" Will use default embedder")
        return True  # Not critical
def upload_knowledge_base():
    """Upload Miku's knowledge files to Cat via the rabbithole endpoint.

    Returns:
        bool: True if at least one file was uploaded successfully.
    """
    print("\n📚 Uploading Miku knowledge base...")

    files_to_upload = [
        "../bot/persona/miku/miku_lore.txt",
        "../bot/persona/miku/miku_prompt.txt",
        "../bot/persona/miku/miku_lyrics.txt"
    ]

    uploaded_count = 0

    for filepath in files_to_upload:
        # Compute the bare filename first so the error handlers below can
        # reference it too.
        filename = filepath.split('/')[-1]
        try:
            # Fix: the progress/error messages printed a garbled "(unknown)"
            # literal instead of the filename that was already computed.
            print(f" Uploading {filename}...")

            with open(filepath, 'rb') as f:
                files = {'file': (filename, f, 'text/plain')}
                response = requests.post(
                    f"{CAT_URL}/rabbithole/",
                    files=files,
                    timeout=60  # Chunking and embedding takes time
                )

            if response.status_code == 200:
                print(f" ✅ {filename} uploaded and processed")
                uploaded_count += 1
            else:
                print(f" ❌ Failed to upload {filename}: {response.status_code}")
                print(f" {response.text[:200]}")

        except FileNotFoundError:
            print(f" ⚠️ File not found: {filepath}")
        except Exception as e:
            print(f" ❌ Error uploading {filename}: {e}")

    print(f"\n📊 Uploaded {uploaded_count}/{len(files_to_upload)} files")
    return uploaded_count > 0
def test_query():
    """Fire a few sample messages at Cat to verify the end-to-end pipeline."""
    print("\n🧪 Testing query...")

    sample_prompts = [
        "What is your favorite food?",
        "Who are your friends?",
        "Tell me about the song World is Mine"
    ]

    for prompt in sample_prompts:
        print(f"\n Query: '{prompt}'")
        try:
            resp = requests.post(
                f"{CAT_URL}/message",
                json={"text": prompt},
                headers={"Content-Type": "application/json"},
                timeout=30
            )

            if resp.status_code != 200:
                # Report the failure and the first chunk of the body.
                print(f" ❌ Query failed: {resp.status_code}")
                print(f" {resp.text[:200]}")
            else:
                payload = resp.json()
                answer = payload.get("content", "No response")
                print(f" ✅ Response: {answer[:150]}...")

        except Exception as e:
            print(f" ❌ Error: {e}")

        time.sleep(1)  # Brief pause between queries
def main():
    """Run the full setup sequence: wait, configure, upload, smoke-test."""
    banner = "=" * 60
    print(banner)
    print("🐱 Cheshire Cat Test Setup for Miku Bot")
    print(banner)

    # Step 1: the Cat service must be reachable before anything else.
    if not wait_for_cat():
        print("\n❌ Setup failed: Cat didn't start")
        sys.exit(1)

    # Step 2: point the LLM at llama-swap (non-fatal on failure).
    if not configure_llm():
        print("\n⚠️ LLM configuration failed, but continuing...")

    # Step 3: embedder setup (also non-fatal).
    if not configure_embedder():
        print("\n⚠️ Embedder configuration failed, but continuing...")

    # Step 4: feed the persona files into the rabbithole.
    time.sleep(2)  # Give Cat a moment to apply settings
    if not upload_knowledge_base():
        print("\n⚠️ Knowledge upload failed")

    # Step 5: end-to-end smoke test.
    time.sleep(5)  # Give Cat time to process uploaded files
    test_query()

    print("\n" + banner)
    print("✅ Setup complete!")
    print(banner)
    print("\nNext steps:")
    print(" 1. Run benchmarks: python3 benchmark_cat.py")
    print(" 2. Admin panel: http://localhost:1865/admin")
    print(" 3. API docs: http://localhost:1865/docs")


if __name__ == "__main__":
    main()