Configuration:
- .env.example, .gitignore, compose.yml (main docker compose)
- docker-compose-amd.yml (ROCm), docker-compose-macos.yml
- start.sh, stop.sh convenience scripts
- LICENSE (Apache 2.0, from upstream Cheshire Cat)

Memory management utilities:
- analyze_consolidation.py, manual_consolidation.py, verify_consolidation.py
- check_memories.py, extract_declarative_facts.py, store_declarative_facts.py
- compare_systems.py (system comparison tool)
- benchmark_cat.py, streaming_benchmark.py, streaming_benchmark_v2.py

Test suite:
- quick_test.py, test_setup.py, test_setup_simple.py
- test_consolidation_direct.py, test_declarative_recall.py, test_recall.py
- test_end_to_end.py, test_full_pipeline.py
- test_phase2.py, test_phase2_comprehensive.py

Documentation:
- README.md, QUICK_START.txt, TEST_README.md, SETUP_COMPLETE.md
- PHASE2_IMPLEMENTATION_NOTES.md, PHASE2_TEST_RESULTS.md
- POST_OPTIMIZATION_ANALYSIS.md
229 lines
7.3 KiB
Python
Executable File
229 lines
7.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Cheshire Cat Test Setup Script for Miku Bot
|
|
Sets up Cat to use llama-swap instead of Ollama
|
|
"""
|
|
|
|
import requests
|
|
import time
|
|
import json
|
|
import sys
|
|
|
|
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Base URL of the Cheshire Cat instance this script talks to.
CAT_URL = "http://localhost:1865"

# OpenAI-compatible llama-swap endpoint, addressed over the internal Docker
# network. If that hostname does not resolve, the alternative is
# "http://host.docker.internal:8080/v1".
LLAMA_SWAP_URL = "http://llama-swap:8080/v1"

# Default text model requested from llama-swap.
TEXT_MODEL = "Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf"
|
|
|
|
def wait_for_cat(max_attempts: int = 30, delay: float = 2) -> bool:
    """Poll Cat's root endpoint until it answers with HTTP 200.

    Args:
        max_attempts: Maximum number of polls before giving up.
        delay: Seconds to sleep between two consecutive polls.

    Returns:
        True as soon as one poll returns status 200; False when every
        attempt fails (request error, timeout, or non-200 status).
    """
    print("Waiting for Cheshire Cat to start...")

    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.get(f"{CAT_URL}/", timeout=5)
            if response.status_code == 200:
                print("✅ Cheshire Cat is ready!")
                return True
        except requests.exceptions.RequestException:
            # Request errors are treated as "not ready yet"; just retry.
            pass

        print(f" Attempt {attempt}/{max_attempts}...")
        if attempt < max_attempts:
            # Sleeping after the last attempt would only delay the failure.
            time.sleep(delay)

    print("❌ Cheshire Cat failed to start")
    return False
|
|
|
|
def configure_llm(timeout: float = 30) -> bool:
    """Point Cat's LLM setting at the llama-swap OpenAI-compatible endpoint.

    First fetches the current settings (used only for a progress message),
    then PUTs an ``LLMOpenAIConfig`` payload to ``/settings/llm``.

    Args:
        timeout: Seconds to wait for each HTTP request. Without a timeout,
            ``requests`` would block indefinitely on a stalled connection.

    Returns:
        True if the PUT answered with HTTP 200; False on any other status
        or if a request raised.
    """
    print("\n🔧 Configuring LLM to use llama-swap...")

    # Cat's settings API endpoint
    settings_url = f"{CAT_URL}/settings"

    # OpenAI-compatible configuration for llama-swap
    llm_config = {
        "name": "LLMOpenAIConfig",
        "value": {
            "openai_api_key": "dummy",  # llama-swap doesn't need this
            "model_name": TEXT_MODEL,
            "openai_api_base": LLAMA_SWAP_URL,
        },
    }

    try:
        # Get current settings (informational only; the result is not reused)
        response = requests.get(settings_url, timeout=timeout)
        if response.status_code == 200:
            print(" Current settings retrieved")

        # Update LLM settings
        response = requests.put(
            f"{settings_url}/llm",
            json=llm_config,
            headers={"Content-Type": "application/json"},
            timeout=timeout,
        )

        if response.status_code == 200:
            print(f"✅ LLM configured to use llama-swap at {LLAMA_SWAP_URL}")
            print(f" Model: {TEXT_MODEL}")
            return True

        print(f"❌ Failed to configure LLM: {response.status_code}")
        print(f" Response: {response.text}")
        return False

    except Exception as e:
        # Best-effort step: report the problem and let the caller decide.
        print(f"❌ Error configuring LLM: {e}")
        return False
|
|
|
|
def configure_embedder(timeout: float = 60) -> bool:
    """Select the embedder Cat should use for this test setup.

    PUTs an ``EmbedderDumbConfig`` payload to ``/settings/embedder``.

    Args:
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            ``requests`` would block indefinitely on a stalled connection.

    Returns:
        Always True: the embedder is treated as non-critical, so failures
        are printed as warnings and never abort the setup.
    """
    print("\n🧮 Configuring embedder...")

    # EmbedderDumbConfig is fast but low-quality; good enough for a first
    # test run, with the option to switch to a better embedder later.
    embedder_config = {
        "name": "EmbedderDumbConfig",
        "value": {},
    }

    # For production, use this instead:
    # embedder_config = {
    #     "name": "EmbedderQdrantFastEmbedConfig",
    #     "value": {
    #         "model_name": "sentence-transformers/all-MiniLM-L6-v2"  # Lightweight model
    #     }
    # }

    try:
        response = requests.put(
            f"{CAT_URL}/settings/embedder",
            json=embedder_config,
            headers={"Content-Type": "application/json"},
            timeout=timeout,
        )

        if response.status_code == 200:
            print("✅ Embedder configured (CPU-based for testing)")
        else:
            print(f"⚠️ Embedder config returned: {response.status_code}")
            print(" Using default embedder")
        return True  # Not critical either way

    except Exception as e:
        print(f"⚠️ Error configuring embedder: {e}")
        print(" Will use default embedder")
        return True  # Not critical
|
|
|
|
def upload_knowledge_base():
    """Send each Miku persona text file to Cat's rabbithole endpoint.

    Every file is attempted independently; a missing file or a failed
    upload is reported and the loop moves on to the next one.

    Returns:
        True when at least one upload answered with HTTP 200.
    """
    print("\n📚 Uploading Miku knowledge base...")

    knowledge_files = (
        "../bot/persona/miku/miku_lore.txt",
        "../bot/persona/miku/miku_prompt.txt",
        "../bot/persona/miku/miku_lyrics.txt",
    )
    successes = 0

    for path in knowledge_files:
        try:
            filename = path.rsplit('/', 1)[-1]
            print(f" Uploading {filename}...")

            with open(path, 'rb') as handle:
                response = requests.post(
                    f"{CAT_URL}/rabbithole/",
                    files={'file': (filename, handle, 'text/plain')},
                    timeout=60,  # Chunking and embedding takes time
                )

            if response.status_code != 200:
                print(f" ❌ Failed to upload {filename}: {response.status_code}")
                print(f" {response.text[:200]}")
                continue

            print(f" ✅ {filename} uploaded and processed")
            successes += 1

        except FileNotFoundError:
            print(f" ⚠️ File not found: {path}")
        except Exception as err:
            print(f" ❌ Error uploading {filename}: {err}")

    print(f"\n📊 Uploaded {successes}/{len(knowledge_files)} files")
    return successes > 0
|
|
|
|
def test_query():
    """Send a few sample questions to Cat and print what comes back.

    Nothing is returned or asserted: each reply (or failure) is printed
    so a human can judge whether the setup works.
    """
    print("\n🧪 Testing query...")

    sample_questions = (
        "What is your favorite food?",
        "Who are your friends?",
        "Tell me about the song World is Mine",
    )

    for question in sample_questions:
        print(f"\n Query: '{question}'")
        try:
            response = requests.post(
                f"{CAT_URL}/message",
                json={"text": question},
                headers={"Content-Type": "application/json"},
                timeout=30,
            )

            if response.status_code != 200:
                print(f" ❌ Query failed: {response.status_code}")
                print(f" {response.text[:200]}")
            else:
                payload = response.json()
                answer = payload.get("content", "No response")
                print(f" ✅ Response: {answer[:150]}...")

        except Exception as err:
            print(f" ❌ Error: {err}")

        # Brief pause between queries
        time.sleep(1)
|
|
|
|
def main():
    """Run the whole setup: wait for Cat, configure it, upload lore, smoke-test.

    Exits with status 1 if Cat never becomes reachable. Every later step is
    best-effort: a failure is reported, the remaining steps still run, and
    the closing banner names the steps that failed instead of claiming an
    unqualified success. The exit status for those cases is unchanged.
    """
    banner = "=" * 60
    print(banner)
    print("🐱 Cheshire Cat Test Setup for Miku Bot")
    print(banner)

    # Step 1: Wait for Cat to start
    if not wait_for_cat():
        print("\n❌ Setup failed: Cat didn't start")
        sys.exit(1)

    failed_steps = []

    # Step 2: Configure LLM
    if not configure_llm():
        print("\n⚠️ LLM configuration failed, but continuing...")
        failed_steps.append("LLM configuration")

    # Step 3: Configure embedder
    if not configure_embedder():
        print("\n⚠️ Embedder configuration failed, but continuing...")
        failed_steps.append("embedder configuration")

    # Step 4: Upload knowledge base
    time.sleep(2)  # Give Cat a moment to apply settings
    if not upload_knowledge_base():
        print("\n⚠️ Knowledge upload failed")
        failed_steps.append("knowledge upload")

    # Step 5: Test queries
    time.sleep(5)  # Give Cat time to process uploaded files
    test_query()

    print("\n" + banner)
    if failed_steps:
        # Don't report success when earlier steps printed failures.
        print(f"⚠️ Setup finished with warnings: {', '.join(failed_steps)}")
    else:
        print("✅ Setup complete!")
    print(banner)
    print("\nNext steps:")
    print(" 1. Run benchmarks: python3 benchmark_cat.py")
    # Derive the URLs from CAT_URL so they follow the configured host.
    print(f" 2. Admin panel: {CAT_URL}/admin")
    print(f" 3. API docs: {CAT_URL}/docs")
|
|
|
|
# Run the setup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|