#!/bin/bash
# Setup script for dual GPU configuration (NVIDIA + AMD RX 6800)

# Note: Not using 'set -e' to allow arithmetic operations
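# ('set -e' would abort this script at '((models_found++))': (( )) returns a
# non-zero exit status when the expression evaluates to 0, which happens on
# the first post-increment from 0. 'models_found=$((models_found + 1))' would
# be a set -e-safe alternative.)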

echo "================================================"
echo "Miku Discord Bot - Dual GPU Setup"
echo "================================================"
echo ""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Check if ROCm is installed on host (optional - Docker container has it)
echo -e "${YELLOW}Checking ROCm installation (optional)...${NC}"
if command -v rocm-smi &> /dev/null; then
    echo -e "${GREEN}✓ ROCm is installed on host${NC}"
    rocm-smi --version

    # Check AMD GPU via rocm-smi
    echo ""
    echo -e "${YELLOW}Checking AMD GPU...${NC}"
    if rocm-smi -i 0 &> /dev/null; then
        echo -e "${GREEN}✓ AMD GPU detected via rocm-smi${NC}"
        rocm-smi -i 0 --showproductname
    else
        echo -e "${YELLOW}! AMD GPU check via rocm-smi failed${NC}"
    fi
else
    echo -e "${YELLOW}! ROCm not found on host (this is OK - Docker container includes ROCm)${NC}"

    # Fallback: Check AMD GPU via lspci
    echo ""
    echo -e "${YELLOW}Checking AMD GPU via lspci...${NC}"
    if lspci | grep -i "VGA.*AMD\|VGA.*Radeon" &> /dev/null; then
        echo -e "${GREEN}✓ AMD GPU detected${NC}"
        lspci | grep -i "VGA.*AMD\|VGA.*Radeon"
    else
        echo -e "${YELLOW}! AMD GPU not detected via lspci${NC}"
        echo "Note: AMD RX 6800 should be visible in lspci output"
    fi
fi
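
# Optional extra probe (an added, hedged check): if rocminfo is available on
# the host, print the GPU's gfx target so it can be compared against what the
# ROCm container image supports (an RX 6800 normally reports gfx1030).
if command -v rocminfo &> /dev/null; then
    echo -e "${YELLOW}GFX targets reported by rocminfo:${NC}"
    rocminfo 2>/dev/null | grep -o 'gfx[0-9a-f]*' | sort -u
fi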

# Check NVIDIA GPU
echo ""
echo -e "${YELLOW}Checking NVIDIA GPU...${NC}"
if command -v nvidia-smi &> /dev/null; then
    echo -e "${GREEN}✓ NVIDIA GPU detected${NC}"
    nvidia-smi --query-gpu=name --format=csv,noheader
else
    echo -e "${RED}✗ NVIDIA GPU not found${NC}"
    echo "Warning: Primary GPU (NVIDIA) not detected"
fi
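
# Optional (an added probe): the driver version often matters for CUDA builds
# of llama.cpp, so print it when nvidia-smi is present.
if command -v nvidia-smi &> /dev/null; then
    echo "  Driver version: $(nvidia-smi --query-gpu=driver_version --format=csv,noheader)"
fi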

# Check Docker
echo ""
echo -e "${YELLOW}Checking Docker...${NC}"
if command -v docker &> /dev/null; then
    echo -e "${GREEN}✓ Docker is installed${NC}"
    docker --version
else
    echo -e "${RED}✗ Docker not found${NC}"
    exit 1
fi
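
# The setup commands printed below use the Compose v2 plugin ('docker compose'),
# so verify it is available as well. This check is an addition and can be
# dropped if Compose is installed another way.
if docker compose version &> /dev/null; then
    echo -e "${GREEN}✓ Docker Compose plugin available${NC}"
else
    echo -e "${YELLOW}! 'docker compose' not available - install the Compose v2 plugin${NC}"
fi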

# Check device permissions
echo ""
echo -e "${YELLOW}Checking device permissions...${NC}"
if [ -c /dev/kfd ] && [ -r /dev/kfd ]; then
    echo -e "${GREEN}✓ /dev/kfd accessible${NC}"
else
    echo -e "${RED}✗ /dev/kfd not accessible${NC}"
    echo "You may need to add your user to the 'render' group:"
    echo "  sudo usermod -aG render \$USER"
fi

if [ -d /dev/dri ]; then
    echo -e "${GREEN}✓ /dev/dri exists${NC}"
else
    echo -e "${RED}✗ /dev/dri not found${NC}"
fi
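
# A hedged extra check: /dev/dri existing does not guarantee the render nodes
# are readable, so probe them individually (on some distros the 'video' group,
# not 'render', owns these nodes).
for node in /dev/dri/renderD*; do
    [ -e "$node" ] || continue  # glob matched nothing; no render nodes present
    if [ -r "$node" ]; then
        echo -e "${GREEN}  ✓ $node readable${NC}"
    else
        echo -e "${YELLOW}  ! $node not readable by current user${NC}"
    fi
done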

# Check if models exist
echo ""
echo -e "${YELLOW}Checking model files...${NC}"
MODEL_DIR="./models"
if [ -d "$MODEL_DIR" ]; then
    echo -e "${GREEN}✓ Models directory exists${NC}"

    # Check for specific models
    models_found=0
    if [ -f "$MODEL_DIR/Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf" ]; then
        echo -e "${GREEN}  ✓ Llama 3.1 8B model found${NC}"
        ((models_found++))
    fi

    if [ -f "$MODEL_DIR/DarkIdol-Llama-3.1-8B-Instruct-1.3-Uncensored_Q4_K_M.gguf" ]; then
        echo -e "${GREEN}  ✓ DarkIdol model found${NC}"
        ((models_found++))
    fi

    if [ -f "$MODEL_DIR/moondream2-text-model-f16_ct-vicuna.gguf" ]; then
        echo -e "${GREEN}  ✓ Moondream2 model found${NC}"
        ((models_found++))
    fi

    if [ $models_found -eq 0 ]; then
        echo -e "${YELLOW}  ! No models found in $MODEL_DIR${NC}"
        echo "  Please download GGUF models before starting"
    fi
else
    echo -e "${YELLOW}! Models directory not found${NC}"
    echo "Creating models directory..."
    mkdir -p "$MODEL_DIR"
fi
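
# Informational only (an added sweep, not part of the original checklist):
# count every GGUF file present, since models beyond the three named above
# would still need to be wired into the llama-swap services' configuration.
gguf_count=$(find "$MODEL_DIR" -maxdepth 1 -name '*.gguf' 2>/dev/null | wc -l)
echo "  GGUF files in $MODEL_DIR: $gguf_count"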

echo ""
echo "================================================"
echo "Setup Commands"
echo "================================================"
echo ""

echo "1. Build the AMD ROCm container:"
echo -e "   ${GREEN}docker compose build llama-swap-amd${NC}"
echo ""

echo "2. Start both GPU services:"
echo -e "   ${GREEN}docker compose up -d llama-swap llama-swap-amd${NC}"
echo ""

echo "3. Check status:"
echo -e "   ${GREEN}docker compose ps${NC}"
echo -e "   ${GREEN}docker compose logs -f llama-swap-amd${NC}"
echo ""

echo "4. Test endpoints:"
echo -e "   ${GREEN}curl http://localhost:8090/health  # NVIDIA GPU${NC}"
echo -e "   ${GREEN}curl http://localhost:8091/health  # AMD GPU${NC}"
echo ""

echo "5. Monitor GPU usage:"
echo -e "   ${GREEN}watch -n 1 nvidia-smi  # NVIDIA${NC}"
echo -e "   ${GREEN}watch -n 1 rocm-smi    # AMD${NC}"
echo ""

echo "================================================"
echo "Optional: Enable AMD GPU preference"
echo "================================================"
echo ""
echo "To prefer the AMD GPU for load balancing, add to docker-compose.yml:"
echo "  environment:"
echo "    - PREFER_AMD_GPU=true"
echo ""

echo -e "${GREEN}Setup check complete!${NC}"
echo "See DUAL_GPU_SETUP.md for detailed documentation"