# Source: miku-discord/llama-swap-config.yaml
# llama-swap configuration for Miku Discord Bot
# This manages automatic model switching and unloading
models:
  # Each entry maps a model ID to the llama-server command line that serves
  # it. Every command takes its listen port from the ${PORT} placeholder and
  # binds to 0.0.0.0. The `cmd` values are folded scalars (`>-`), so each one
  # is still parsed as a single space-separated command line.
  #
  # NOTE(review): llama-swap's documented configuration lists `swap` as a
  # `groups`-level option; confirm that it is honored on individual model
  # entries before relying on the "CRITICAL" notes below.

  # Main text generation model (Llama 3.1 8B)
  llama3.1:
    cmd: >-
      /app/llama-server --port ${PORT}
      --model /models/Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf
      -ngl 99 -c 16384 --host 0.0.0.0 --no-warmup --flash-attn on
    ttl: 1800  # Unload after 30 minutes of inactivity (1800 seconds)
    swap: true  # CRITICAL: Unload other models when loading this one
    aliases:
      - llama3.1
      - text-model

  # Evil/Uncensored text generation model (DarkIdol-Llama 3.1 8B)
  darkidol:
    cmd: >-
      /app/llama-server --port ${PORT}
      --model /models/DarkIdol-Llama-3.1-8B-Instruct-1.3-Uncensored_Q4_K_M.gguf
      -ngl 99 -c 16384 --host 0.0.0.0 --no-warmup --flash-attn on
    ttl: 1800  # Unload after 30 minutes of inactivity
    swap: true  # CRITICAL: Unload other models when loading this one
    aliases:
      - darkidol
      - evil-model
      - uncensored

  # Japanese language model (Llama 3.1 Swallow - Japanese optimized)
  swallow:
    cmd: >-
      /app/llama-server --port ${PORT}
      --model /models/Llama-3.1-Swallow-8B-Instruct-v0.5-Q4_K_M.gguf
      -ngl 99 -c 16384 --host 0.0.0.0 --no-warmup --flash-attn on
    ttl: 1800  # Unload after 30 minutes of inactivity
    swap: true  # CRITICAL: Unload other models when loading this one
    aliases:
      - swallow
      - japanese
      - japanese-model

  # Vision/Multimodal model (MiniCPM-V-4.5 - supports images, video, and GIFs)
  # Unlike the text models, this entry passes a separate --mmproj file and
  # uses a smaller context size (-c 4096 instead of -c 16384).
  vision:
    cmd: >-
      /app/llama-server --port ${PORT}
      --model /models/MiniCPM-V-4_5-Q3_K_S.gguf
      --mmproj /models/MiniCPM-V-4_5-mmproj-f16.gguf
      -ngl 99 -c 4096 --host 0.0.0.0 --no-warmup --flash-attn on
    ttl: 900  # Vision model used less frequently, shorter TTL (15 minutes = 900 seconds)
    swap: true  # CRITICAL: Unload text models before loading vision
    aliases:
      - vision
      - vision-model
      - minicpm

# Server configuration
# llama-swap will listen on this address
# Inside Docker, we bind to 0.0.0.0 to allow bot container to connect