Initial commit: Miku Discord Bot
This commit is contained in:
24
llama-swap-config.yaml
Normal file
24
llama-swap-config.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
# llama-swap configuration for Miku Discord Bot
|
||||
# This manages automatic model switching and unloading
|
||||
|
||||
models:
  # Primary text-generation model (Llama 3.1 8B).
  llama3.1:
    # Folded scalar: the lines below are joined with single spaces into one
    # command line, so the launched command is unchanged.
    cmd: >-
      /app/llama-server
      --port ${PORT}
      --model /models/Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf
      -ngl 99
      -nkvo
      -c 16384
      --host 0.0.0.0
    # Idle time in seconds before the model is unloaded (1800 s = 30 minutes).
    ttl: 1800
    # NOTE(review): the first alias repeats the model key; presumably
    # redundant, kept as-is -- confirm before removing.
    aliases:
      - llama3.1
      - text-model

  # Vision/multimodal model (MiniCPM-V-4.5: images, video, and GIFs).
  vision:
    # Folded scalar: joined into a single command line, same as above.
    cmd: >-
      /app/llama-server
      --port ${PORT}
      --model /models/MiniCPM-V-4_5-Q3_K_S.gguf
      --mmproj /models/MiniCPM-V-4_5-mmproj-f16.gguf
      -ngl 99
      -c 4096
      --host 0.0.0.0
    # Used less often than the text model, so it gets a shorter idle timeout
    # (900 s = 15 minutes).
    ttl: 900
    # NOTE(review): the first alias repeats the model key; presumably
    # redundant, kept as-is -- confirm before removing.
    aliases:
      - vision
      - vision-model
      - minicpm
|
||||
|
||||
# Server configuration
|
||||
# llama-swap will listen on this address
|
||||
# Inside Docker, we bind to 0.0.0.0 to allow the bot container to connect
|
||||
Reference in New Issue
Block a user