feat: add Traefik proxy, custom chat template, improve Cheshire Cat memory
docker-compose.yml:
- Add Traefik proxy network + labels for miku.panel domain
- Connect miku-bot service to proxy network

llama-swap-config.yaml / llama-swap-rocm-config.yaml:
- Add --chat-template-file flag to disable Llama 3.1 built-in tool calling (was causing malformed responses)
- ROCm config: add Rocinante-X 12B model entry for comparison testing

cheshire-cat discord_bridge plugin:
- Increase declarative memory recall (k=3→10, threshold 0.7→0.5) for better factual retrieval
- Add agent_prompt_prefix hook to enforce factual accuracy from declarative memories
- Add before_agent_starts debug logging for memory inspection
- Add passthrough hooks for message/suffix pipeline
This commit is contained in:
@@ -4,8 +4,9 @@
 models:
   # Main text generation model (same name as NVIDIA for uniform switching)
+  # Custom chat template to disable built-in tool calling
   llama3.1:
-    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf -ngl 99 -c 16384 --host 0.0.0.0 --no-warmup --flash-attn on
+    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf -ngl 99 -c 16384 --host 0.0.0.0 --no-warmup --flash-attn on --chat-template-file /app/llama31_notool_template.jinja
     ttl: 1800 # Unload after 30 minutes of inactivity (1800 seconds)
     aliases:
       - llama3.1
@@ -20,6 +21,14 @@ models:
       - evil-model
       - uncensored

+  # Rocinante-X 12B - larger creative/RP model for comparison testing
+  rocinante:
+    cmd: /app/llama-server --port ${PORT} --model /models/Rocinante-X-12B-v1b-Q5_K_M.gguf -ngl 99 -c 8192 --host 0.0.0.0 --no-warmup --flash-attn on
+    ttl: 1800 # Unload after 30 minutes of inactivity
+    aliases:
+      - rocinante
+      - rocinante-12b
+
   # Japanese language model (Llama 3.1 Swallow - Japanese optimized)
   swallow:
     cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-Swallow-8B-Instruct-v0.5-Q4_K_M.gguf -ngl 99 -c 16384 --host 0.0.0.0 --no-warmup --flash-attn on
||||
Reference in New Issue
Block a user