# Environment configuration for Cheshire Cat Core, Qdrant, and Ollama.
#
# Comments are kept on their own lines and values are left unquoted on purpose:
# not every env-file reader strips trailing "# ..." text or surrounding quotes,
# so either one can end up inside the value.

# Decide host and port for your Cat. Default will be localhost:1865
# General settings for Cheshire Cat Core

# Hostname for core service
CORE_HOST=localhost
# Port for core service
CORE_PORT=1865
# Default log level for all services
LOG_LEVEL=WARNING
# Enable debugging for more verbose logs
DEBUG=false
# Enable HTTPS/WSS for secure connections
CORE_USE_SECURE_PROTOCOLS=false
# Uncomment to set an API key for protected endpoints.
# "meow" is only a placeholder; replace it with your own secret.
# API_KEY=meow

# Settings for Qdrant vector memory service
# Change the following if you need to specify custom settings

# Hostname for the Qdrant service
QDRANT_HOST=cheshire_cat_vector_memory
# Port for the Qdrant service
QDRANT_PORT=6333

# Feature toggles

# Toggle for saving memory snapshots on embedder change
SAVE_MEMORY_SNAPSHOTS=false

# Ollama-specific settings

# Host address for Ollama service
# (0.0.0.0 means "all interfaces" when used as a bind address)
OLLAMA_HOST=0.0.0.0
# Port for Ollama service
OLLAMA_PORT=11434
# Flash attention setting for Ollama service
OLLAMA_FLASH_ATTENTION=false
# Debug mode for Ollama service
OLLAMA_DEBUG=false
# Duration models stay loaded; default is 5 minutes, can be set to e.g. 24h
OLLAMA_KEEP_ALIVE=5m
# Maximum number of models loaded simultaneously; defaults to 1
OLLAMA_MAX_LOADED_MODELS=1
# Maximum number of allocated contexts (parallel requests).
# Manage resources carefully: contexts multiply across loaded models, e.g.
# OLLAMA_NUM_PARALLEL=4 with OLLAMA_MAX_LOADED_MODELS=3 may require up to
# 12 contexts (4x3).
OLLAMA_NUM_PARALLEL=1