Implemented a new Japanese-only text mode with a WebUI toggle, using the Llama 3.1 Swallow model. Next up is Japanese TTS.

2026-01-23 15:02:36 +02:00
parent eb03dfce4d
commit fe0962118b
8 changed files with 318 additions and 44 deletions

View File

@@ -226,6 +226,54 @@ def calm_miku_endpoint():
return {"status": "ok", "message": "Miku has been calmed down"} return {"status": "ok", "message": "Miku has been calmed down"}
# ========== Language Mode Management ==========
@app.get("/language")
def get_language_mode():
"""Get current language mode (english or japanese)"""
return {
"language_mode": globals.LANGUAGE_MODE,
"available_languages": ["english", "japanese"],
"current_model": globals.JAPANESE_TEXT_MODEL if globals.LANGUAGE_MODE == "japanese" else globals.TEXT_MODEL
}
@app.post("/language/toggle")
def toggle_language_mode():
"""Toggle between English and Japanese modes"""
if globals.LANGUAGE_MODE == "english":
globals.LANGUAGE_MODE = "japanese"
new_mode = "japanese"
model_used = globals.JAPANESE_TEXT_MODEL
logger.info("Switched to Japanese mode (using Llama 3.1 Swallow)")
else:
globals.LANGUAGE_MODE = "english"
new_mode = "english"
model_used = globals.TEXT_MODEL
logger.info("Switched to English mode (using default model)")
return {
"status": "ok",
"language_mode": new_mode,
"model_now_using": model_used,
"message": f"Miku is now speaking in {new_mode.upper()}!"
}
@app.post("/language/set")
def set_language_mode(language: str = "english"):
"""Set language mode to either 'english' or 'japanese'"""
if language.lower() not in ["english", "japanese"]:
return {"error": f"Invalid language mode '{language}'. Use 'english' or 'japanese'."}, 400
globals.LANGUAGE_MODE = language.lower()
model_used = globals.JAPANESE_TEXT_MODEL if language.lower() == "japanese" else globals.TEXT_MODEL
logger.info(f"Language mode set to {language.lower()} (using {model_used})")
return {
"status": "ok",
"language_mode": language.lower(),
"model_now_using": model_used,
"message": f"Miku is now speaking in {language.upper()}!"
}
# ========== Evil Mode Management ========== # ========== Evil Mode Management ==========
@app.get("/evil-mode") @app.get("/evil-mode")
def get_evil_mode_status(): def get_evil_mode_status():
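
For quick verification, the new endpoints can be exercised with a small client sketch like the one below. This is illustrative only: the base URL is an assumption about the deployment, and /language/set is assumed to receive its language argument as a query parameter.

import requests

BASE = "http://localhost:8000"  # hypothetical host/port; adjust to the bot API's actual address

print(requests.get(f"{BASE}/language").json())          # current mode and active model
print(requests.post(f"{BASE}/language/toggle").json())  # flip english <-> japanese
print(requests.post(f"{BASE}/language/set", params={"language": "english"}).json())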

View File

@@ -26,8 +26,12 @@ LLAMA_AMD_URL = os.getenv("LLAMA_AMD_URL", "http://llama-swap-amd:8080") # Seco
TEXT_MODEL = os.getenv("TEXT_MODEL", "llama3.1")
VISION_MODEL = os.getenv("VISION_MODEL", "vision")
EVIL_TEXT_MODEL = os.getenv("EVIL_TEXT_MODEL", "darkidol")  # Uncensored model for evil mode
JAPANESE_TEXT_MODEL = os.getenv("JAPANESE_TEXT_MODEL", "swallow")  # Llama 3.1 Swallow model for Japanese
OWNER_USER_ID = int(os.getenv("OWNER_USER_ID", "209381657369772032"))  # Bot owner's Discord user ID for reports

# Language mode for Miku (english or japanese)
LANGUAGE_MODE = "english"  # Can be "english" or "japanese"

# Fish.audio TTS settings
FISH_API_KEY = os.getenv("FISH_API_KEY", "478d263d8c094e0c8993aae3e9cf9159")
MIKU_VOICE_ID = os.getenv("MIKU_VOICE_ID", "b28b79555e8c4904ac4d048c36e716b7")
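
Note that, unlike the model names above, LANGUAGE_MODE is a plain module global rather than an environment-backed setting, so it resets to "english" whenever the bot restarts. If persistence across restarts were wanted, it could be seeded the same way as its neighbors; a one-line sketch (not part of this commit):

LANGUAGE_MODE = os.getenv("LANGUAGE_MODE", "english").lower()  # hypothetical env-backed default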

View File

@@ -660,10 +660,11 @@
<button class="tab-button active" onclick="switchTab('tab1')">Server Management</button> <button class="tab-button active" onclick="switchTab('tab1')">Server Management</button>
<button class="tab-button" onclick="switchTab('tab2')">Actions</button> <button class="tab-button" onclick="switchTab('tab2')">Actions</button>
<button class="tab-button" onclick="switchTab('tab3')">Status</button> <button class="tab-button" onclick="switchTab('tab3')">Status</button>
<button class="tab-button" onclick="switchTab('tab4')">🎨 Image Generation</button> <button class="tab-button" onclick="switchTab('tab4')">⚙️ LLM Settings</button>
<button class="tab-button" onclick="switchTab('tab5')">📊 Autonomous Stats</button> <button class="tab-button" onclick="switchTab('tab5')">🎨 Image Generation</button>
<button class="tab-button" onclick="switchTab('tab6')">💬 Chat with LLM</button> <button class="tab-button" onclick="switchTab('tab6')">📊 Autonomous Stats</button>
<button class="tab-button" onclick="switchTab('tab7')">📞 Voice Call</button> <button class="tab-button" onclick="switchTab('tab7')">💬 Chat with LLM</button>
<button class="tab-button" onclick="switchTab('tab8')">📞 Voice Call</button>
<button class="tab-button" onclick="window.location.href='/static/system.html'">🎛️ System Settings</button> <button class="tab-button" onclick="window.location.href='/static/system.html'">🎛️ System Settings</button>
</div> </div>
@@ -1173,8 +1174,70 @@
</div>
</div>

<!-- LLM Settings Tab Content -->
<div id="tab4" class="tab-content">
    <div class="section">
        <h3>⚙️ Language Model Settings</h3>
        <p>Configure language model behavior and language mode.</p>

        <!-- Language Mode Section -->
        <div style="margin-bottom: 1.5rem; padding: 1rem; background: #2a2a2a; border-radius: 4px; border: 2px solid #4a7bc9;">
            <h4 style="margin-top: 0; color: #61dafb;">🌐 Language Mode</h4>
            <p style="margin: 0.5rem 0; color: #aaa;">Switch Miku between English and Japanese responses.</p>
            <div style="margin: 1rem 0;">
                <div style="margin-bottom: 1rem;">
                    <strong>Current Language:</strong> <span id="current-language-display" style="color: #61dafb; font-weight: bold;">English</span>
                </div>
                <button onclick="toggleLanguageMode()" style="background: #4a7bc9; color: #fff; padding: 0.6rem 1.2rem; border: 2px solid #61dafb; border-radius: 4px; cursor: pointer; font-weight: bold; font-size: 1rem;">
                    🔄 Toggle Language (English ↔ Japanese)
                </button>
            </div>
            <div style="margin-top: 1rem; padding: 1rem; background: #1a1a1a; border-radius: 4px; border-left: 3px solid #4a7bc9;">
                <div style="font-size: 0.9rem;">
                    <div style="margin-bottom: 0.5rem;"><strong>English Mode:</strong></div>
                    <ul style="margin: 0 0 0.5rem 0; padding-left: 1.5rem; color: #aaa;">
                        <li>Uses standard Llama 3.1 model</li>
                        <li>Responds in English only</li>
                    </ul>
                    <div style="margin-bottom: 0.5rem;"><strong>Japanese Mode (日本語):</strong></div>
                    <ul style="margin: 0 0 0; padding-left: 1.5rem; color: #aaa;">
                        <li>Uses Llama 3.1 Swallow model (trained for Japanese)</li>
                        <li>Responds entirely in Japanese</li>
                    </ul>
                </div>
            </div>
        </div>

        <!-- Language Mode Status Section -->
        <div style="margin-bottom: 1.5rem; padding: 1rem; background: #2a2a2a; border-radius: 4px;">
            <h4 style="margin-top: 0;">📊 Current Status</h4>
            <div id="language-status-display" style="background: #1a1a1a; padding: 1rem; border-radius: 4px; font-family: monospace; font-size: 0.9rem;">
                <p style="margin: 0.5rem 0;"><strong>Language Mode:</strong> <span id="status-language">English</span></p>
                <p style="margin: 0.5rem 0;"><strong>Active Model:</strong> <span id="status-model">llama3.1</span></p>
                <p style="margin: 0.5rem 0;"><strong>Available Languages:</strong> English, 日本語 (Japanese)</p>
            </div>
            <button onclick="refreshLanguageStatus()" style="margin-top: 1rem;">🔄 Refresh Status</button>
        </div>

        <!-- Information Section -->
        <div style="padding: 1rem; background: #1a1a1a; border-radius: 4px; border-left: 3px solid #ff9800;">
            <h4 style="margin-top: 0; color: #ff9800;">How Language Mode Works</h4>
            <ul style="margin: 0.5rem 0; padding-left: 1.5rem; font-size: 0.9rem; color: #aaa;">
                <li>English mode uses your default text model for English responses</li>
                <li>Japanese mode switches to Swallow and responds only in 日本語</li>
                <li>All personality traits, mood system, and features work in both modes</li>
                <li>Language mode is global - affects all servers and DMs</li>
                <li>Conversation history is preserved across language switches</li>
            </ul>
        </div>
    </div>
</div>

<!-- Image Generation Tab Content -->
<div id="tab5" class="tab-content">
    <div class="section">
        <h3>🎨 Image Generation System</h3>
        <p>Natural language image generation powered by ComfyUI. Users can ask Miku to create images naturally without commands!</p>
@@ -1232,7 +1295,7 @@
</div>

<!-- Autonomous Stats Tab Content -->
<div id="tab6" class="tab-content">
    <div class="section">
        <h3>📊 Autonomous V2 Decision Engine Stats</h3>
        <p>Real-time monitoring of Miku's autonomous decision-making context and mood-based personality stats.</p>
@@ -1250,7 +1313,7 @@
</div>

<!-- Chat with LLM Tab Content -->
<div id="tab7" class="tab-content">
    <div class="section">
        <h3>💬 Chat with LLM</h3>
        <p>Direct chat interface with the language models. Test responses, experiment with prompts, or just chat with Miku!</p>
@@ -1375,8 +1438,8 @@
</div>
</div>

<!-- Tab 8: Voice Call Management -->
<div id="tab8" class="tab-content">
    <div class="section">
        <h3>📞 Initiate Voice Call</h3>
        <p>Start an automated voice chat session with a user. Miku will automatically manage containers, join voice chat, and send an invitation DM.</p>
@@ -1559,6 +1622,7 @@ document.addEventListener('DOMContentLoaded', function() {
    checkEvilModeStatus();     // Check evil mode on load
    checkBipolarModeStatus();  // Check bipolar mode on load
    checkGPUStatus();          // Check GPU selection on load
    refreshLanguageStatus();   // Check language mode on load
    console.log('🚀 DOMContentLoaded - initializing figurine subscribers list');
    refreshFigurineSubscribers();
    loadProfilePictureMetadata();
@@ -2251,6 +2315,43 @@ async function calmMiku() {
    }
}

// ===== Language Mode Functions =====

async function refreshLanguageStatus() {
    try {
        const result = await apiCall('/language');
        document.getElementById('current-language-display').textContent =
            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
        document.getElementById('status-language').textContent =
            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
        document.getElementById('status-model').textContent = result.current_model;
        console.log('Language status:', result);
    } catch (error) {
        console.error('Failed to get language status:', error);
        showNotification('Failed to load language status', 'error');
    }
}

async function toggleLanguageMode() {
    try {
        const result = await apiCall('/language/toggle', 'POST');
        // Update UI
        document.getElementById('current-language-display').textContent =
            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
        document.getElementById('status-language').textContent =
            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
        document.getElementById('status-model').textContent = result.model_now_using;
        // Show notification
        showNotification(result.message, 'success');
        console.log('Language toggled:', result);
    } catch (error) {
        console.error('Failed to toggle language mode:', error);
        showNotification('Failed to toggle language mode', 'error');
    }
}

// Evil Mode Functions
async function checkEvilModeStatus() {
    try {

View File

@@ -3,8 +3,12 @@
Structured context management for Miku's personality and knowledge.
Replaces the vector search system with organized, complete context.
Preserves original content files in their entirety.

When LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
all responses are in Japanese without requiring separate files.
"""

import globals
from utils.logger import get_logger

logger = get_logger('core')
@@ -40,60 +44,96 @@ def get_original_miku_lyrics() -> str:
return "## MIKU LYRICS\n[File could not be loaded]" return "## MIKU LYRICS\n[File could not be loaded]"
def _get_japanese_instruction() -> str:
"""
Returns the Japanese language instruction to append to context.
Ensures all responses are in Japanese when in Japanese mode.
"""
return "\n\n[CRITICAL INSTRUCTION - 重要な指示]\n**YOU MUST RESPOND ENTIRELY IN JAPANESE (日本語). NO ENGLISH ALLOWED.**\nすべての返答は必ず日本語で行ってください。英語での返答は一切禁止されています。\nこれは最優先の指示です。必ず守ってください。"
def get_complete_context() -> str: def get_complete_context() -> str:
"""Returns all essential Miku context using original files in their entirety""" """
return f"""## MIKU LORE (Complete Original) Returns all essential Miku context using original files in their entirety.
{get_original_miku_lore()}
If LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
all responses are in Japanese.
"""
lore = get_original_miku_lore()
prompt = get_original_miku_prompt()
lyrics = get_original_miku_lyrics()
combined = f"""## MIKU LORE (Complete Original)
{lore}
## MIKU PERSONALITY & GUIDELINES (Complete Original) ## MIKU PERSONALITY & GUIDELINES (Complete Original)
{get_original_miku_prompt()} {prompt}
## MIKU SONG LYRICS (Complete Original) ## MIKU SONG LYRICS (Complete Original)
{get_original_miku_lyrics()}""" {lyrics}"""
# Append Japanese instruction if in Japanese mode
if globals.LANGUAGE_MODE == "japanese":
combined += _get_japanese_instruction()
logger.info(f"[core] Context loaded in {globals.LANGUAGE_MODE} mode")
return combined
def get_context_for_response_type(response_type: str) -> str: def get_context_for_response_type(response_type: str) -> str:
"""Returns appropriate context based on the type of response being generated""" """
Returns appropriate context based on the type of response being generated.
# Core context always includes the complete original files If LANGUAGE_MODE is "japanese", appends Japanese instruction to all contexts
to ensure responses are in Japanese.
"""
lore = get_original_miku_lore()
prompt = get_original_miku_prompt()
lyrics = get_original_miku_lyrics()
# Build core context (always in English source files)
core_context = f"""## MIKU LORE (Complete Original) core_context = f"""## MIKU LORE (Complete Original)
{get_original_miku_lore()} {lore}
## MIKU PERSONALITY & GUIDELINES (Complete Original) ## MIKU PERSONALITY & GUIDELINES (Complete Original)
{get_original_miku_prompt()}""" {prompt}"""
# Return context based on response type
if response_type == "autonomous_general": if response_type == "autonomous_general":
# For general autonomous messages, include everything context = f"""{core_context}
return f"""{core_context}
## MIKU SONG LYRICS (Complete Original) ## MIKU SONG LYRICS (Complete Original)
{get_original_miku_lyrics()}""" {lyrics}"""
elif response_type == "autonomous_tweet": elif response_type == "autonomous_tweet":
# For tweet responses, include lyrics for musical context context = f"""{core_context}
return f"""{core_context}
## MIKU SONG LYRICS (Complete Original) ## MIKU SONG LYRICS (Complete Original)
{get_original_miku_lyrics()}""" {lyrics}"""
elif response_type == "dm_response" or response_type == "server_response": elif response_type == "dm_response" or response_type == "server_response":
# For conversational responses, include everything context = f"""{core_context}
return f"""{core_context}
## MIKU SONG LYRICS (Complete Original) ## MIKU SONG LYRICS (Complete Original)
{get_original_miku_lyrics()}""" {lyrics}"""
elif response_type == "conversation_join": elif response_type == "conversation_join":
# For joining conversations, include everything context = f"""{core_context}
return f"""{core_context}
## MIKU SONG LYRICS (Complete Original) ## MIKU SONG LYRICS (Complete Original)
{get_original_miku_lyrics()}""" {lyrics}"""
elif response_type == "emoji_selection": elif response_type == "emoji_selection":
# For emoji reactions, no context needed - the prompt has everything # For emoji reactions, minimal context needed
return "" context = ""
else: else:
# Default: comprehensive context # Default: comprehensive context
return get_complete_context() context = get_complete_context()
# Append Japanese instruction if in Japanese mode
if globals.LANGUAGE_MODE == "japanese" and context:
context += _get_japanese_instruction()
return context
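
One behavioral detail falls out of the final guard: because of the "and context" check, the Japanese instruction is only appended to non-empty contexts, so "emoji_selection" stays empty in both modes. A minimal sanity check, as a sketch (the module's import path isn't shown in the diff, so run this from within the module or adjust the import):

import globals

globals.LANGUAGE_MODE = "japanese"
assert get_context_for_response_type("dm_response").endswith("必ず守ってください。")
assert get_context_for_response_type("emoji_selection") == ""  # guard skips empty contexts

globals.LANGUAGE_MODE = "english"
assert "重要な指示" not in get_context_for_response_type("dm_response")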

View File

@@ -239,7 +239,13 @@ async def analyze_image_with_vision(base64_img):
    Uses OpenAI-compatible chat completions API with image_url.
    Always uses NVIDIA GPU for vision model.
    """
    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health

    # Check if vision endpoint is healthy before attempting request
    is_healthy, error = await check_vision_endpoint_health()
    if not is_healthy:
        logger.warning(f"Vision endpoint unhealthy: {error}")
        return f"Vision service currently unavailable: {error}"

    payload = {
        "model": globals.VISION_MODEL,
@@ -269,17 +275,20 @@ async def analyze_image_with_vision(base64_img):
    async with aiohttp.ClientSession() as session:
        try:
            vision_url = get_vision_gpu_url()
            logger.info(f"Sending vision request to {vision_url} using model: {globals.VISION_MODEL}")
            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=60)) as response:
                if response.status == 200:
                    data = await response.json()
                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
                    logger.info("Vision analysis completed successfully")
                    return result
                else:
                    error_text = await response.text()
                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                    return f"Error analyzing image: {response.status}"
        except Exception as e:
            logger.error(f"Error in analyze_image_with_vision: {e}", exc_info=True)
            return f"Error analyzing image: {str(e)}"


async def analyze_video_with_vision(video_frames, media_type="video"):
@@ -288,6 +297,13 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
        video_frames: list of base64-encoded frames
        media_type: "video", "gif", or "tenor_gif" to customize the analysis prompt
    """
    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health

    # Check if vision endpoint is healthy before attempting request
    is_healthy, error = await check_vision_endpoint_health()
    if not is_healthy:
        logger.warning(f"Vision endpoint unhealthy: {error}")
        return f"Vision service currently unavailable: {error}"

    # Customize prompt based on media type
    if media_type == "gif":
@@ -331,16 +347,20 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
    async with aiohttp.ClientSession() as session:
        try:
            vision_url = get_vision_gpu_url()
            logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})")
            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response:
                if response.status == 200:
                    data = await response.json()
                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
                    logger.info("Video analysis completed successfully")
                    return result
                else:
                    error_text = await response.text()
                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                    return f"Error analyzing video: {response.status}"
        except Exception as e:
            logger.error(f"Error in analyze_video_with_vision: {e}", exc_info=True)
            return f"Error analyzing video: {str(e)}"

View File

@@ -38,8 +38,47 @@ def get_vision_gpu_url():
    Strategy: Always use NVIDIA GPU for vision to avoid unloading/reloading.
    - When NVIDIA is primary: Use NVIDIA for both text and vision
    - When AMD is primary: Use AMD for text, NVIDIA for vision (keeps vision loaded)

    Important: Vision model (MiniCPM-V) is ONLY configured on NVIDIA GPU.
    This ensures vision inference is always fast and doesn't interfere with
    AMD text model inference.
    """
    current_text_gpu = get_current_gpu_url()
    nvidia_vision_url = globals.LLAMA_URL

    # Vision ALWAYS uses NVIDIA, regardless of which GPU is primary for text.
    # Log this decision when GPU switching is active (primary text GPU is AMD).
    if current_text_gpu == globals.LLAMA_AMD_URL:
        logger.debug("Primary GPU is AMD for text, but using NVIDIA for vision model")

    return nvidia_vision_url  # Always use NVIDIA for vision


async def check_vision_endpoint_health():
    """
    Check if NVIDIA GPU vision endpoint is healthy and responsive.
    This is important when AMD is the primary GPU to ensure vision still works.

    Returns:
        Tuple of (is_healthy: bool, error_message: Optional[str])
    """
    import aiohttp

    vision_url = get_vision_gpu_url()
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{vision_url}/health", timeout=aiohttp.ClientTimeout(total=5)) as response:
                is_healthy = response.status == 200
                if is_healthy:
                    logger.info(f"Vision endpoint ({vision_url}) health check: OK")
                else:
                    logger.warning(f"Vision endpoint ({vision_url}) health check failed: status {response.status}")
                return is_healthy, None if is_healthy else f"Status {response.status}"
    except asyncio.TimeoutError:
        logger.error(f"Vision endpoint ({vision_url}) health check: timeout")
        return False, "Endpoint timeout"
    except Exception as e:
        logger.error(f"Vision endpoint ({vision_url}) health check error: {e}")
        return False, str(e)


def _strip_surrounding_quotes(text):
    """
@@ -108,8 +147,12 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
    if evil_mode:
        model = globals.EVIL_TEXT_MODEL  # Use DarkIdol uncensored model
        logger.info(f"Using evil model: {model}")
    elif globals.LANGUAGE_MODE == "japanese":
        model = globals.JAPANESE_TEXT_MODEL  # Use Swallow for Japanese
        logger.info(f"Using Japanese model: {model}")
    else:
        model = globals.TEXT_MODEL
        logger.info(f"Using default model: {model}")

    # Determine channel_id for conversation history
    # For servers, use guild_id; for DMs, use user_id
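
Two things are worth noting here. First, the elif order gives evil mode precedence: a session with evil_mode active stays on the DarkIdol model even while LANGUAGE_MODE is "japanese". Second, check_vision_endpoint_health is a coroutine returning an (is_healthy, error_message) tuple; the vision handlers above await it directly, and a one-off probe outside the event loop could look like this sketch (assuming the module is importable as utils.llm):

import asyncio
from utils.llm import check_vision_endpoint_health

ok, err = asyncio.run(check_vision_endpoint_health())
print("vision endpoint OK" if ok else f"vision endpoint unavailable: {err}")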

View File

@@ -19,6 +19,15 @@ models:
      - evil-model
      - uncensored

  # Japanese language model (Llama 3.1 Swallow - Japanese optimized)
  swallow:
    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-Swallow-8B-Instruct-v0.5-Q4_K_M.gguf -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
    ttl: 1800  # Unload after 30 minutes of inactivity
    aliases:
      - swallow
      - japanese
      - japanese-model

  # Vision/Multimodal model (MiniCPM-V-4.5 - supports images, video, and GIFs)
  vision:
    cmd: /app/llama-server --port ${PORT} --model /models/MiniCPM-V-4_5-Q3_K_S.gguf --mmproj /models/MiniCPM-V-4_5-mmproj-f16.gguf -ngl 99 -c 4096 --host 0.0.0.0 --no-warmup

View File

@@ -20,6 +20,15 @@ models:
      - evil-model
      - uncensored

  # Japanese language model (Llama 3.1 Swallow - Japanese optimized)
  swallow:
    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-Swallow-8B-Instruct-v0.5-Q4_K_M.gguf -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
    ttl: 1800  # Unload after 30 minutes of inactivity
    aliases:
      - swallow
      - japanese
      - japanese-model

# Server configuration
# llama-swap will listen on this address
# Inside Docker, we bind to 0.0.0.0 to allow bot container to connect
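
Once llama-swap picks up this config, the new entry should be reachable under any of its aliases. The -nkvo flag keeps the KV cache in system RAM rather than VRAM, presumably to leave room for the 16K context (-c 16384) alongside the Q4_K_M weights. As a quick registration check, the model list can be queried through llama-swap's OpenAI-compatible API; a sketch, with the host taken from the LLAMA_AMD_URL default in globals and the exact response shape depending on the llama-swap version:

import requests

resp = requests.get("http://llama-swap-amd:8080/v1/models").json()
print([m["id"] for m in resp.get("data", [])])  # "swallow" should appear among the model IDs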