Implemented a new Japanese-only text mode with a WebUI toggle, using a Llama 3.1 Swallow-based model. Next up is Japanese TTS.

2026-01-23 15:02:36 +02:00
parent eb03dfce4d
commit fe0962118b
8 changed files with 318 additions and 44 deletions
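The WebUI toggle mentioned in the commit message is not shown in the excerpt below; only globals.LANGUAGE_MODE and globals.JAPANESE_TEXT_MODEL appear in the diff. As a rough sketch of how such a toggle could drive the mode, assuming an aiohttp-based WebUI (the handler name, route, and payload shape are illustrative assumptions, not code from this commit):

# Hypothetical sketch, not the commit's WebUI code: a toggle endpoint that flips
# the global language mode read by query_llama(). Only the LANGUAGE_MODE and
# JAPANESE_TEXT_MODEL names mirror this commit; the route and handler are invented.
from aiohttp import web

import globals  # project module assumed to expose LANGUAGE_MODE


async def set_language_mode(request: web.Request) -> web.Response:
    payload = await request.json()
    mode = payload.get("language_mode", "default")
    if mode not in ("default", "japanese"):
        return web.json_response({"error": f"unknown mode: {mode}"}, status=400)
    globals.LANGUAGE_MODE = mode  # picked up on the next query_llama() call
    return web.json_response({"language_mode": globals.LANGUAGE_MODE})


def register_language_routes(app: web.Application) -> None:
    app.router.add_post("/api/language_mode", set_language_mode)  # assumed route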

@@ -38,8 +38,47 @@ def get_vision_gpu_url():
     Strategy: Always use NVIDIA GPU for vision to avoid unloading/reloading.
     - When NVIDIA is primary: Use NVIDIA for both text and vision
     - When AMD is primary: Use AMD for text, NVIDIA for vision (keeps vision loaded)
+
+    Important: Vision model (MiniCPM-V) is ONLY configured on NVIDIA GPU.
+    This ensures vision inference is always fast and doesn't interfere with
+    AMD text model inference.
     """
-    return globals.LLAMA_URL  # Always use NVIDIA for vision
+    current_text_gpu = get_current_gpu_url()
+    nvidia_vision_url = globals.LLAMA_URL
+
+    # Vision ALWAYS uses NVIDIA, regardless of which GPU is primary for text.
+    # Log this decision when GPU switching is active (primary text GPU is AMD).
+    if current_text_gpu == globals.LLAMA_AMD_URL:
+        logger.debug(f"Primary GPU is AMD for text, but using NVIDIA for vision model")
+
+    return nvidia_vision_url  # Always use NVIDIA for vision
+
+
+async def check_vision_endpoint_health():
+    """
+    Check if NVIDIA GPU vision endpoint is healthy and responsive.
+    This is important when AMD is the primary GPU to ensure vision still works.
+
+    Returns:
+        Tuple of (is_healthy: bool, error_message: Optional[str])
+    """
+    import aiohttp
+
+    vision_url = get_vision_gpu_url()
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{vision_url}/health", timeout=aiohttp.ClientTimeout(total=5)) as response:
+                is_healthy = response.status == 200
+                if is_healthy:
+                    logger.info(f"Vision endpoint ({vision_url}) health check: OK")
+                else:
+                    logger.warning(f"Vision endpoint ({vision_url}) health check failed: status {response.status}")
+                return is_healthy, None if is_healthy else f"Status {response.status}"
+    except asyncio.TimeoutError:
+        logger.error(f"Vision endpoint ({vision_url}) health check: timeout")
+        return False, "Endpoint timeout"
+    except Exception as e:
+        logger.error(f"Vision endpoint ({vision_url}) health check error: {e}")
+        return False, str(e)
+
 
 def _strip_surrounding_quotes(text):
     """
@@ -108,8 +147,12 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
     if evil_mode:
         model = globals.EVIL_TEXT_MODEL  # Use DarkIdol uncensored model
         logger.info(f"Using evil model: {model}")
+    elif globals.LANGUAGE_MODE == "japanese":
+        model = globals.JAPANESE_TEXT_MODEL  # Use Swallow for Japanese
+        logger.info(f"Using Japanese model: {model}")
     else:
         model = globals.TEXT_MODEL
         logger.info(f"Using default model: {model}")
 
     # Determine channel_id for conversation history
     # For servers, use guild_id; for DMs, use user_id
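
To spell out the selection order added above: evil mode takes precedence over the Japanese toggle, which takes precedence over the default model. A standalone sketch of that precedence follows; select_text_model is an illustration only, while the globals.* names come from the diff.

# Illustrative only: mirrors the branch order added to query_llama above.
# select_text_model does not exist in the commit; the globals.* names do.
import globals


def select_text_model(evil_mode: bool, language_mode: str) -> str:
    if evil_mode:                       # DarkIdol uncensored model wins over everything
        return globals.EVIL_TEXT_MODEL
    if language_mode == "japanese":     # Japanese-only mode via the WebUI toggle
        return globals.JAPANESE_TEXT_MODEL
    return globals.TEXT_MODEL           # default model


# Example: with evil mode on, the Japanese toggle is ignored:
#     select_text_model(True, "japanese") == globals.EVIL_TEXT_MODEL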