Implemented a new Japanese-only text mode with a WebUI toggle, using a Llama 3.1 Swallow dataset model. Next up is Japanese TTS.
This commit is contained in:
@@ -38,8 +38,47 @@ def get_vision_gpu_url():
|
||||
Strategy: Always use NVIDIA GPU for vision to avoid unloading/reloading.
|
||||
- When NVIDIA is primary: Use NVIDIA for both text and vision
|
||||
- When AMD is primary: Use AMD for text, NVIDIA for vision (keeps vision loaded)
|
||||
|
||||
Important: Vision model (MiniCPM-V) is ONLY configured on NVIDIA GPU.
|
||||
This ensures vision inference is always fast and doesn't interfere with
|
||||
AMD text model inference.
|
||||
"""
|
||||
return globals.LLAMA_URL # Always use NVIDIA for vision
|
||||
current_text_gpu = get_current_gpu_url()
|
||||
nvidia_vision_url = globals.LLAMA_URL
|
||||
|
||||
# Vision ALWAYS uses NVIDIA, regardless of which GPU is primary for text
|
||||
# Log this decision when GPU switching is active (primary text GPU is AMD)
|
||||
if current_text_gpu == globals.LLAMA_AMD_URL:
|
||||
logger.debug(f"Primary GPU is AMD for text, but using NVIDIA for vision model")
|
||||
|
||||
return nvidia_vision_url # Always use NVIDIA for vision
|
||||
|
||||
async def check_vision_endpoint_health():
    """
    Check if NVIDIA GPU vision endpoint is healthy and responsive.
    This is important when AMD is the primary GPU to ensure vision still works.

    Performs a single GET against the endpoint's /health route with a
    5-second overall timeout.

    Returns:
        Tuple of (is_healthy: bool, error_message: Optional[str]) where
        error_message is None on success, "Endpoint timeout" on timeout,
        an HTTP status description, or the stringified exception.
    """
    # Import asyncio locally alongside aiohttp: the except clause below
    # needs asyncio.TimeoutError, and a module-level asyncio import is
    # not guaranteed to exist (bug fix — previously only aiohttp was
    # imported here).
    import asyncio
    import aiohttp

    vision_url = get_vision_gpu_url()

    try:
        async with aiohttp.ClientSession() as session:
            # total=5 bounds the whole request (connect + read); a healthy
            # endpoint should respond well within that budget.
            async with session.get(
                f"{vision_url}/health",
                timeout=aiohttp.ClientTimeout(total=5),
            ) as response:
                is_healthy = response.status == 200
                if is_healthy:
                    logger.info(f"Vision endpoint ({vision_url}) health check: OK")
                else:
                    logger.warning(f"Vision endpoint ({vision_url}) health check failed: status {response.status}")
                return is_healthy, None if is_healthy else f"Status {response.status}"
    except asyncio.TimeoutError:
        logger.error(f"Vision endpoint ({vision_url}) health check: timeout")
        return False, "Endpoint timeout"
    except Exception as e:
        # Covers DNS failures, connection refusal, aiohttp client errors, etc.
        logger.error(f"Vision endpoint ({vision_url}) health check error: {e}")
        return False, str(e)
|
||||
|
||||
def _strip_surrounding_quotes(text):
|
||||
"""
|
||||
@@ -108,8 +147,12 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
|
||||
if evil_mode:
|
||||
model = globals.EVIL_TEXT_MODEL # Use DarkIdol uncensored model
|
||||
logger.info(f"Using evil model: {model}")
|
||||
elif globals.LANGUAGE_MODE == "japanese":
|
||||
model = globals.JAPANESE_TEXT_MODEL # Use Swallow for Japanese
|
||||
logger.info(f"Using Japanese model: {model}")
|
||||
else:
|
||||
model = globals.TEXT_MODEL
|
||||
logger.info(f"Using default model: {model}")
|
||||
|
||||
# Determine channel_id for conversation history
|
||||
# For servers, use guild_id; for DMs, use user_id
|
||||
|
||||
Reference in New Issue
Block a user