Implemented a new Japanese-only text mode with a WebUI toggle, using a Llama 3.1 Swallow model. Next up is Japanese TTS.

2026-01-23 15:02:36 +02:00
parent eb03dfce4d
commit fe0962118b
8 changed files with 318 additions and 44 deletions
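Note: the Japanese-only toggle itself is not visible in the hunks below (this file's diff covers the vision health-check changes). As a rough sketch only, a WebUI toggle could route text generation to a Swallow model along these lines; the names JAPANESE_ONLY_MODE, JAPANESE_TEXT_MODEL, select_text_model, and build_system_prompt are hypothetical and not taken from this commit.

# Hypothetical sketch: this wiring is not part of the diff shown below.
JAPANESE_TEXT_MODEL = "llama-3.1-swallow-8b-instruct"  # assumed model id

def select_text_model(cfg):
    """Pick the text model based on the WebUI toggle (illustrative only)."""
    if getattr(cfg, "JAPANESE_ONLY_MODE", False):
        return JAPANESE_TEXT_MODEL
    return cfg.TEXT_MODEL  # assumed existing default text model setting

def build_system_prompt(cfg, base_prompt):
    """Append a Japanese-only instruction when the toggle is enabled."""
    if getattr(cfg, "JAPANESE_ONLY_MODE", False):
        return base_prompt + "\nAlways respond in Japanese only."
    return base_prompt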


@@ -239,7 +239,13 @@ async def analyze_image_with_vision(base64_img):
     Uses OpenAI-compatible chat completions API with image_url.
     Always uses NVIDIA GPU for vision model.
     """
-    from utils.llm import get_vision_gpu_url
+    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health
+
+    # Check if vision endpoint is healthy before attempting request
+    is_healthy, error = await check_vision_endpoint_health()
+    if not is_healthy:
+        logger.warning(f"Vision endpoint unhealthy: {error}")
+        return f"Vision service currently unavailable: {error}"
 
     payload = {
         "model": globals.VISION_MODEL,
@@ -269,17 +275,20 @@ async def analyze_image_with_vision(base64_img):
     async with aiohttp.ClientSession() as session:
         try:
             vision_url = get_vision_gpu_url()
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response:
+            logger.info(f"Sending vision request to {vision_url} using model: {globals.VISION_MODEL}")
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=60)) as response:
                 if response.status == 200:
                     data = await response.json()
-                    return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    logger.info(f"Vision analysis completed successfully")
+                    return result
                 else:
                     error_text = await response.text()
-                    logger.error(f"Vision API error: {response.status} - {error_text}")
+                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                     return f"Error analyzing image: {response.status}"
         except Exception as e:
-            logger.error(f"Error in analyze_image_with_vision: {e}")
+            logger.error(f"Error in analyze_image_with_vision: {e}", exc_info=True)
             return f"Error analyzing image: {str(e)}"
 
 
 async def analyze_video_with_vision(video_frames, media_type="video"):
@@ -288,6 +297,13 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
         video_frames: list of base64-encoded frames
         media_type: "video", "gif", or "tenor_gif" to customize the analysis prompt
     """
+    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health
+
+    # Check if vision endpoint is healthy before attempting request
+    is_healthy, error = await check_vision_endpoint_health()
+    if not is_healthy:
+        logger.warning(f"Vision endpoint unhealthy: {error}")
+        return f"Vision service currently unavailable: {error}"
 
     # Customize prompt based on media type
     if media_type == "gif":
@@ -331,16 +347,20 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
     async with aiohttp.ClientSession() as session:
         try:
             vision_url = get_vision_gpu_url()
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response:
+            logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})")
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response:
                 if response.status == 200:
                     data = await response.json()
-                    return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    logger.info(f"Video analysis completed successfully")
+                    return result
                 else:
                     error_text = await response.text()
-                    logger.error(f"Vision API error: {response.status} - {error_text}")
+                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                     return f"Error analyzing video: {response.status}"
         except Exception as e:
-            logger.error(f"Error in analyze_video_with_vision: {e}")
+            logger.error(f"Error in analyze_video_with_vision: {e}", exc_info=True)
             return f"Error analyzing video: {str(e)}"