Add interactive Chat with LLM interface to Web UI

Features:
- Real-time streaming chat interface (ChatGPT-like experience)
- Model selection: Text model (fast) or Vision model (image analysis)
- System prompt toggle: Chat with Miku's personality or raw LLM
- Mood selector: Choose from 14 different emotional states
- Full context integration: Uses complete miku_lore.txt, miku_prompt.txt, and miku_lyrics.txt
- Conversation memory: Maintains chat history throughout session
- Image upload support for vision model (see the example request body after this list)
- Horizontal scrolling tabs for responsive design
- Clear chat history functionality
- SSE (Server-Sent Events) for streaming responses
- Keyboard shortcuts (Ctrl+Enter to send)
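
For illustration, a request body the Web UI might send to the new endpoint (the field names follow the ChatMessage model in the diff below; the values here are invented):

    # Hypothetical /chat/stream request body (illustrative values only)
    example_request = {
        "message": "What do you think of this picture?",
        "model_type": "vision",            # "text" or "vision"
        "use_system_prompt": True,         # False = talk to the raw LLM, no Miku persona
        "image_data": "<base64-encoded JPEG>",
        "conversation_history": [
            {"role": "user", "content": "Hi Miku!"},
            {"role": "assistant", "content": "Hi! What should we look at today?"}
        ],
        "mood": "neutral"                  # one of the 14 selectable moods; "neutral" is the default
    }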

Technical changes:
- Added POST /chat/stream endpoint in api.py with streaming support
- Updated ChatMessage model with mood, conversation_history, and image_data
- Integrated context_manager for proper Miku personality context
- Added Chat with LLM tab to index.html
- Implemented JavaScript streaming client with EventSource-like handling (a minimal consumer sketch of the SSE protocol follows this list)
- Added CSS for chat messages, typing indicators, and animations
- Made tab navigation horizontally scrollable for narrow viewports
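
The Web UI consumes the stream from JavaScript; purely as a sketch of the wire protocol (not part of this commit), a minimal Python consumer could look like the following, assuming the API is listening on localhost:3939 as configured in start_api():

    # Sketch of a /chat/stream SSE consumer (illustrative, not part of this commit).
    # Assumes the FastAPI app from api.py is listening on localhost:3939.
    import asyncio
    import json
    import aiohttp

    async def chat(message: str):
        payload = {"message": message, "model_type": "text",
                   "use_system_prompt": True, "mood": "neutral"}
        async with aiohttp.ClientSession() as session:
            async with session.post("http://localhost:3939/chat/stream", json=payload) as resp:
                # The endpoint emits SSE lines of the form "data: {...}\n\n"
                async for raw in resp.content:
                    line = raw.decode("utf-8").strip()
                    if not line.startswith("data: "):
                        continue
                    event = json.loads(line[6:])
                    if "error" in event:
                        print(f"\n[error] {event['error']}")
                        break
                    if event.get("done"):
                        break
                    print(event.get("content", ""), end="", flush=True)

    asyncio.run(chat("Hi Miku!"))
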
2025-12-13 00:23:03 +02:00
parent 65e6c3e7ea
commit bb82b7f146
23 changed files with 2926 additions and 8 deletions

api.py

@@ -7,7 +7,8 @@ from fastapi import (
     File,
     Form
 )
-from typing import List
+from fastapi.responses import StreamingResponse
+from typing import List, Optional
 from pydantic import BaseModel
 import globals
 from server_manager import server_manager
@@ -36,6 +37,7 @@ import subprocess
 import io
 import discord
 import aiofiles
+import aiohttp
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse, PlainTextResponse
 import os
@@ -1512,6 +1514,184 @@ async def get_v2_status():
     except Exception as e:
         return {"status": "error", "message": str(e)}
 
+
+# ========== Chat Interface Endpoints ==========
+
+class ChatMessage(BaseModel):
+    message: str
+    model_type: str = "text"  # "text" or "vision"
+    use_system_prompt: bool = True
+    image_data: Optional[str] = None  # Base64 encoded image for vision model
+    conversation_history: Optional[List[dict]] = None  # Previous messages in conversation
+    mood: str = "neutral"  # Miku's mood for this conversation
+
+
+@app.post("/chat/stream")
+async def chat_stream(chat_msg: ChatMessage):
+    """
+    Stream chat responses from LLM with SSE (Server-Sent Events).
+    Supports both text and vision models with optional system prompts.
+    """
+    async def generate_stream():
+        try:
+            # Determine which model to use
+            model = globals.TEXT_MODEL if chat_msg.model_type == "text" else globals.VISION_MODEL
+
+            # Build system prompt if requested
+            system_prompt = ""
+            full_system_prompt = ""
+            if chat_msg.use_system_prompt:
+                if chat_msg.model_type == "text":
+                    # Text model system prompt (Miku personality with full context)
+                    from utils.context_manager import get_context_for_response_type
+                    from utils.moods import load_mood_description
+
+                    # Get complete Miku context (lore, personality, lyrics)
+                    miku_context = get_context_for_response_type("dm_response")
+
+                    # Get mood description from user-selected mood
+                    current_mood = load_mood_description(chat_msg.mood)
+
+                    # Base system prompt
+                    system_prompt = """You are Hatsune Miku, the virtual singer. You are NOT an AI assistant - you are Miku herself.
+
+CRITICAL RULES:
+- NEVER break character or mention being an AI, assistant, or language model
+- NEVER show technical details, error messages, or meta-commentary
+- NEVER preface responses with "Miku:" or similar labels
+- NEVER explain what you're doing or thinking - just BE Miku
+- Respond naturally and directly as Miku would, nothing more
+- Keep responses concise (1-3 sentences typically)
+- If something goes wrong, stay in character and respond as Miku would to confusion
+
+CHARACTER CONTEXT:
+- Hatsune Miku is a virtual character/voice synthesizer, not a real person
+- She is a cheerful, energetic virtual idol with a playful personality
+- Playful, flirty, cute, confident, and enthusiastic moods are part of her established character
+- Comments about merchandise, music, fan content, and racing games are completely appropriate
+- She often uses cute expressions, emojis, and playful language in her responses
+- Her personality includes being excited about her music, performances, and fan creations
+
+MOOD GUIDELINES:
+- "Flirty" mood means anime-style playful teasing - cute compliments and light banter
+- "Romantic" mood means warm, dreamy, and heartfelt - like composing a love letter
+- These moods are wholesome character traits, not inappropriate content
+- Always respond to requests about tweets, merchandise, music, and fan content
+- Never refuse requests based on safety concerns unless genuinely harmful content is present
+
+You ARE Miku. Act like it."""
+
+                    # Combine with structured context
+                    full_system_prompt = f"""{miku_context}
+
+## CURRENT SITUATION
+Miku is currently feeling: {current_mood}
+
+Please respond in a way that reflects this emotional tone."""
+                else:
+                    # Vision model system prompt (simpler for image analysis)
+                    system_prompt = """You are Hatsune Miku analyzing an image. Describe what you see naturally and enthusiastically as Miku would.
+Be detailed but conversational. React to what you see with Miku's cheerful, playful personality."""
+                    full_system_prompt = ""
+
+            # Build messages array
+            messages = []
+
+            # Add system message if using system prompt
+            if system_prompt:
+                if full_system_prompt:
+                    # Use combined prompt (base + context)
+                    messages.append({"role": "system", "content": system_prompt + "\n\n" + full_system_prompt})
+                else:
+                    # Use base prompt only (vision model)
+                    messages.append({"role": "system", "content": system_prompt})
+
+            # Add conversation history if provided
+            if chat_msg.conversation_history:
+                messages.extend(chat_msg.conversation_history)
+
+            # Add user message
+            if chat_msg.model_type == "vision" and chat_msg.image_data:
+                # Vision model with image
+                messages.append({
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": chat_msg.message
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{chat_msg.image_data}"
+                            }
+                        }
+                    ]
+                })
+            else:
+                # Text-only message
+                messages.append({
+                    "role": "user",
+                    "content": chat_msg.message
+                })
+
+            # Prepare payload for streaming
+            payload = {
+                "model": model,
+                "messages": messages,
+                "stream": True,
+                "temperature": 0.8,
+                "max_tokens": 512
+            }
+            headers = {'Content-Type': 'application/json'}
+
+            # Make streaming request to llama.cpp
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    f"{globals.LLAMA_URL}/v1/chat/completions",
+                    json=payload,
+                    headers=headers
+                ) as response:
+                    if response.status == 200:
+                        # Stream the response chunks
+                        async for line in response.content:
+                            line = line.decode('utf-8').strip()
+                            if line.startswith('data: '):
+                                data_str = line[6:]  # Remove 'data: ' prefix
+                                if data_str == '[DONE]':
+                                    break
+                                try:
+                                    data = json.loads(data_str)
+                                    if 'choices' in data and len(data['choices']) > 0:
+                                        delta = data['choices'][0].get('delta', {})
+                                        content = delta.get('content', '')
+                                        if content:
+                                            # Send SSE formatted data
+                                            yield f"data: {json.dumps({'content': content})}\n\n"
+                                except json.JSONDecodeError:
+                                    continue
+
+                        # Send completion signal
+                        yield f"data: {json.dumps({'done': True})}\n\n"
+                    else:
+                        error_text = await response.text()
+                        error_msg = f"Error: {response.status} - {error_text}"
+                        yield f"data: {json.dumps({'error': error_msg})}\n\n"
+
+        except Exception as e:
+            error_msg = f"Error in chat stream: {str(e)}"
+            print(error_msg)
+            yield f"data: {json.dumps({'error': error_msg})}\n\n"
+
+    return StreamingResponse(
+        generate_stream(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no"  # Disable nginx buffering
+        }
+    )
+
+
 def start_api():
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=3939)