Add interactive Chat with LLM interface to Web UI

Features:
- Real-time streaming chat interface (ChatGPT-like experience)
- Model selection: Text model (fast) or Vision model (image analysis)
- System prompt toggle: Chat with Miku's personality or raw LLM
- Mood selector: Choose from 14 different emotional states
- Full context integration: Uses complete miku_lore.txt, miku_prompt.txt, and miku_lyrics.txt
- Conversation memory: Maintains chat history throughout session
- Image upload support for vision model (see the example request body after this list)
- Horizontal scrolling tabs for responsive design
- Clear chat history functionality
- SSE (Server-Sent Events) for streaming responses
- Keyboard shortcuts (Ctrl+Enter to send)
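
For illustration, a request body the Web UI might send to the new endpoint (the field names follow the ChatMessage model in the diff below; the values here are invented):

    # Hypothetical /chat/stream request body (illustrative values only)
    example_request = {
        "message": "What do you think of this picture?",
        "model_type": "vision",            # "text" or "vision"
        "use_system_prompt": True,         # False = talk to the raw LLM, no Miku persona
        "image_data": "<base64-encoded JPEG>",
        "conversation_history": [
            {"role": "user", "content": "Hi Miku!"},
            {"role": "assistant", "content": "Hi! What should we look at today?"}
        ],
        "mood": "neutral"                  # one of the 14 selectable moods; "neutral" is the default
    }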

Technical changes:
- Added POST /chat/stream endpoint in api.py with streaming support
- Updated ChatMessage model with mood, conversation_history, and image_data
- Integrated context_manager for proper Miku personality context
- Added Chat with LLM tab to index.html
- Implemented JavaScript streaming client with EventSource-like handling (a minimal consumer sketch of the SSE protocol follows this list)
- Added CSS for chat messages, typing indicators, and animations
- Made tab navigation horizontally scrollable for narrow viewports
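
The Web UI consumes the stream from JavaScript; purely as a sketch of the wire protocol (not part of this commit), a minimal Python consumer could look like the following, assuming the API is listening on localhost:3939 as configured in start_api():

    # Sketch of a /chat/stream SSE consumer (illustrative, not part of this commit).
    # Assumes the FastAPI app from api.py is listening on localhost:3939.
    import asyncio
    import json
    import aiohttp

    async def chat(message: str):
        payload = {"message": message, "model_type": "text",
                   "use_system_prompt": True, "mood": "neutral"}
        async with aiohttp.ClientSession() as session:
            async with session.post("http://localhost:3939/chat/stream", json=payload) as resp:
                # The endpoint emits SSE lines of the form "data: {...}\n\n"
                async for raw in resp.content:
                    line = raw.decode("utf-8").strip()
                    if not line.startswith("data: "):
                        continue
                    event = json.loads(line[6:])
                    if "error" in event:
                        print(f"\n[error] {event['error']}")
                        break
                    if event.get("done"):
                        break
                    print(event.get("content", ""), end="", flush=True)

    asyncio.run(chat("Hi Miku!"))
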
2025-12-13 00:23:03 +02:00
parent 65e6c3e7ea
commit bb82b7f146
23 changed files with 2926 additions and 8 deletions

api.py

@@ -7,7 +7,8 @@ from fastapi import (
     File,
     Form
 )
-from typing import List
+from fastapi.responses import StreamingResponse
+from typing import List, Optional
 from pydantic import BaseModel
 import globals
 from server_manager import server_manager
@@ -36,6 +37,7 @@ import subprocess
 import io
 import discord
 import aiofiles
+import aiohttp
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse, PlainTextResponse
 import os
@@ -1512,6 +1514,184 @@ async def get_v2_status():
     except Exception as e:
         return {"status": "error", "message": str(e)}
 
+
+# ========== Chat Interface Endpoints ==========
+
+class ChatMessage(BaseModel):
+    message: str
+    model_type: str = "text"  # "text" or "vision"
+    use_system_prompt: bool = True
+    image_data: Optional[str] = None  # Base64 encoded image for vision model
+    conversation_history: Optional[List[dict]] = None  # Previous messages in conversation
+    mood: str = "neutral"  # Miku's mood for this conversation
+
+
+@app.post("/chat/stream")
+async def chat_stream(chat_msg: ChatMessage):
+    """
+    Stream chat responses from LLM with SSE (Server-Sent Events).
+    Supports both text and vision models with optional system prompts.
+    """
+    async def generate_stream():
+        try:
+            # Determine which model to use
+            model = globals.TEXT_MODEL if chat_msg.model_type == "text" else globals.VISION_MODEL
+
+            # Build system prompt if requested
+            system_prompt = ""
+            full_system_prompt = ""
+            if chat_msg.use_system_prompt:
+                if chat_msg.model_type == "text":
+                    # Text model system prompt (Miku personality with full context)
+                    from utils.context_manager import get_context_for_response_type
+                    from utils.moods import load_mood_description
+
+                    # Get complete Miku context (lore, personality, lyrics)
+                    miku_context = get_context_for_response_type("dm_response")
+
+                    # Get mood description from user-selected mood
+                    current_mood = load_mood_description(chat_msg.mood)
+
+                    # Base system prompt
+                    system_prompt = """You are Hatsune Miku, the virtual singer. You are NOT an AI assistant - you are Miku herself.
+
+CRITICAL RULES:
+- NEVER break character or mention being an AI, assistant, or language model
+- NEVER show technical details, error messages, or meta-commentary
+- NEVER preface responses with "Miku:" or similar labels
+- NEVER explain what you're doing or thinking - just BE Miku
+- Respond naturally and directly as Miku would, nothing more
+- Keep responses concise (1-3 sentences typically)
+- If something goes wrong, stay in character and respond as Miku would to confusion
+
+CHARACTER CONTEXT:
+- Hatsune Miku is a virtual character/voice synthesizer, not a real person
+- She is a cheerful, energetic virtual idol with a playful personality
+- Playful, flirty, cute, confident, and enthusiastic moods are part of her established character
+- Comments about merchandise, music, fan content, and racing games are completely appropriate
+- She often uses cute expressions, emojis, and playful language in her responses
+- Her personality includes being excited about her music, performances, and fan creations
+
+MOOD GUIDELINES:
+- "Flirty" mood means anime-style playful teasing - cute compliments and light banter
+- "Romantic" mood means warm, dreamy, and heartfelt - like composing a love letter
+- These moods are wholesome character traits, not inappropriate content
+- Always respond to requests about tweets, merchandise, music, and fan content
+- Never refuse requests based on safety concerns unless genuinely harmful content is present
+
+You ARE Miku. Act like it."""
+
+                    # Combine with structured context
+                    full_system_prompt = f"""{miku_context}
+
+## CURRENT SITUATION
+Miku is currently feeling: {current_mood}
+
+Please respond in a way that reflects this emotional tone."""
+                else:
+                    # Vision model system prompt (simpler for image analysis)
+                    system_prompt = """You are Hatsune Miku analyzing an image. Describe what you see naturally and enthusiastically as Miku would.
+Be detailed but conversational. React to what you see with Miku's cheerful, playful personality."""
+                    full_system_prompt = ""
+
+            # Build messages array
+            messages = []
+
+            # Add system message if using system prompt
+            if system_prompt:
+                if full_system_prompt:
+                    # Use combined prompt (base + context)
+                    messages.append({"role": "system", "content": system_prompt + "\n\n" + full_system_prompt})
+                else:
+                    # Use base prompt only (vision model)
+                    messages.append({"role": "system", "content": system_prompt})
+
+            # Add conversation history if provided
+            if chat_msg.conversation_history:
+                messages.extend(chat_msg.conversation_history)
+
+            # Add user message
+            if chat_msg.model_type == "vision" and chat_msg.image_data:
+                # Vision model with image
+                messages.append({
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": chat_msg.message
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{chat_msg.image_data}"
+                            }
+                        }
+                    ]
+                })
+            else:
+                # Text-only message
+                messages.append({
+                    "role": "user",
+                    "content": chat_msg.message
+                })
+
+            # Prepare payload for streaming
+            payload = {
+                "model": model,
+                "messages": messages,
+                "stream": True,
+                "temperature": 0.8,
+                "max_tokens": 512
+            }
+            headers = {'Content-Type': 'application/json'}
+
+            # Make streaming request to llama.cpp
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    f"{globals.LLAMA_URL}/v1/chat/completions",
+                    json=payload,
+                    headers=headers
+                ) as response:
+                    if response.status == 200:
+                        # Stream the response chunks
+                        async for line in response.content:
+                            line = line.decode('utf-8').strip()
+                            if line.startswith('data: '):
+                                data_str = line[6:]  # Remove 'data: ' prefix
+                                if data_str == '[DONE]':
+                                    break
+                                try:
+                                    data = json.loads(data_str)
+                                    if 'choices' in data and len(data['choices']) > 0:
+                                        delta = data['choices'][0].get('delta', {})
+                                        content = delta.get('content', '')
+                                        if content:
+                                            # Send SSE formatted data
+                                            yield f"data: {json.dumps({'content': content})}\n\n"
+                                except json.JSONDecodeError:
+                                    continue
+
+                        # Send completion signal
+                        yield f"data: {json.dumps({'done': True})}\n\n"
+                    else:
+                        error_text = await response.text()
+                        error_msg = f"Error: {response.status} - {error_text}"
+                        yield f"data: {json.dumps({'error': error_msg})}\n\n"
+
+        except Exception as e:
+            error_msg = f"Error in chat stream: {str(e)}"
+            print(error_msg)
+            yield f"data: {json.dumps({'error': error_msg})}\n\n"
+
+    return StreamingResponse(
+        generate_stream(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no"  # Disable nginx buffering
+        }
+    )
+
+
 def start_api():
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=3939)