Implemented a new Japanese-only text mode with a WebUI toggle, backed by the Llama 3.1 Swallow model. Next up: Japanese TTS.
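Quick smoke test for the new endpoints (a sketch, not part of the diff; assumes the bot API is reachable at http://localhost:8000, so adjust the base URL to your deployment):

    import requests

    BASE = "http://localhost:8000"  # assumption: wherever bot/api.py is served

    # Current mode plus the model that will serve it
    print(requests.get(f"{BASE}/language").json())

    # Flip english <-> japanese (next reply is generated by Swallow)
    print(requests.post(f"{BASE}/language/toggle").json())

    # Or set it explicitly; `language` is sent as a query parameter
    print(requests.post(f"{BASE}/language/set", params={"language": "english"}).json())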
 bot/api.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
@@ -226,6 +226,54 @@ def calm_miku_endpoint():
 
     return {"status": "ok", "message": "Miku has been calmed down"}
 
+# ========== Language Mode Management ==========
+@app.get("/language")
+def get_language_mode():
+    """Get current language mode (english or japanese)"""
+    return {
+        "language_mode": globals.LANGUAGE_MODE,
+        "available_languages": ["english", "japanese"],
+        "current_model": globals.JAPANESE_TEXT_MODEL if globals.LANGUAGE_MODE == "japanese" else globals.TEXT_MODEL
+    }
+
+@app.post("/language/toggle")
+def toggle_language_mode():
+    """Toggle between English and Japanese modes"""
+    if globals.LANGUAGE_MODE == "english":
+        globals.LANGUAGE_MODE = "japanese"
+        new_mode = "japanese"
+        model_used = globals.JAPANESE_TEXT_MODEL
+        logger.info("Switched to Japanese mode (using Llama 3.1 Swallow)")
+    else:
+        globals.LANGUAGE_MODE = "english"
+        new_mode = "english"
+        model_used = globals.TEXT_MODEL
+        logger.info("Switched to English mode (using default model)")
+
+    return {
+        "status": "ok",
+        "language_mode": new_mode,
+        "model_now_using": model_used,
+        "message": f"Miku is now speaking in {new_mode.upper()}!"
+    }
+
+@app.post("/language/set")
+def set_language_mode(language: str = "english"):
+    """Set language mode to either 'english' or 'japanese'"""
+    if language.lower() not in ["english", "japanese"]:
+        return {"error": f"Invalid language mode '{language}'. Use 'english' or 'japanese'."}, 400
+
+    globals.LANGUAGE_MODE = language.lower()
+    model_used = globals.JAPANESE_TEXT_MODEL if language.lower() == "japanese" else globals.TEXT_MODEL
+    logger.info(f"Language mode set to {language.lower()} (using {model_used})")
+
+    return {
+        "status": "ok",
+        "language_mode": language.lower(),
+        "model_now_using": model_used,
+        "message": f"Miku is now speaking in {language.upper()}!"
+    }
+
 # ========== Evil Mode Management ==========
 @app.get("/evil-mode")
 def get_evil_mode_status():
@@ -26,8 +26,12 @@ LLAMA_AMD_URL = os.getenv("LLAMA_AMD_URL", "http://llama-swap-amd:8080") # Seco
 TEXT_MODEL = os.getenv("TEXT_MODEL", "llama3.1")
 VISION_MODEL = os.getenv("VISION_MODEL", "vision")
 EVIL_TEXT_MODEL = os.getenv("EVIL_TEXT_MODEL", "darkidol") # Uncensored model for evil mode
+JAPANESE_TEXT_MODEL = os.getenv("JAPANESE_TEXT_MODEL", "swallow") # Llama 3.1 Swallow model for Japanese
 OWNER_USER_ID = int(os.getenv("OWNER_USER_ID", "209381657369772032")) # Bot owner's Discord user ID for reports
 
+# Language mode for Miku (english or japanese)
+LANGUAGE_MODE = "english" # Can be "english" or "japanese"
+
 # Fish.audio TTS settings
 FISH_API_KEY = os.getenv("FISH_API_KEY", "478d263d8c094e0c8993aae3e9cf9159")
 MIKU_VOICE_ID = os.getenv("MIKU_VOICE_ID", "b28b79555e8c4904ac4d048c36e716b7")
@@ -660,10 +660,11 @@
 <button class="tab-button active" onclick="switchTab('tab1')">Server Management</button>
 <button class="tab-button" onclick="switchTab('tab2')">Actions</button>
 <button class="tab-button" onclick="switchTab('tab3')">Status</button>
-<button class="tab-button" onclick="switchTab('tab4')">🎨 Image Generation</button>
-<button class="tab-button" onclick="switchTab('tab5')">📊 Autonomous Stats</button>
-<button class="tab-button" onclick="switchTab('tab6')">💬 Chat with LLM</button>
-<button class="tab-button" onclick="switchTab('tab7')">📞 Voice Call</button>
+<button class="tab-button" onclick="switchTab('tab4')">⚙️ LLM Settings</button>
+<button class="tab-button" onclick="switchTab('tab5')">🎨 Image Generation</button>
+<button class="tab-button" onclick="switchTab('tab6')">📊 Autonomous Stats</button>
+<button class="tab-button" onclick="switchTab('tab7')">💬 Chat with LLM</button>
+<button class="tab-button" onclick="switchTab('tab8')">📞 Voice Call</button>
 <button class="tab-button" onclick="window.location.href='/static/system.html'">🎛️ System Settings</button>
 </div>
 
@@ -1173,8 +1174,70 @@
 </div>
 </div>
 
-<!-- Image Generation Tab Content -->
+<!-- LLM Settings Tab Content -->
 <div id="tab4" class="tab-content">
+  <div class="section">
+    <h3>⚙️ Language Model Settings</h3>
+    <p>Configure language model behavior and language mode.</p>
+
+    <!-- Language Mode Section -->
+    <div style="margin-bottom: 1.5rem; padding: 1rem; background: #2a2a2a; border-radius: 4px; border: 2px solid #4a7bc9;">
+      <h4 style="margin-top: 0; color: #61dafb;">🌐 Language Mode</h4>
+      <p style="margin: 0.5rem 0; color: #aaa;">Switch Miku between English and Japanese responses.</p>
+
+      <div style="margin: 1rem 0;">
+        <div style="margin-bottom: 1rem;">
+          <strong>Current Language:</strong> <span id="current-language-display" style="color: #61dafb; font-weight: bold;">English</span>
+        </div>
+        <button onclick="toggleLanguageMode()" style="background: #4a7bc9; color: #fff; padding: 0.6rem 1.2rem; border: 2px solid #61dafb; border-radius: 4px; cursor: pointer; font-weight: bold; font-size: 1rem;">
+          🔄 Toggle Language (English ↔ Japanese)
+        </button>
+      </div>
+
+      <div style="margin-top: 1rem; padding: 1rem; background: #1a1a1a; border-radius: 4px; border-left: 3px solid #4a7bc9;">
+        <div style="font-size: 0.9rem;">
+          <div style="margin-bottom: 0.5rem;"><strong>English Mode:</strong></div>
+          <ul style="margin: 0 0 0.5rem 0; padding-left: 1.5rem; color: #aaa;">
+            <li>Uses standard Llama 3.1 model</li>
+            <li>Responds in English only</li>
+          </ul>
+
+          <div style="margin-bottom: 0.5rem;"><strong>Japanese Mode (日本語):</strong></div>
+          <ul style="margin: 0 0 0; padding-left: 1.5rem; color: #aaa;">
+            <li>Uses Llama 3.1 Swallow model (trained for Japanese)</li>
+            <li>Responds entirely in Japanese</li>
+          </ul>
+        </div>
+      </div>
+    </div>
+
+    <!-- Language Mode Status Section -->
+    <div style="margin-bottom: 1.5rem; padding: 1rem; background: #2a2a2a; border-radius: 4px;">
+      <h4 style="margin-top: 0;">📊 Current Status</h4>
+      <div id="language-status-display" style="background: #1a1a1a; padding: 1rem; border-radius: 4px; font-family: monospace; font-size: 0.9rem;">
+        <p style="margin: 0.5rem 0;"><strong>Language Mode:</strong> <span id="status-language">English</span></p>
+        <p style="margin: 0.5rem 0;"><strong>Active Model:</strong> <span id="status-model">llama3.1</span></p>
+        <p style="margin: 0.5rem 0;"><strong>Available Languages:</strong> English, 日本語 (Japanese)</p>
+      </div>
+      <button onclick="refreshLanguageStatus()" style="margin-top: 1rem;">🔄 Refresh Status</button>
+    </div>
+
+    <!-- Information Section -->
+    <div style="padding: 1rem; background: #1a1a1a; border-radius: 4px; border-left: 3px solid #ff9800;">
+      <h4 style="margin-top: 0; color: #ff9800;">ℹ️ How Language Mode Works</h4>
+      <ul style="margin: 0.5rem 0; padding-left: 1.5rem; font-size: 0.9rem; color: #aaa;">
+        <li>English mode uses your default text model for English responses</li>
+        <li>Japanese mode switches to Swallow and responds only in 日本語</li>
+        <li>All personality traits, mood system, and features work in both modes</li>
+        <li>Language mode is global - affects all servers and DMs</li>
+        <li>Conversation history is preserved across language switches</li>
+      </ul>
+    </div>
+  </div>
+</div>
+
+<!-- Image Generation Tab Content -->
+<div id="tab5" class="tab-content">
 <div class="section">
 <h3>🎨 Image Generation System</h3>
 <p>Natural language image generation powered by ComfyUI. Users can ask Miku to create images naturally without commands!</p>
@@ -1232,7 +1295,7 @@
 </div>
 
 <!-- Autonomous Stats Tab Content -->
-<div id="tab5" class="tab-content">
+<div id="tab6" class="tab-content">
 <div class="section">
 <h3>📊 Autonomous V2 Decision Engine Stats</h3>
 <p>Real-time monitoring of Miku's autonomous decision-making context and mood-based personality stats.</p>
@@ -1250,7 +1313,7 @@
 </div>
 
 <!-- Chat with LLM Tab Content -->
-<div id="tab6" class="tab-content">
+<div id="tab7" class="tab-content">
 <div class="section">
 <h3>💬 Chat with LLM</h3>
 <p>Direct chat interface with the language models. Test responses, experiment with prompts, or just chat with Miku!</p>
@@ -1375,8 +1438,8 @@
 </div>
 </div>
 
-<!-- Tab 7: Voice Call Management -->
-<div id="tab7" class="tab-content">
+<!-- Tab 8: Voice Call Management -->
+<div id="tab8" class="tab-content">
 <div class="section">
 <h3>📞 Initiate Voice Call</h3>
 <p>Start an automated voice chat session with a user. Miku will automatically manage containers, join voice chat, and send an invitation DM.</p>
@@ -1559,6 +1622,7 @@ document.addEventListener('DOMContentLoaded', function() {
     checkEvilModeStatus(); // Check evil mode on load
     checkBipolarModeStatus(); // Check bipolar mode on load
     checkGPUStatus(); // Check GPU selection on load
+    refreshLanguageStatus(); // Check language mode on load
     console.log('🚀 DOMContentLoaded - initializing figurine subscribers list');
     refreshFigurineSubscribers();
     loadProfilePictureMetadata();
@@ -2251,6 +2315,43 @@ async function calmMiku() {
     }
 }
 
+// ===== Language Mode Functions =====
+async function refreshLanguageStatus() {
+    try {
+        const result = await apiCall('/language');
+        document.getElementById('current-language-display').textContent =
+            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
+        document.getElementById('status-language').textContent =
+            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
+        document.getElementById('status-model').textContent = result.current_model;
+
+        console.log('Language status:', result);
+    } catch (error) {
+        console.error('Failed to get language status:', error);
+        showNotification('Failed to load language status', 'error');
+    }
+}
+
+async function toggleLanguageMode() {
+    try {
+        const result = await apiCall('/language/toggle', 'POST');
+
+        // Update UI
+        document.getElementById('current-language-display').textContent =
+            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
+        document.getElementById('status-language').textContent =
+            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
+        document.getElementById('status-model').textContent = result.model_now_using;
+
+        // Show notification
+        showNotification(result.message, 'success');
+        console.log('Language toggled:', result);
+    } catch (error) {
+        console.error('Failed to toggle language mode:', error);
+        showNotification('Failed to toggle language mode', 'error');
+    }
+}
+
 // Evil Mode Functions
 async function checkEvilModeStatus() {
     try {
@@ -3,8 +3,12 @@
 Structured context management for Miku's personality and knowledge.
 Replaces the vector search system with organized, complete context.
 Preserves original content files in their entirety.
+
+When LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
+all responses are in Japanese without requiring separate files.
 """
 
+import globals
 from utils.logger import get_logger
 
 logger = get_logger('core')
@@ -40,60 +44,96 @@ def get_original_miku_lyrics() -> str:
         return "## MIKU LYRICS\n[File could not be loaded]"
 
 
+def _get_japanese_instruction() -> str:
+    """
+    Returns the Japanese language instruction to append to context.
+    Ensures all responses are in Japanese when in Japanese mode.
+    """
+    return "\n\n[CRITICAL INSTRUCTION - 重要な指示]\n**YOU MUST RESPOND ENTIRELY IN JAPANESE (日本語). NO ENGLISH ALLOWED.**\nすべての返答は必ず日本語で行ってください。英語での返答は一切禁止されています。\nこれは最優先の指示です。必ず守ってください。"
+
+
 def get_complete_context() -> str:
-    """Returns all essential Miku context using original files in their entirety"""
-    return f"""## MIKU LORE (Complete Original)
-{get_original_miku_lore()}
+    """
+    Returns all essential Miku context using original files in their entirety.
+
+    If LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
+    all responses are in Japanese.
+    """
+    lore = get_original_miku_lore()
+    prompt = get_original_miku_prompt()
+    lyrics = get_original_miku_lyrics()
+
+    combined = f"""## MIKU LORE (Complete Original)
+{lore}
 
 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
-{get_original_miku_prompt()}
+{prompt}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
+
+    # Append Japanese instruction if in Japanese mode
+    if globals.LANGUAGE_MODE == "japanese":
+        combined += _get_japanese_instruction()
+
+    logger.info(f"[core] Context loaded in {globals.LANGUAGE_MODE} mode")
+    return combined
 
 
 def get_context_for_response_type(response_type: str) -> str:
-    """Returns appropriate context based on the type of response being generated"""
+    """
+    Returns appropriate context based on the type of response being generated.
 
-    # Core context always includes the complete original files
+    If LANGUAGE_MODE is "japanese", appends Japanese instruction to all contexts
+    to ensure responses are in Japanese.
+    """
+    lore = get_original_miku_lore()
+    prompt = get_original_miku_prompt()
+    lyrics = get_original_miku_lyrics()
+
+    # Build core context (always in English source files)
     core_context = f"""## MIKU LORE (Complete Original)
-{get_original_miku_lore()}
+{lore}
 
 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
-{get_original_miku_prompt()}"""
+{prompt}"""
 
+    # Return context based on response type
     if response_type == "autonomous_general":
-        # For general autonomous messages, include everything
-        return f"""{core_context}
+        context = f"""{core_context}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
 
     elif response_type == "autonomous_tweet":
-        # For tweet responses, include lyrics for musical context
-        return f"""{core_context}
+        context = f"""{core_context}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
 
     elif response_type == "dm_response" or response_type == "server_response":
-        # For conversational responses, include everything
-        return f"""{core_context}
+        context = f"""{core_context}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
 
     elif response_type == "conversation_join":
-        # For joining conversations, include everything
-        return f"""{core_context}
+        context = f"""{core_context}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
 
     elif response_type == "emoji_selection":
-        # For emoji reactions, no context needed - the prompt has everything
-        return ""
+        # For emoji reactions, minimal context needed
+        context = ""
 
     else:
        # Default: comprehensive context
-        return get_complete_context()
+        context = get_complete_context()
+
+    # Append Japanese instruction if in Japanese mode
+    if globals.LANGUAGE_MODE == "japanese" and context:
+        context += _get_japanese_instruction()
+
+    return context
@@ -239,7 +239,13 @@ async def analyze_image_with_vision(base64_img):
     Uses OpenAI-compatible chat completions API with image_url.
     Always uses NVIDIA GPU for vision model.
     """
-    from utils.llm import get_vision_gpu_url
+    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health
+
+    # Check if vision endpoint is healthy before attempting request
+    is_healthy, error = await check_vision_endpoint_health()
+    if not is_healthy:
+        logger.warning(f"Vision endpoint unhealthy: {error}")
+        return f"Vision service currently unavailable: {error}"
+
     payload = {
         "model": globals.VISION_MODEL,
@@ -269,17 +275,20 @@ async def analyze_image_with_vision(base64_img):
     async with aiohttp.ClientSession() as session:
         try:
             vision_url = get_vision_gpu_url()
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response:
+            logger.info(f"Sending vision request to {vision_url} using model: {globals.VISION_MODEL}")
+
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=60)) as response:
                 if response.status == 200:
                     data = await response.json()
-                    return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    logger.info(f"Vision analysis completed successfully")
+                    return result
                 else:
                     error_text = await response.text()
-                    logger.error(f"Vision API error: {response.status} - {error_text}")
+                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                     return f"Error analyzing image: {response.status}"
         except Exception as e:
-            logger.error(f"Error in analyze_image_with_vision: {e}")
+            logger.error(f"Error in analyze_image_with_vision: {e}", exc_info=True)
             return f"Error analyzing image: {str(e)}"
 
 
 async def analyze_video_with_vision(video_frames, media_type="video"):
@@ -288,6 +297,13 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
     video_frames: list of base64-encoded frames
     media_type: "video", "gif", or "tenor_gif" to customize the analysis prompt
     """
+    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health
+
+    # Check if vision endpoint is healthy before attempting request
+    is_healthy, error = await check_vision_endpoint_health()
+    if not is_healthy:
+        logger.warning(f"Vision endpoint unhealthy: {error}")
+        return f"Vision service currently unavailable: {error}"
+
     # Customize prompt based on media type
     if media_type == "gif":
@@ -331,16 +347,20 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
     async with aiohttp.ClientSession() as session:
         try:
             vision_url = get_vision_gpu_url()
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response:
+            logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})")
+
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response:
                 if response.status == 200:
                     data = await response.json()
-                    return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    logger.info(f"Video analysis completed successfully")
+                    return result
                 else:
                     error_text = await response.text()
-                    logger.error(f"Vision API error: {response.status} - {error_text}")
+                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                     return f"Error analyzing video: {response.status}"
         except Exception as e:
-            logger.error(f"Error in analyze_video_with_vision: {e}")
+            logger.error(f"Error in analyze_video_with_vision: {e}", exc_info=True)
             return f"Error analyzing video: {str(e)}"
 
 
@@ -38,8 +38,47 @@ def get_vision_gpu_url():
     Strategy: Always use NVIDIA GPU for vision to avoid unloading/reloading.
     - When NVIDIA is primary: Use NVIDIA for both text and vision
     - When AMD is primary: Use AMD for text, NVIDIA for vision (keeps vision loaded)
+
+    Important: Vision model (MiniCPM-V) is ONLY configured on NVIDIA GPU.
+    This ensures vision inference is always fast and doesn't interfere with
+    AMD text model inference.
     """
-    return globals.LLAMA_URL # Always use NVIDIA for vision
+    current_text_gpu = get_current_gpu_url()
+    nvidia_vision_url = globals.LLAMA_URL
+
+    # Vision ALWAYS uses NVIDIA, regardless of which GPU is primary for text
+    # Log this decision when GPU switching is active (primary text GPU is AMD)
+    if current_text_gpu == globals.LLAMA_AMD_URL:
+        logger.debug(f"Primary GPU is AMD for text, but using NVIDIA for vision model")
+
+    return nvidia_vision_url # Always use NVIDIA for vision
+
+
+async def check_vision_endpoint_health():
+    """
+    Check if NVIDIA GPU vision endpoint is healthy and responsive.
+    This is important when AMD is the primary GPU to ensure vision still works.
+
+    Returns:
+        Tuple of (is_healthy: bool, error_message: Optional[str])
+    """
+    import aiohttp
+    vision_url = get_vision_gpu_url()
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{vision_url}/health", timeout=aiohttp.ClientTimeout(total=5)) as response:
+                is_healthy = response.status == 200
+                if is_healthy:
+                    logger.info(f"Vision endpoint ({vision_url}) health check: OK")
+                else:
+                    logger.warning(f"Vision endpoint ({vision_url}) health check failed: status {response.status}")
+                return is_healthy, None if is_healthy else f"Status {response.status}"
+    except asyncio.TimeoutError:
+        logger.error(f"Vision endpoint ({vision_url}) health check: timeout")
+        return False, "Endpoint timeout"
+    except Exception as e:
+        logger.error(f"Vision endpoint ({vision_url}) health check error: {e}")
+        return False, str(e)
+
+
 def _strip_surrounding_quotes(text):
     """
@@ -108,8 +147,12 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
     if evil_mode:
         model = globals.EVIL_TEXT_MODEL # Use DarkIdol uncensored model
         logger.info(f"Using evil model: {model}")
+    elif globals.LANGUAGE_MODE == "japanese":
+        model = globals.JAPANESE_TEXT_MODEL # Use Swallow for Japanese
+        logger.info(f"Using Japanese model: {model}")
     else:
         model = globals.TEXT_MODEL
+        logger.info(f"Using default model: {model}")
 
     # Determine channel_id for conversation history
     # For servers, use guild_id; for DMs, use user_id
@@ -18,6 +18,15 @@ models:
       - darkidol
       - evil-model
       - uncensored
+
+  # Japanese language model (Llama 3.1 Swallow - Japanese optimized)
+  swallow:
+    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-Swallow-8B-Instruct-v0.5-Q4_K_M.gguf -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
+    ttl: 1800 # Unload after 30 minutes of inactivity
+    aliases:
+      - swallow
+      - japanese
+      - japanese-model
 
   # Vision/Multimodal model (MiniCPM-V-4.5 - supports images, video, and GIFs)
   vision:
@@ -19,6 +19,15 @@ models:
       - darkidol
       - evil-model
      - uncensored
+
+  # Japanese language model (Llama 3.1 Swallow - Japanese optimized)
+  swallow:
+    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-Swallow-8B-Instruct-v0.5-Q4_K_M.gguf -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
+    ttl: 1800 # Unload after 30 minutes of inactivity
+    aliases:
+      - swallow
+      - japanese
+      - japanese-model
 
 # Server configuration
 # llama-swap will listen on this address
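To verify the new swallow entry end to end, a minimal sketch against llama-swap's OpenAI-compatible API (host and port are assumptions; the model name, or one of its aliases above, routes to the Swallow GGUF, which loads on first request and unloads after the 1800-second ttl):

    import requests

    LLAMA_SWAP = "http://localhost:8080"  # assumption: your llama-swap endpoint

    resp = requests.post(
        f"{LLAMA_SWAP}/v1/chat/completions",
        json={
            "model": "swallow",  # or the aliases "japanese" / "japanese-model"
            "messages": [{"role": "user", "content": "こんにちは、ミク！自己紹介してください。"}],
        },
        timeout=180,  # first request may block while the model loads
    )
    print(resp.json()["choices"][0]["message"]["content"])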