Implemented experimental real production ready voice chat, relegated old flow to voice debug mode. New Web UI panel for Voice Chat.

This commit is contained in:
2026-01-20 23:06:17 +02:00
parent 362108f4b0
commit 2934efba22
31 changed files with 5408 additions and 357 deletions

View File

@@ -663,6 +663,7 @@
<button class="tab-button" onclick="switchTab('tab4')">🎨 Image Generation</button>
<button class="tab-button" onclick="switchTab('tab5')">📊 Autonomous Stats</button>
<button class="tab-button" onclick="switchTab('tab6')">💬 Chat with LLM</button>
<button class="tab-button" onclick="switchTab('tab7')">📞 Voice Call</button>
<button class="tab-button" onclick="window.location.href='/static/system.html'">🎛️ System Settings</button>
</div>
@@ -1374,6 +1375,112 @@
</div>
</div>
<!-- Tab 7: Voice Call Management -->
<div id="tab7" class="tab-content">
<div class="section">
<h3>📞 Initiate Voice Call</h3>
<p>Start an automated voice chat session with a user. Miku will automatically manage containers, join voice chat, and send an invitation DM.</p>
<div style="background: #2a2a2a; padding: 1.5rem; border-radius: 8px; margin-bottom: 1.5rem;">
<h4 style="margin-top: 0; color: #61dafb;">⚙️ Voice Call Configuration</h4>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; margin-bottom: 1.5rem;">
<!-- User ID Input -->
<div>
<label style="display: block; margin-bottom: 0.5rem; font-weight: bold;">👤 Target User ID:</label>
<input
type="text"
id="voice-user-id"
placeholder="Discord user ID (e.g., 123456789)"
style="width: 100%; padding: 0.5rem; background: #333; color: #fff; border: 1px solid #555; border-radius: 4px; box-sizing: border-box;"
>
<div style="font-size: 0.85rem; color: #aaa; margin-top: 0.3rem;">
Discord ID of the user to call
</div>
</div>
<!-- Voice Channel ID Input -->
<div>
<label style="display: block; margin-bottom: 0.5rem; font-weight: bold;">🎤 Voice Channel ID:</label>
<input
type="text"
id="voice-channel-id"
placeholder="Discord channel ID (e.g., 987654321)"
style="width: 100%; padding: 0.5rem; background: #333; color: #fff; border: 1px solid #555; border-radius: 4px; box-sizing: border-box;"
>
<div style="font-size: 0.85rem; color: #aaa; margin-top: 0.3rem;">
Discord ID of the voice channel to join
</div>
</div>
</div>
<!-- Debug Mode Toggle -->
<div style="margin-bottom: 1.5rem; padding: 1rem; background: #1e1e1e; border-radius: 4px;">
<label style="display: flex; align-items: center; cursor: pointer;">
<input
type="checkbox"
id="voice-debug-mode"
style="margin-right: 0.7rem; width: 18px; height: 18px; cursor: pointer;"
>
<span style="font-weight: bold;">🐛 Debug Mode</span>
</label>
<div style="font-size: 0.85rem; color: #aaa; margin-top: 0.5rem; margin-left: 1.7rem;">
When enabled, shows voice transcriptions and responses in text channel. When disabled, voice chat is private.
</div>
</div>
<!-- Call Status Display -->
<div id="voice-call-status" style="background: #1e1e1e; padding: 1rem; border-radius: 4px; margin-bottom: 1.5rem; display: none;">
<div style="color: #61dafb; font-weight: bold; margin-bottom: 0.5rem;">📊 Call Status:</div>
<div id="voice-call-status-text" style="color: #aaa; font-size: 0.9rem;"></div>
<div id="voice-call-invite-link" style="margin-top: 0.5rem; display: none;">
<strong>Invite Link:</strong> <a id="voice-call-invite-url" href="" target="_blank" style="color: #61dafb;">View Invite</a>
</div>
</div>
<!-- Call Buttons -->
<div style="display: flex; gap: 1rem;">
<button
id="voice-call-btn"
onclick="initiateVoiceCall()"
style="background: #2ecc71; color: #000; padding: 0.7rem 1.5rem; border: 1px solid #27ae60; border-radius: 4px; cursor: pointer; font-weight: bold; font-size: 1rem;"
>
📞 Initiate Call
</button>
<button
id="voice-call-cancel-btn"
onclick="cancelVoiceCall()"
style="background: #e74c3c; color: #fff; padding: 0.7rem 1.5rem; border: 1px solid #c0392b; border-radius: 4px; cursor: pointer; font-weight: bold; font-size: 1rem; display: none;"
>
🛑 Cancel Call
</button>
</div>
</div>
<!-- Call Information -->
<div style="background: #1a1a2e; padding: 1.5rem; border-radius: 8px; border-left: 3px solid #61dafb;">
<h4 style="margin-top: 0; color: #61dafb;"> How Voice Calls Work</h4>
<ul style="color: #ddd; line-height: 1.8;">
<li><strong>Automatic Setup:</strong> STT and TTS containers start automatically</li>
<li><strong>Warmup Wait:</strong> System waits for both containers to be ready (~30-75 seconds)</li>
<li><strong>VC Join:</strong> Miku joins the specified voice channel</li>
<li><strong>DM Invitation:</strong> User receives a personalized invite DM with a voice channel link</li>
<li><strong>Auto-Listen:</strong> STT automatically starts when user joins</li>
<li><strong>Auto-Leave:</strong> Miku leaves 45 seconds after user disconnects</li>
<li><strong>Timeout:</strong> If user doesn't join within 30 minutes, call is cancelled</li>
</ul>
</div>
<!-- Call History -->
<div style="margin-top: 2rem;">
<h4 style="color: #61dafb; margin-bottom: 1rem;">📋 Recent Calls</h4>
<div id="voice-call-history" style="background: #1e1e1e; border: 1px solid #444; border-radius: 4px; padding: 1rem;">
<div style="text-align: center; color: #888;">No calls yet. Start one above!</div>
</div>
</div>
</div>
</div>
</div>
</div>
@@ -1387,6 +1494,8 @@
<script>
// Global variables
let currentMood = 'neutral';
let voiceCallActive = false;
let voiceCallHistory = [];
let servers = [];
let evilMode = false;
@@ -4324,8 +4433,25 @@ document.addEventListener('DOMContentLoaded', function() {
}
});
}
// Load voice debug mode setting
loadVoiceDebugMode();
});
// Load voice debug mode setting from server
async function loadVoiceDebugMode() {
try {
const response = await fetch('/voice/debug-mode');
const data = await response.json();
const checkbox = document.getElementById('voice-debug-mode');
if (checkbox && data.debug_mode !== undefined) {
checkbox.checked = data.debug_mode;
}
} catch (error) {
console.error('Failed to load voice debug mode:', error);
}
}
// Handle Enter key in chat input
function handleChatKeyPress(event) {
if (event.ctrlKey && event.key === 'Enter') {
@@ -4603,7 +4729,198 @@ function readFileAsBase64(file) {
});
}
// ============================================================================
// Voice Call Management Functions
// ============================================================================
async function initiateVoiceCall() {
const userId = document.getElementById('voice-user-id').value.trim();
const channelId = document.getElementById('voice-channel-id').value.trim();
const debugMode = document.getElementById('voice-debug-mode').checked;
// Validation
if (!userId) {
showNotification('Please enter a user ID', 'error');
return;
}
if (!channelId) {
showNotification('Please enter a voice channel ID', 'error');
return;
}
// Check if user IDs are valid (numeric)
if (isNaN(userId) || isNaN(channelId)) {
showNotification('User ID and Channel ID must be numeric', 'error');
return;
}
// Set debug mode
try {
const debugFormData = new FormData();
debugFormData.append('enabled', debugMode);
await fetch('/voice/debug-mode', {
method: 'POST',
body: debugFormData
});
} catch (error) {
console.error('Failed to set debug mode:', error);
}
// Disable button and show status
const callBtn = document.getElementById('voice-call-btn');
const cancelBtn = document.getElementById('voice-call-cancel-btn');
const statusDiv = document.getElementById('voice-call-status');
const statusText = document.getElementById('voice-call-status-text');
callBtn.disabled = true;
statusDiv.style.display = 'block';
cancelBtn.style.display = 'inline-block';
voiceCallActive = true;
try {
statusText.innerHTML = '⏳ Starting STT and TTS containers...';
const formData = new FormData();
formData.append('user_id', userId);
formData.append('voice_channel_id', channelId);
const response = await fetch('/voice/call', {
method: 'POST',
body: formData
});
const data = await response.json();
// Check for HTTP error status (422 validation error, etc.)
if (!response.ok) {
let errorMsg = data.error || data.detail || 'Unknown error';
// Handle FastAPI validation errors
if (data.detail && Array.isArray(data.detail)) {
errorMsg = data.detail.map(e => `${e.loc.join('.')}: ${e.msg}`).join(', ');
}
statusText.innerHTML = `❌ Error: ${errorMsg}`;
showNotification(`Voice call failed: ${errorMsg}`, 'error');
callBtn.disabled = false;
cancelBtn.style.display = 'none';
voiceCallActive = false;
return;
}
if (!data.success) {
statusText.innerHTML = `❌ Error: ${data.error}`;
showNotification(`Voice call failed: ${data.error}`, 'error');
callBtn.disabled = false;
cancelBtn.style.display = 'none';
voiceCallActive = false;
return;
}
// Success!
statusText.innerHTML = `✅ Voice call initiated!<br>User ID: ${data.user_id}<br>Channel: ${data.channel_id}`;
// Show invite link
const inviteDiv = document.getElementById('voice-call-invite-link');
const inviteUrl = document.getElementById('voice-call-invite-url');
inviteUrl.href = data.invite_url;
inviteUrl.textContent = data.invite_url;
inviteDiv.style.display = 'block';
// Add to call history
addVoiceCallToHistory(userId, channelId, data.invite_url);
showNotification('Voice call initiated successfully!', 'success');
// Auto-reset after 5 minutes (call should be done by then or timed out)
setTimeout(() => {
if (voiceCallActive) {
resetVoiceCall();
}
}, 300000); // 5 minutes
} catch (error) {
console.error('Voice call error:', error);
statusText.innerHTML = `❌ Error: ${error.message}`;
showNotification(`Voice call error: ${error.message}`, 'error');
callBtn.disabled = false;
cancelBtn.style.display = 'none';
voiceCallActive = false;
}
}
function cancelVoiceCall() {
resetVoiceCall();
showNotification('Voice call cancelled', 'info');
}
function resetVoiceCall() {
const callBtn = document.getElementById('voice-call-btn');
const cancelBtn = document.getElementById('voice-call-cancel-btn');
const statusDiv = document.getElementById('voice-call-status');
callBtn.disabled = false;
cancelBtn.style.display = 'none';
statusDiv.style.display = 'none';
voiceCallActive = false;
// Clear inputs
document.getElementById('voice-user-id').value = '';
document.getElementById('voice-channel-id').value = '';
}
function addVoiceCallToHistory(userId, channelId, inviteUrl) {
const now = new Date();
const timestamp = now.toLocaleTimeString();
const callEntry = {
userId: userId,
channelId: channelId,
inviteUrl: inviteUrl,
timestamp: timestamp
};
voiceCallHistory.unshift(callEntry); // Add to front
// Keep only last 10 calls
if (voiceCallHistory.length > 10) {
voiceCallHistory.pop();
}
updateVoiceCallHistoryDisplay();
}
function updateVoiceCallHistoryDisplay() {
const historyDiv = document.getElementById('voice-call-history');
if (voiceCallHistory.length === 0) {
historyDiv.innerHTML = '<div style="text-align: center; color: #888;">No calls yet. Start one above!</div>';
return;
}
let html = '';
voiceCallHistory.forEach((call, index) => {
html += `
<div style="background: #242424; padding: 0.75rem; margin-bottom: 0.5rem; border-radius: 4px; border-left: 3px solid #61dafb;">
<div style="display: flex; justify-content: space-between; align-items: center;">
<div>
<strong>${call.timestamp}</strong>
<div style="font-size: 0.85rem; color: #aaa; margin-top: 0.3rem;">
User: <code>${call.userId}</code> | Channel: <code>${call.channelId}</code>
</div>
</div>
<a href="${call.inviteUrl}" target="_blank" style="color: #61dafb; text-decoration: none; padding: 0.3rem 0.7rem; background: #333; border-radius: 4px; font-size: 0.85rem;">
View Link →
</a>
</div>
</div>
`;
});
historyDiv.innerHTML = html;
}
</script>
</body>
</html>