2026-01-16 13:01:08 +02:00
|
|
|
|
# voice.py
|
|
|
|
|
|
"""
|
|
|
|
|
|
Voice channel commands for Miku Discord bot.
|
|
|
|
|
|
Handles joining, leaving, and status commands for voice chat sessions.
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import discord
|
2026-01-17 00:01:17 +02:00
|
|
|
|
import aiohttp
|
|
|
|
|
|
import json
|
2026-01-16 13:01:08 +02:00
|
|
|
|
from utils.voice_manager import voice_manager
|
|
|
|
|
|
from utils.logger import get_logger
|
2026-01-17 00:01:17 +02:00
|
|
|
|
from utils.llm import get_current_gpu_url
|
2026-01-16 13:01:08 +02:00
|
|
|
|
|
|
|
|
|
|
# Module-level logger used by the voice command handlers in this file.
logger = get_logger('voice_commands')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def handle_voice_command(message, cmd, args):
    """
    Route a voice-related command to the handler that implements it.

    Args:
        message: Discord message object that triggered the command
        cmd: Command name (join, leave, voice-status, test, say)
        args: Command arguments; forwarded to the handlers that accept them
    """
    # Guard-clause dispatch: each recognised command awaits its handler
    # and returns immediately.
    if cmd == 'join':
        await _handle_join(message, args)
        return

    if cmd == 'leave':
        await _handle_leave(message)
        return

    if cmd == 'voice-status':
        await _handle_status(message)
        return

    if cmd == 'test':
        await _handle_test(message, args)
        return

    if cmd == 'say':
        await _handle_say(message, args)
        return

    # Anything else is reported back in the channel the command came from.
    await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _handle_join(message, args):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Handle !miku join command.
|
|
|
|
|
|
Join voice channel and start session with resource locks.
|
|
|
|
|
|
"""
|
|
|
|
|
|
# Get voice channel
|
|
|
|
|
|
voice_channel = None
|
|
|
|
|
|
|
|
|
|
|
|
if args and args[0].startswith('<#'):
|
|
|
|
|
|
# Channel mentioned (e.g., !miku join #voice-chat)
|
|
|
|
|
|
try:
|
|
|
|
|
|
channel_id = int(args[0][2:-1])
|
|
|
|
|
|
voice_channel = message.guild.get_channel(channel_id)
|
|
|
|
|
|
|
|
|
|
|
|
if not isinstance(voice_channel, discord.VoiceChannel):
|
|
|
|
|
|
await message.channel.send("❌ That's not a voice channel!")
|
|
|
|
|
|
return
|
|
|
|
|
|
except (ValueError, AttributeError):
|
|
|
|
|
|
await message.channel.send("❌ Invalid channel!")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Use user's current voice channel
|
|
|
|
|
|
if message.author.voice and message.author.voice.channel:
|
|
|
|
|
|
voice_channel = message.author.voice.channel
|
|
|
|
|
|
else:
|
|
|
|
|
|
await message.channel.send(
|
|
|
|
|
|
"❌ You must be in a voice channel! "
|
|
|
|
|
|
"Or mention a voice channel like `!miku join #voice-chat`"
|
|
|
|
|
|
)
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Check permissions
|
|
|
|
|
|
if not voice_channel.permissions_for(message.guild.me).connect:
|
|
|
|
|
|
await message.channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
if not voice_channel.permissions_for(message.guild.me).speak:
|
|
|
|
|
|
await message.channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Start session
|
|
|
|
|
|
try:
|
|
|
|
|
|
await message.channel.send(f"🎤 Joining {voice_channel.mention}...")
|
|
|
|
|
|
|
|
|
|
|
|
await voice_manager.start_session(
|
|
|
|
|
|
message.guild.id,
|
|
|
|
|
|
voice_channel,
|
|
|
|
|
|
message.channel # Use current text channel for prompts
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
embed = discord.Embed(
|
|
|
|
|
|
title="🎤 Voice Chat Active",
|
|
|
|
|
|
description=f"I've joined {voice_channel.mention}!",
|
|
|
|
|
|
color=discord.Color.from_rgb(134, 206, 203) # Miku teal
|
|
|
|
|
|
)
|
|
|
|
|
|
embed.add_field(
|
|
|
|
|
|
name="How to use",
|
|
|
|
|
|
value=f"Send messages in {message.channel.mention} to make me speak!",
|
|
|
|
|
|
inline=False
|
|
|
|
|
|
)
|
|
|
|
|
|
embed.add_field(
|
|
|
|
|
|
name="⚠️ Resource Mode",
|
|
|
|
|
|
value=(
|
|
|
|
|
|
"• Text inference on AMD GPU only\n"
|
|
|
|
|
|
"• Vision model disabled\n"
|
|
|
|
|
|
"• Image generation disabled\n"
|
|
|
|
|
|
"• Other text channels paused"
|
|
|
|
|
|
),
|
|
|
|
|
|
inline=False
|
|
|
|
|
|
)
|
|
|
|
|
|
embed.set_footer(text="Use !miku leave to end the session")
|
|
|
|
|
|
|
|
|
|
|
|
await message.channel.send(embed=embed)
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"Voice session started by {message.author} in {voice_channel.name}")
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
await message.channel.send(f"❌ Failed to join voice: {str(e)}")
|
|
|
|
|
|
logger.error(f"Failed to start voice session: {e}", exc_info=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _handle_leave(message):
    """
    Handle !miku leave command.

    Ends the active voice session through the voice manager and posts a
    summary embed. Refuses when there is no active session or when the
    active session belongs to a different guild than the message.

    Args:
        message: Discord message object that triggered the command
    """
    session = voice_manager.active_session

    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # Only members of the guild that owns the session may end it.
    if session.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Remember the channel name before the session is ended so it can
        # still be shown in the summary.
        left_channel_name = session.voice_channel.name

        await message.channel.send("👋 Leaving voice channel...")
        await voice_manager.end_session()

        released = "\n".join([
            "• Vision model available",
            "• Image generation available",
            "• Text channels resumed",
            "• All features restored",
        ])

        embed = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {left_channel_name}",
            color=discord.Color.from_rgb(134, 206, 203),
        )
        embed.add_field(name="✅ Resources Released", value=released, inline=False)

        await message.channel.send(embed=embed)
        logger.info(f"Voice session ended by {message.author}")

    except Exception as exc:
        await message.channel.send(f"⚠️ Error leaving voice: {exc!s}")
        logger.error(f"Failed to end voice session: {exc}", exc_info=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _handle_status(message):
    """
    Handle !miku voice-status command.

    Posts an embed describing the current voice session: a "no session"
    hint when nothing is active, a short notice when the session belongs
    to another guild, otherwise the channels in use and a resource summary.

    Args:
        message: Discord message object that triggered the command
    """
    session = voice_manager.active_session

    if not session:
        idle_embed = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple(),
        )
        idle_embed.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False,
        )
        await message.channel.send(embed=idle_embed)
        return

    # Details are only shown inside the guild that owns the session.
    if session.guild_id != message.guild.id:
        await message.channel.send("ℹ️ I'm in a voice channel in a different server.")
        return

    allocation_text = "\n".join([
        "**GPU Usage:**",
        "• AMD RX 6800: Text model + RVC",
        "• GTX 1660: Soprano TTS only",
        "",
        "**Blocked Features:**",
        "• ❌ Vision model",
        "• ❌ Image generation",
        "• ❌ Bipolar mode",
        "• ❌ Profile picture changes",
        "• ⏸️ Autonomous engine",
        "• ⏸️ Scheduled events",
        "• 📦 Other text channels (queued)",
    ])

    embed = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=discord.Color.from_rgb(134, 206, 203),
    )

    # (name, value, inline) for each field, in display order.
    field_rows = (
        ("Voice Channel", session.voice_channel.mention, True),
        ("Prompt Channel", session.text_channel.mention, True),
        ("📊 Resource Allocation", allocation_text, False),
    )
    for field_name, field_value, is_inline in field_rows:
        embed.add_field(name=field_name, value=field_value, inline=is_inline)

    embed.set_footer(text="Use !miku leave to end the session")

    await message.channel.send(embed=embed)
|
2026-01-16 23:37:34 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _handle_test(message, args):
    """
    Handle !miku test command.

    Streams a test sentence (the joined arguments, or a default greeting)
    to the active session's audio source and reacts with a check mark on
    success.

    Args:
        message: Discord message object that triggered the command
        args: Optional words to speak instead of the default sentence
    """
    active = voice_manager.active_session

    if not active:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return

    if not active.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Use the caller's text when given, otherwise fall back to the default.
    if args:
        phrase = " ".join(args)
    else:
        phrase = "Hello! This is a test of my voice chat system."

    try:
        await message.channel.send(f'🎤 Speaking: *"{phrase}"*')
        logger.info(f"Testing voice playback: {phrase}")

        # Hand the text to the audio source, which streams it to TTS.
        await active.audio_source.stream_text(phrase)

        await message.add_reaction("✅")
        logger.info("✓ Test audio sent to TTS")

    except Exception as exc:
        logger.error(f"Failed to test voice playback: {exc}", exc_info=True)
        await message.channel.send(f"❌ Error testing voice: {exc}")
|
2026-01-17 00:01:17 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _handle_say(message, args):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Handle !miku say command.
|
|
|
|
|
|
Send user message to LLM and speak the response in voice chat.
|
|
|
|
|
|
|
|
|
|
|
|
Phase 3: Text → LLM → Voice (STT deferred to later phase)
|
|
|
|
|
|
"""
|
|
|
|
|
|
# Validate args
|
|
|
|
|
|
if not args:
|
|
|
|
|
|
await message.channel.send("❌ Usage: `!miku say <your message>`")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Check active voice session
|
|
|
|
|
|
session = voice_manager.active_session
|
|
|
|
|
|
if not session:
|
|
|
|
|
|
await message.channel.send("❌ No active voice session! Use `!miku join` first.")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
if not session.audio_source:
|
|
|
|
|
|
await message.channel.send("❌ Audio source not connected!")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Extract user message
|
|
|
|
|
|
user_message = " ".join(args)
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Show processing indicator
|
|
|
|
|
|
await message.channel.send(f"💭 Processing: *\"{user_message}\"*")
|
|
|
|
|
|
logger.info(f"Voice say: user={message.author.name}, message={user_message}")
|
|
|
|
|
|
|
|
|
|
|
|
# Prepare LLM payload (based on query_llama logic)
|
|
|
|
|
|
from utils.llm import get_current_gpu_url
|
|
|
|
|
|
import globals
|
|
|
|
|
|
|
|
|
|
|
|
# Simple system prompt for voice responses
|
|
|
|
|
|
system_prompt = """You are Hatsune Miku, the virtual singer.
|
|
|
|
|
|
Respond naturally and concisely as Miku would in a voice conversation.
|
|
|
|
|
|
Keep responses short (1-3 sentences) since they will be spoken aloud."""
|
|
|
|
|
|
|
|
|
|
|
|
payload = {
|
|
|
|
|
|
"model": globals.TEXT_MODEL,
|
|
|
|
|
|
"messages": [
|
|
|
|
|
|
{"role": "system", "content": system_prompt},
|
|
|
|
|
|
{"role": "user", "content": user_message}
|
|
|
|
|
|
],
|
|
|
|
|
|
"stream": True,
|
|
|
|
|
|
"temperature": 0.8,
|
|
|
|
|
|
"max_tokens": 200 # Shorter for voice
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
headers = {'Content-Type': 'application/json'}
|
|
|
|
|
|
llama_url = get_current_gpu_url()
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"Streaming LLM from {llama_url}")
|
|
|
|
|
|
|
|
|
|
|
|
# Stream LLM response and send tokens to TTS
|
|
|
|
|
|
async with aiohttp.ClientSession() as http_session:
|
|
|
|
|
|
async with http_session.post(
|
|
|
|
|
|
f"{llama_url}/v1/chat/completions",
|
|
|
|
|
|
json=payload,
|
|
|
|
|
|
headers=headers,
|
|
|
|
|
|
timeout=aiohttp.ClientTimeout(total=60)
|
|
|
|
|
|
) as response:
|
|
|
|
|
|
if response.status != 200:
|
|
|
|
|
|
error_text = await response.text()
|
|
|
|
|
|
raise Exception(f"LLM error {response.status}: {error_text}")
|
|
|
|
|
|
|
|
|
|
|
|
# Process streaming response
|
|
|
|
|
|
full_response = ""
|
|
|
|
|
|
async for line in response.content:
|
|
|
|
|
|
line = line.decode('utf-8').strip()
|
|
|
|
|
|
if line.startswith('data: '):
|
|
|
|
|
|
data_str = line[6:] # Remove 'data: ' prefix
|
|
|
|
|
|
if data_str == '[DONE]':
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = json.loads(data_str)
|
|
|
|
|
|
if 'choices' in data and len(data['choices']) > 0:
|
|
|
|
|
|
delta = data['choices'][0].get('delta', {})
|
|
|
|
|
|
content = delta.get('content', '')
|
|
|
|
|
|
if content:
|
|
|
|
|
|
# Send token to TTS
|
|
|
|
|
|
await session.audio_source.send_token(content)
|
|
|
|
|
|
full_response += content
|
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Send flush command to trigger synthesis of remaining tokens
|
|
|
|
|
|
await session.audio_source.flush()
|
|
|
|
|
|
|
|
|
|
|
|
# Show what Miku said
|
|
|
|
|
|
await message.channel.send(f"🎤 Miku: *\"{full_response.strip()}\"*")
|
|
|
|
|
|
logger.info(f"✓ Voice say complete: {full_response.strip()}")
|
|
|
|
|
|
await message.add_reaction("✅")
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"Voice say failed: {e}", exc_info=True)
|
|
|
|
|
|
await message.channel.send(f"❌ Voice say failed: {str(e)}")
|
|
|
|
|
|
|