# voice.py
"""
Voice channel commands for Miku Discord bot.

Handles the join, leave, voice-status, test, say, listen, and stop-listening
commands for voice chat sessions.
"""

# Standard library
import json

# Third-party
import aiohttp
import discord

# Project-local
from utils.voice_manager import voice_manager
from utils.logger import get_logger
from utils.llm import get_current_gpu_url

# Module-wide logger shared by every voice command handler in this file.
logger = get_logger('voice_commands')


async def handle_voice_command(message, cmd, args):
    """
    Route a voice-related command to its handler.

    Args:
        message: Discord message object that triggered the command
        cmd: Command name (join, leave, voice-status, test, say, listen,
            stop-listening)
        args: Command arguments, forwarded to the handlers that accept them

    Any other command name is answered with an "unknown command" notice in
    the channel the message came from.
    """
    # Each entry is a zero-argument callable so that the handler coroutine is
    # only created for the command that was actually requested.
    routes = {
        'join': lambda: _handle_join(message, args),
        'leave': lambda: _handle_leave(message),
        'voice-status': lambda: _handle_status(message),
        'test': lambda: _handle_test(message, args),
        'say': lambda: _handle_say(message, args),
        'listen': lambda: _handle_listen(message, args),
        'stop-listening': lambda: _handle_stop_listening(message, args),
    }

    start_handler = routes.get(cmd)
    if start_handler is None:
        await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
        return

    await start_handler()


async def _handle_join(message, args):
    """
    Handle !miku join command.
    Join voice channel and start session with resource locks.

    Args:
        message: Discord message object that issued the command
        args: Command arguments. When the first argument is a channel
            mention (``<#id>``) that channel is joined; otherwise the
            author's current voice channel is used.

    Every failure (no guild, bad mention, missing permissions, session start
    error) is reported in ``message.channel``; nothing is raised to the
    caller.
    """
    guild = message.guild
    if guild is None:
        # Direct messages carry no guild: there is no voice channel to
        # resolve and no bot member to check permissions for.
        await message.channel.send("❌ Voice commands can only be used in a server!")
        return

    if args and args[0].startswith('<#'):
        # Channel mentioned (e.g., !miku join #voice-chat)
        mention = args[0]
        # Only a well-formed "<#digits>" mention is accepted; a missing
        # closing bracket must not silently truncate the channel ID.
        id_text = mention[2:-1] if mention.endswith('>') else ''
        try:
            channel_id = int(id_text)
        except ValueError:
            await message.channel.send("❌ Invalid channel!")
            return

        voice_channel = guild.get_channel(channel_id)
        if not isinstance(voice_channel, discord.VoiceChannel):
            await message.channel.send("❌ That's not a voice channel!")
            return
    else:
        # Use user's current voice channel. getattr covers authors that have
        # no voice attribute at all.
        voice_state = getattr(message.author, 'voice', None)
        if voice_state and voice_state.channel:
            voice_channel = voice_state.channel
        else:
            await message.channel.send(
                "❌ You must be in a voice channel! "
                "Or mention a voice channel like `!miku join #voice-chat`"
            )
            return

    # Check permissions (resolved once, used for both checks)
    bot_permissions = voice_channel.permissions_for(guild.me)

    if not bot_permissions.connect:
        await message.channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
        return

    if not bot_permissions.speak:
        await message.channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
        return

    # Start session
    try:
        await message.channel.send(f"🎤 Joining {voice_channel.mention}...")

        await voice_manager.start_session(
            guild.id,
            voice_channel,
            message.channel  # Use current text channel for prompts
        )

        embed = discord.Embed(
            title="🎤 Voice Chat Active",
            description=f"I've joined {voice_channel.mention}!",
            color=discord.Color.from_rgb(134, 206, 203)  # Miku teal
        )
        embed.add_field(
            name="How to use",
            value=f"Send messages in {message.channel.mention} to make me speak!",
            inline=False
        )
        embed.add_field(
            name="⚠️ Resource Mode",
            value=(
                "• Text inference on AMD GPU only\n"
                "• Vision model disabled\n"
                "• Image generation disabled\n"
                "• Other text channels paused"
            ),
            inline=False
        )
        embed.set_footer(text="Use !miku leave to end the session")

        await message.channel.send(embed=embed)

        logger.info(f"Voice session started by {message.author} in {voice_channel.name}")

    except Exception as e:
        # Top-level command boundary: report to the user and log with the
        # traceback instead of letting the error escape the handler.
        await message.channel.send(f"❌ Failed to join voice: {str(e)}")
        logger.error(f"Failed to start voice session: {e}", exc_info=True)


async def _handle_leave(message):
    """
    Handle !miku leave command.

    Ends the active voice session through voice_manager and posts a summary
    embed. Refuses when there is no active session or when the active
    session belongs to a different guild than the message.
    """
    session = voice_manager.active_session

    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # Only the guild that owns the session may end it
    if session.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Remember the name before the session is ended; it is needed for
        # the summary embed afterwards.
        left_channel_name = session.voice_channel.name

        await message.channel.send("👋 Leaving voice channel...")
        await voice_manager.end_session()

        released_lines = [
            "• Vision model available",
            "• Image generation available",
            "• Text channels resumed",
            "• All features restored",
        ]

        farewell = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {left_channel_name}",
            color=discord.Color.from_rgb(134, 206, 203)
        )
        farewell.add_field(
            name="✅ Resources Released",
            value="\n".join(released_lines),
            inline=False
        )

        await message.channel.send(embed=farewell)
        logger.info(f"Voice session ended by {message.author}")

    except Exception as e:
        await message.channel.send(f"⚠️ Error leaving voice: {str(e)}")
        logger.error(f"Failed to end voice session: {e}", exc_info=True)


async def _handle_status(message):
    """
    Handle !miku voice-status command.

    Posts one of three replies in the message's channel: a "no active
    session" embed, a short notice when the session belongs to another
    guild, or an embed describing the active session.
    """
    teal = (134, 206, 203)
    session = voice_manager.active_session

    if not session:
        idle = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple()
        )
        idle.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False
        )
        await message.channel.send(embed=idle)
        return

    # Details are only shown to the guild that owns the session
    if session.guild_id != message.guild.id:
        await message.channel.send("ℹ️ I'm in a voice channel in a different server.")
        return

    # The empty entry produces the blank line between the two sections.
    allocation_lines = [
        "**GPU Usage:**",
        "• AMD RX 6800: Text model + RVC",
        "• GTX 1660: Soprano TTS only",
        "",
        "**Blocked Features:**",
        "• ❌ Vision model",
        "• ❌ Image generation",
        "• ❌ Bipolar mode",
        "• ❌ Profile picture changes",
        "• ⏸️ Autonomous engine",
        "• ⏸️ Scheduled events",
        "• 📦 Other text channels (queued)",
    ]

    status = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=discord.Color.from_rgb(*teal)
    )
    status.add_field(name="Voice Channel", value=session.voice_channel.mention, inline=True)
    status.add_field(name="Prompt Channel", value=session.text_channel.mention, inline=True)
    status.add_field(
        name="📊 Resource Allocation",
        value="\n".join(allocation_lines),
        inline=False
    )
    status.set_footer(text="Use !miku leave to end the session")

    await message.channel.send(embed=status)


async def _handle_test(message, args):
    """
    Handle !miku test command.

    Streams a test phrase to the active session's audio source. The phrase
    is the space-joined args, or a built-in default sentence when no args
    are given.
    """
    session = voice_manager.active_session

    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return

    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Caller-supplied text wins over the default phrase
    if args:
        phrase = " ".join(args)
    else:
        phrase = "Hello! This is a test of my voice chat system."

    try:
        await message.channel.send(f"🎤 Speaking: *\"{phrase}\"*")
        logger.info(f"Testing voice playback: {phrase}")

        # Hand the text to the audio source for TTS
        await session.audio_source.stream_text(phrase)

        await message.add_reaction("✅")
        logger.info("✓ Test audio sent to TTS")

    except Exception as e:
        logger.error(f"Failed to test voice playback: {e}", exc_info=True)
        await message.channel.send(f"❌ Error testing voice: {e}")


async def _handle_say(message, args):
    """
    Handle !miku say command.
    Send user message to LLM and speak the response in voice chat.

    Phase 3: Text → LLM → Voice (STT deferred to later phase)

    Args:
        message: Discord message object that issued the command
        args: Words of the user's message; joined with single spaces

    The LLM reply is streamed from the chat-completions endpoint at the URL
    returned by get_current_gpu_url(). Each content token is forwarded to
    the session's audio source as it arrives, then the audio source is
    flushed and the full reply is echoed in the text channel. Every failure
    is logged and reported in ``message.channel``; nothing is raised to the
    caller.
    """
    # Validate args
    if not args:
        await message.channel.send("❌ Usage: `!miku say <your message>`")
        return

    # Check active voice session
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return

    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    user_message = " ".join(args)

    try:
        # Show processing indicator
        await message.channel.send(f"💭 Processing: *\"{user_message}\"*")
        logger.info(f"Voice say: user={message.author.name}, message={user_message}")

        # Project-level settings module, imported at call time as in the
        # original code (NOTE(review): presumably to avoid an import cycle).
        import globals

        # Simple system prompt for voice responses
        system_prompt = (
            "You are Hatsune Miku, the virtual singer.\n"
            "Respond naturally and concisely as Miku would in a voice conversation.\n"
            "Keep responses short (1-3 sentences) since they will be spoken aloud."
        )

        payload = {
            "model": globals.TEXT_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            "stream": True,
            "temperature": 0.8,
            "max_tokens": 200,  # Shorter for voice
        }

        headers = {'Content-Type': 'application/json'}
        llama_url = get_current_gpu_url()

        logger.info(f"Streaming LLM from {llama_url}")

        # Stream LLM response and send tokens to TTS
        spoken_parts = []
        async with aiohttp.ClientSession() as http_session:
            async with http_session.post(
                f"{llama_url}/v1/chat/completions",
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=60)
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise RuntimeError(f"LLM error {response.status}: {error_text}")

                async for raw_line in response.content:
                    line = raw_line.decode('utf-8').strip()
                    if not line.startswith('data: '):
                        continue

                    data_str = line[6:]  # Remove 'data: ' prefix
                    if data_str == '[DONE]':
                        break

                    # Only the JSON parse is guarded, so an error raised
                    # while sending a token is never mistaken for a
                    # malformed chunk and silently skipped.
                    try:
                        data = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue

                    choices = data.get('choices') if isinstance(data, dict) else None
                    if not choices:
                        continue

                    # A chunk may carry a null delta; treat it as empty.
                    delta = choices[0].get('delta') or {}
                    content = delta.get('content', '')
                    if content:
                        # Send token to TTS
                        await session.audio_source.send_token(content)
                        spoken_parts.append(content)

        # Send flush command to trigger synthesis of remaining tokens
        await session.audio_source.flush()

        full_response = "".join(spoken_parts).strip()

        # Show what Miku said
        await message.channel.send(f"🎤 Miku: *\"{full_response}\"*")
        logger.info(f"✓ Voice say complete: {full_response}")
        await message.add_reaction("✅")

    except Exception as e:
        # Top-level command boundary: log with traceback and tell the user.
        logger.error(f"Failed to generate voice response: {e}", exc_info=True)
        await message.channel.send(f"❌ Error generating voice response: {e}")


async def _handle_listen(message, args):
    """
    Handle !miku listen command.
    Start listening to a user's voice for STT.

    Usage:
        !miku listen - Start listening to command author
        !miku listen @user - Start listening to mentioned user

    The target must be in the same voice channel as the session's voice
    client; otherwise an explanatory message is sent and nothing is started.
    """
    session = voice_manager.active_session

    # A session with a connected voice client is required
    connected = bool(
        session
        and session.voice_client
        and session.voice_client.is_connected()
    )
    if not connected:
        await message.channel.send("❌ I'm not in a voice channel! Use `!miku join` first.")
        return

    # First mention (when args were given) takes priority over the author
    if args and message.mentions:
        target_user = message.mentions[0]
    else:
        target_user = message.author

    # Target must be in some voice channel...
    if not (target_user.voice and target_user.voice.channel):
        await message.channel.send(f"❌ {target_user.mention} is not in a voice channel!")
        return

    # ...and it must be the one the voice client is connected to
    bot_channel_id = session.voice_client.channel.id
    if target_user.voice.channel.id != bot_channel_id:
        await message.channel.send(
            f"❌ {target_user.mention} must be in the same voice channel as me!"
        )
        return

    try:
        await session.start_listening(target_user)

        confirmation = (
            f"👂 Now listening to {target_user.mention}'s voice! "
            "Speak to me and I'll respond. Use `!miku stop-listening` to stop."
        )
        await message.channel.send(confirmation)
        await message.add_reaction("👂")
        logger.info(f"Started listening to user {target_user.id} ({target_user.name})")

    except Exception as e:
        logger.error(f"Failed to start listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to start listening: {str(e)}")


async def _handle_stop_listening(message, args):
    """
    Handle !miku stop-listening command.
    Stop listening to a user's voice.

    Usage:
        !miku stop-listening - Stop listening to command author
        !miku stop-listening @user - Stop listening to mentioned user
    """
    session = voice_manager.active_session

    # Nothing to stop without an active session
    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # First mention (when args were given) takes priority over the author
    if args and message.mentions:
        target_user = message.mentions[0]
    else:
        target_user = message.author

    try:
        # The session is addressed by user ID here, not by the user object
        await session.stop_listening(target_user.id)

        await message.channel.send(f"🔇 Stopped listening to {target_user.mention}.")
        await message.add_reaction("🔇")
        logger.info(f"Stopped listening to user {target_user.id} ({target_user.name})")

    except Exception as e:
        logger.error(f"Failed to stop listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to stop listening: {str(e)}")