Files
miku-discord/bot/commands/voice.py

469 lines
16 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# voice.py
"""
Voice channel commands for Miku Discord bot.
Handles joining, leaving, and status commands for voice chat sessions.
"""
import discord
import aiohttp
import json
from utils.voice_manager import voice_manager
from utils.logger import get_logger
from utils.llm import get_current_gpu_url
logger = get_logger('voice_commands')
async def handle_voice_command(message, cmd, args):
    """
    Route a voice-related command to its handler.

    Args:
        message: Discord message object that triggered the command
        cmd: Command name (join, leave, voice-status, test, say, listen,
            stop-listening)
        args: Command arguments, forwarded to handlers that accept them

    Any other command name is answered with an "unknown command" notice sent
    to the channel the message came from.
    """
    # Each entry defers the call so only the selected handler is invoked;
    # 'leave' and 'voice-status' handlers take the message alone.
    routes = {
        'join': lambda: _handle_join(message, args),
        'leave': lambda: _handle_leave(message),
        'voice-status': lambda: _handle_status(message),
        'test': lambda: _handle_test(message, args),
        'say': lambda: _handle_say(message, args),
        'listen': lambda: _handle_listen(message, args),
        'stop-listening': lambda: _handle_stop_listening(message, args),
    }

    start_handler = routes.get(cmd)
    if start_handler is None:
        await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
        return

    await start_handler()
async def _handle_join(message, args):
    """
    Handle !miku join command.

    Picks the target voice channel (a ``<#id>`` mention in ``args[0]``,
    otherwise the author's current voice channel), checks that the bot may
    connect and speak there, then starts a session through ``voice_manager``
    with the current text channel passed along for prompts.

    Args:
        message: Discord message object that issued the command
        args: Command arguments; only the first one is inspected
    """
    if args and args[0].startswith('<#'):
        # Channel mentioned (e.g., !miku join #voice-chat): drop "<#" and ">"
        try:
            mentioned_id = int(args[0][2:-1])
            voice_channel = message.guild.get_channel(mentioned_id)
            if not isinstance(voice_channel, discord.VoiceChannel):
                await message.channel.send("❌ That's not a voice channel!")
                return
        except (ValueError, AttributeError):
            await message.channel.send("❌ Invalid channel!")
            return
    else:
        # No mention given: fall back to the author's current voice channel
        author_voice = message.author.voice
        if not (author_voice and author_voice.channel):
            await message.channel.send(
                "❌ You must be in a voice channel! "
                "Or mention a voice channel like `!miku join #voice-chat`"
            )
            return
        voice_channel = author_voice.channel

    # Permission checks: connect first, then speak
    bot_permissions = voice_channel.permissions_for(message.guild.me)
    if not bot_permissions.connect:
        await message.channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
        return
    if not bot_permissions.speak:
        await message.channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
        return

    try:
        await message.channel.send(f"🎤 Joining {voice_channel.mention}...")
        await voice_manager.start_session(
            message.guild.id,
            voice_channel,
            message.channel,  # current text channel is used for prompts
        )

        resource_notes = "\n".join((
            "• Text inference on AMD GPU only",
            "• Vision model disabled",
            "• Image generation disabled",
            "• Other text channels paused",
        ))
        session_embed = discord.Embed(
            title="🎤 Voice Chat Active",
            description=f"I've joined {voice_channel.mention}!",
            color=discord.Color.from_rgb(134, 206, 203),  # Miku teal
        )
        session_embed.add_field(
            name="How to use",
            value=f"Send messages in {message.channel.mention} to make me speak!",
            inline=False,
        )
        session_embed.add_field(
            name="⚠️ Resource Mode",
            value=resource_notes,
            inline=False,
        )
        session_embed.set_footer(text="Use !miku leave to end the session")
        await message.channel.send(embed=session_embed)

        logger.info(f"Voice session started by {message.author} in {voice_channel.name}")
    except Exception as e:
        # Report the failure to the user first, then log with traceback
        await message.channel.send(f"❌ Failed to join voice: {e}")
        logger.error(f"Failed to start voice session: {e}", exc_info=True)
async def _handle_leave(message):
    """
    Handle !miku leave command.

    Ends the active voice session through ``voice_manager.end_session`` and
    posts a summary embed. Refuses when there is no active session or when
    the active session belongs to a different guild than the message.

    Args:
        message: Discord message object that issued the command
    """
    active = voice_manager.active_session
    if not active:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # Only allow leaving from the guild that owns the session
    if active.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Capture the name before the session is torn down
        left_channel_name = active.voice_channel.name

        await message.channel.send("👋 Leaving voice channel...")
        await voice_manager.end_session()

        released_notes = "\n".join((
            "• Vision model available",
            "• Image generation available",
            "• Text channels resumed",
            "• All features restored",
        ))
        farewell_embed = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {left_channel_name}",
            color=discord.Color.from_rgb(134, 206, 203),
        )
        farewell_embed.add_field(
            name="✅ Resources Released",
            value=released_notes,
            inline=False,
        )
        await message.channel.send(embed=farewell_embed)

        logger.info(f"Voice session ended by {message.author}")
    except Exception as e:
        await message.channel.send(f"⚠️ Error leaving voice: {e}")
        logger.error(f"Failed to end voice session: {e}", exc_info=True)
async def _handle_status(message):
    """
    Handle !miku voice-status command.

    Posts an embed describing the current voice session: a "no active
    session" embed when there is none, a short notice when the session lives
    in another guild, and otherwise the voice/prompt channels plus the static
    resource allocation summary.

    Args:
        message: Discord message object that issued the command
    """
    session = voice_manager.active_session

    if not session:
        idle_embed = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple(),
        )
        idle_embed.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False,
        )
        await message.channel.send(embed=idle_embed)
        return

    # Session details are only shown inside the guild that owns the session
    if session.guild_id != message.guild.id:
        await message.channel.send(" I'm in a voice channel in a different server.")
        return

    # The blank entry produces the empty line between the two sections
    allocation_notes = "\n".join((
        "**GPU Usage:**",
        "• AMD RX 6800: Text model + RVC",
        "• GTX 1660: Soprano TTS only",
        "",
        "**Blocked Features:**",
        "• ❌ Vision model",
        "• ❌ Image generation",
        "• ❌ Bipolar mode",
        "• ❌ Profile picture changes",
        "• ⏸️ Autonomous engine",
        "• ⏸️ Scheduled events",
        "• 📦 Other text channels (queued)",
    ))

    status_embed = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=discord.Color.from_rgb(134, 206, 203),
    )
    status_embed.add_field(
        name="Voice Channel",
        value=session.voice_channel.mention,
        inline=True,
    )
    status_embed.add_field(
        name="Prompt Channel",
        value=session.text_channel.mention,
        inline=True,
    )
    status_embed.add_field(
        name="📊 Resource Allocation",
        value=allocation_notes,
        inline=False,
    )
    status_embed.set_footer(text="Use !miku leave to end the session")
    await message.channel.send(embed=status_embed)
async def _handle_test(message, args):
    """
    Handle !miku test command.

    Streams a test phrase to the active session's audio source and reacts to
    the command message once the text has been handed over.

    Args:
        message: Discord message object that issued the command
        args: Optional words to speak, joined with single spaces; a default
            phrase is used when empty
    """
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return
    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Get test text from args or use default
    test_text = " ".join(args) if args else "Hello! This is a test of my voice chat system."

    try:
        await message.channel.send(f"🎤 Speaking: *\"{test_text}\"*")
        logger.info(f"Testing voice playback: {test_text}")

        # Stream text to TTS via the audio source
        await session.audio_source.stream_text(test_text)

        # The reaction must be a real emoji: an empty string is rejected and
        # would surface below as a playback error despite the text being sent.
        await message.add_reaction("✅")
        logger.info("✓ Test audio sent to TTS")
    except Exception as e:
        logger.error(f"Failed to test voice playback: {e}", exc_info=True)
        await message.channel.send(f"❌ Error testing voice: {e}")
def _parse_stream_line(raw_line):
    """
    Decode one raw line of the streamed chat-completion response.

    Args:
        raw_line: Bytes of a single line read from the HTTP response body

    Returns:
        A ``(content, done)`` tuple. ``done`` is True when the line is the
        ``data: [DONE]`` terminator. ``content`` is the text found under
        ``choices[0].delta.content``, or ``''`` when the line carries none
        (non-``data: `` line, invalid JSON, non-object payload, no choices).
    """
    line = raw_line.decode('utf-8').strip()
    if not line.startswith('data: '):
        return '', False

    data_str = line[6:]  # Remove 'data: ' prefix
    if data_str == '[DONE]':
        return '', True

    try:
        data = json.loads(data_str)
    except json.JSONDecodeError:
        # Malformed chunk: skip it rather than abort the whole stream
        return '', False

    if not isinstance(data, dict) or not data.get('choices'):
        return '', False

    delta = data['choices'][0].get('delta') or {}
    return delta.get('content') or '', False


async def _handle_say(message, args):
    """
    Handle !miku say command.

    Sends the user's text to the LLM endpoint with streaming enabled, forwards
    each streamed token to the session's audio source, flushes the audio
    source, then posts the full reply in the text channel.
    Phase 3: Text → LLM → Voice (STT deferred to later phase)

    Args:
        message: Discord message object that issued the command
        args: Words of the user's message; joined with single spaces

    Errors (including a non-200 LLM response) are logged and reported to the
    channel instead of being raised.
    """
    # Validate args
    if not args:
        await message.channel.send("❌ Usage: `!miku say <your message>`")
        return

    # Check active voice session
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return
    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Extract user message
    user_message = " ".join(args)

    try:
        # Show processing indicator
        await message.channel.send(f"💭 Processing: *\"{user_message}\"*")
        logger.info(f"Voice say: user={message.author.name}, message={user_message}")

        # Project module imported locally, as in the original code
        # NOTE(review): presumably deferred to avoid an import cycle — confirm
        import globals

        # Simple system prompt for voice responses
        system_prompt = (
            "You are Hatsune Miku, the virtual singer.\n"
            "Respond naturally and concisely as Miku would in a voice conversation.\n"
            "Keep responses short (1-3 sentences) since they will be spoken aloud."
        )
        payload = {
            "model": globals.TEXT_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            "stream": True,
            "temperature": 0.8,
            "max_tokens": 200,  # Shorter for voice
        }
        headers = {'Content-Type': 'application/json'}
        llama_url = get_current_gpu_url()
        logger.info(f"Streaming LLM from {llama_url}")

        # Stream LLM response and send tokens to TTS
        response_parts = []
        async with aiohttp.ClientSession() as http_session:
            async with http_session.post(
                f"{llama_url}/v1/chat/completions",
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=60),
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise RuntimeError(f"LLM error {response.status}: {error_text}")

                # Process streaming response line by line
                async for raw_line in response.content:
                    token, done = _parse_stream_line(raw_line)
                    if done:
                        break
                    if token:
                        # Send token to TTS as soon as it arrives
                        await session.audio_source.send_token(token)
                        response_parts.append(token)

        # Send flush command to trigger synthesis of remaining tokens
        await session.audio_source.flush()

        # Show what Miku said
        full_response = "".join(response_parts).strip()
        await message.channel.send(f"🎤 Miku: *\"{full_response}\"*")
        logger.info(f"✓ Voice say complete: {full_response}")

        # The reaction must be a real emoji: an empty string is rejected and
        # would be reported below as a generation error after a successful reply.
        await message.add_reaction("✅")
    except Exception as e:
        logger.error(f"Failed to generate voice response: {e}", exc_info=True)
        await message.channel.send(f"❌ Error generating voice response: {e}")
async def _handle_listen(message, args):
    """
    Handle !miku listen command.

    Asks the active session to start listening to a user's voice.

    Usage:
        !miku listen - Start listening to command author
        !miku listen @user - Start listening to mentioned user

    Args:
        message: Discord message object that issued the command
        args: Command arguments; when non-empty and the message has mentions,
            the first mentioned user is the target
    """
    # Miku must have a connected voice client before anyone can be heard
    session = voice_manager.active_session
    if not (session and session.voice_client and session.voice_client.is_connected()):
        await message.channel.send("❌ I'm not in a voice channel! Use `!miku join` first.")
        return

    # Mentioned user wins; otherwise the command author is the target
    target_user = message.mentions[0] if args and message.mentions else message.author

    # Target has to be in a voice channel...
    if not (target_user.voice and target_user.voice.channel):
        await message.channel.send(f"{target_user.mention} is not in a voice channel!")
        return

    # ...and specifically in the one Miku is connected to
    if target_user.voice.channel.id != session.voice_client.channel.id:
        await message.channel.send(
            f"{target_user.mention} must be in the same voice channel as me!"
        )
        return

    try:
        await session.start_listening(target_user)

        confirmation = (
            f"👂 Now listening to {target_user.mention}'s voice! "
            "Speak to me and I'll respond. Use `!miku stop-listening` to stop."
        )
        await message.channel.send(confirmation)
        await message.add_reaction("👂")
        logger.info(f"Started listening to user {target_user.id} ({target_user.name})")
    except Exception as e:
        logger.error(f"Failed to start listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to start listening: {e}")
async def _handle_stop_listening(message, args):
    """
    Handle !miku stop-listening command.

    Asks the active session to stop listening to a user's voice.

    Usage:
        !miku stop-listening - Stop listening to command author
        !miku stop-listening @user - Stop listening to mentioned user

    Args:
        message: Discord message object that issued the command
        args: Command arguments; when non-empty and the message has mentions,
            the first mentioned user is the target
    """
    # Nothing to stop without an active session
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # Mentioned user wins; otherwise the command author is the target
    target_user = message.mentions[0] if args and message.mentions else message.author

    try:
        # The session is addressed by user id here, not by the user object
        await session.stop_listening(target_user.id)

        await message.channel.send(f"🔇 Stopped listening to {target_user.mention}.")
        await message.add_reaction("🔇")
        logger.info(f"Stopped listening to user {target_user.id} ({target_user.name})")
    except Exception as e:
        logger.error(f"Failed to stop listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to stop listening: {e}")