Files

469 lines
16 KiB
Python
Raw Permalink Normal View History

# voice.py
"""
Voice channel commands for Miku Discord bot.
Handles the join, leave, voice-status, test, say, listen, and stop-listening commands for voice chat sessions.
"""
import discord
import aiohttp
import json
from utils.voice_manager import voice_manager
from utils.logger import get_logger
from utils.llm import get_current_gpu_url
# Shared logger for the voice command handlers, registered under the name 'voice_commands'.
logger = get_logger('voice_commands')
async def handle_voice_command(message, cmd, args):
    """
    Route a voice command to the handler that implements it.

    Args:
        message: Discord message object that triggered the command
        cmd: Command name (join, leave, voice-status, test, say, listen,
            stop-listening)
        args: Command arguments, forwarded to the handlers that accept them
    """
    # Each entry is a thunk so the handler is only looked up and called for
    # the command that actually matched. 'leave' and 'voice-status' take no
    # arguments; every other handler receives ``args``.
    routes = {
        'join': lambda: _handle_join(message, args),
        'leave': lambda: _handle_leave(message),
        'voice-status': lambda: _handle_status(message),
        'test': lambda: _handle_test(message, args),
        'say': lambda: _handle_say(message, args),
        'listen': lambda: _handle_listen(message, args),
        'stop-listening': lambda: _handle_stop_listening(message, args),
    }

    run_handler = routes.get(cmd)
    if run_handler is None:
        await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
        return

    await run_handler()
async def _handle_join(message, args):
    """
    Handle !miku join command.

    Picks the target voice channel (a ``<#id>`` mention in ``args[0]`` if
    present, otherwise the author's current voice channel), checks that the
    bot may connect and speak there, then asks ``voice_manager`` to start a
    session that uses the invoking text channel for prompts.

    Args:
        message: Discord message object that triggered the command
        args: Command arguments; ``args[0]`` may be a channel mention
    """
    text_channel = message.channel

    # --- Resolve the voice channel -------------------------------------
    if args and args[0].startswith('<#'):
        # Explicit channel mention (e.g., !miku join #voice-chat)
        try:
            # Drop the leading '<#' and the final character of the mention.
            mentioned_id = int(args[0][2:-1])
            voice_channel = message.guild.get_channel(mentioned_id)
            if not isinstance(voice_channel, discord.VoiceChannel):
                await text_channel.send("❌ That's not a voice channel!")
                return
        except (ValueError, AttributeError):
            await text_channel.send("❌ Invalid channel!")
            return
    else:
        # Fall back to wherever the command author currently is
        author_voice = message.author.voice
        if not (author_voice and author_voice.channel):
            await text_channel.send(
                "❌ You must be in a voice channel! "
                "Or mention a voice channel like `!miku join #voice-chat`"
            )
            return
        voice_channel = author_voice.channel

    # --- Permission checks ---------------------------------------------
    bot_permissions = voice_channel.permissions_for(message.guild.me)
    if not bot_permissions.connect:
        await text_channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
        return
    if not bot_permissions.speak:
        await text_channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
        return

    # --- Start the session ---------------------------------------------
    try:
        await text_channel.send(f"🎤 Joining {voice_channel.mention}...")

        # The current text channel becomes the session's prompt channel.
        await voice_manager.start_session(message.guild.id, voice_channel, message.channel)

        resource_notes = "\n".join([
            "• Text inference on AMD GPU only",
            "• Vision model disabled",
            "• Image generation disabled",
            "• Other text channels paused",
        ])

        embed = discord.Embed(
            title="🎤 Voice Chat Active",
            description=f"I've joined {voice_channel.mention}!",
            color=discord.Color.from_rgb(134, 206, 203),  # Miku teal
        )
        embed.add_field(
            name="How to use",
            value=f"Send messages in {message.channel.mention} to make me speak!",
            inline=False,
        )
        embed.add_field(name="⚠️ Resource Mode", value=resource_notes, inline=False)
        embed.set_footer(text="Use !miku leave to end the session")

        await text_channel.send(embed=embed)
        logger.info(f"Voice session started by {message.author} in {voice_channel.name}")
    except Exception as e:
        # Tell the user first, then record the traceback.
        await text_channel.send(f"❌ Failed to join voice: {e}")
        logger.error(f"Failed to start voice session: {e}", exc_info=True)
async def _handle_leave(message):
    """
    Handle !miku leave command.

    Ends the active voice session through ``voice_manager`` and posts a
    summary embed. Refuses when there is no session or when the session
    belongs to a different guild than the one the command came from.

    Args:
        message: Discord message object that triggered the command
    """
    active = voice_manager.active_session

    if not active:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # Only the guild that owns the session may end it
    if active.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Capture the name before the session is torn down
        voice_channel_name = active.voice_channel.name

        await message.channel.send("👋 Leaving voice channel...")
        await voice_manager.end_session()

        released = "\n".join([
            "• Vision model available",
            "• Image generation available",
            "• Text channels resumed",
            "• All features restored",
        ])

        embed = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {voice_channel_name}",
            color=discord.Color.from_rgb(134, 206, 203),
        )
        embed.add_field(name="✅ Resources Released", value=released, inline=False)

        await message.channel.send(embed=embed)
        logger.info(f"Voice session ended by {message.author}")
    except Exception as e:
        await message.channel.send(f"⚠️ Error leaving voice: {e}")
        logger.error(f"Failed to end voice session: {e}", exc_info=True)
async def _handle_status(message):
    """
    Handle !miku voice-status command.

    Posts an embed describing the current voice session, or an embed
    explaining how to start one when no session is active.

    Args:
        message: Discord message object that triggered the command
    """
    session = voice_manager.active_session

    if not session:
        idle_embed = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple(),
        )
        idle_embed.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False,
        )
        await message.channel.send(embed=idle_embed)
        return

    # Sessions in other guilds are acknowledged but not described
    if session.guild_id != message.guild.id:
        # NOTE(review): the leading space looks like a stripped emoji prefix — confirm.
        await message.channel.send(" I'm in a voice channel in a different server.")
        return

    allocation = "\n".join([
        "**GPU Usage:**",
        "• AMD RX 6800: Text model + RVC",
        "• GTX 1660: Soprano TTS only",
        "",
        "**Blocked Features:**",
        "• ❌ Vision model",
        "• ❌ Image generation",
        "• ❌ Bipolar mode",
        "• ❌ Profile picture changes",
        "• ⏸️ Autonomous engine",
        "• ⏸️ Scheduled events",
        "• 📦 Other text channels (queued)",
    ])

    status_embed = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=discord.Color.from_rgb(134, 206, 203),
    )
    status_embed.add_field(name="Voice Channel", value=session.voice_channel.mention, inline=True)
    status_embed.add_field(name="Prompt Channel", value=session.text_channel.mention, inline=True)
    status_embed.add_field(name="📊 Resource Allocation", value=allocation, inline=False)
    status_embed.set_footer(text="Use !miku leave to end the session")

    await message.channel.send(embed=status_embed)
async def _handle_test(message, args):
    """
    Handle !miku test command.
    Test TTS audio playback in the current voice session.

    Args:
        message: Discord message object that triggered the command
        args: Optional words to speak, joined with single spaces. A default
            sentence is spoken when no arguments are given.
    """
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return
    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Get test text from args or use default
    test_text = " ".join(args) if args else "Hello! This is a test of my voice chat system."

    try:
        await message.channel.send(f"🎤 Speaking: *\"{test_text}\"*")
        logger.info(f"Testing voice playback: {test_text}")

        # Stream text to TTS via the audio source
        await session.audio_source.stream_text(test_text)

        # The reaction must be a real emoji: an empty string is not a valid
        # reaction, and the resulting error would be reported below as a
        # failed test even though the audio had already been streamed.
        await message.add_reaction("✅")
        logger.info("✓ Test audio sent to TTS")
    except Exception as e:
        logger.error(f"Failed to test voice playback: {e}", exc_info=True)
        await message.channel.send(f"❌ Error testing voice: {e}")
async def _handle_say(message, args):
    """
    Handle !miku say command.
    Send user message to LLM and speak the response in voice chat.
    Phase 3: Text LLM Voice (STT deferred to later phase)

    The LLM reply is requested as a server-sent-event stream; each content
    token is forwarded to the session's audio source as it arrives, then the
    audio source is flushed and the full reply is echoed to the text channel.

    Args:
        message: Discord message object that triggered the command
        args: Words of the user's message; joined with single spaces
    """
    # Validate args
    if not args:
        await message.channel.send("❌ Usage: `!miku say <your message>`")
        return

    # Check active voice session
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return
    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Extract user message
    user_message = " ".join(args)

    try:
        # Show processing indicator
        await message.channel.send(f"💭 Processing: *\"{user_message}\"*")
        logger.info(f"Voice say: user={message.author.name}, message={user_message}")

        # Kept function-local as in the original; presumably the project's
        # settings module (it shadows the builtin name) — confirm.
        import globals

        # Simple system prompt for voice responses
        system_prompt = (
            "You are Hatsune Miku, the virtual singer.\n"
            "Respond naturally and concisely as Miku would in a voice conversation.\n"
            "Keep responses short (1-3 sentences) since they will be spoken aloud."
        )

        # Prepare LLM payload (based on query_llama logic)
        payload = {
            "model": globals.TEXT_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            "stream": True,
            "temperature": 0.8,
            "max_tokens": 200,  # Shorter for voice
        }
        headers = {'Content-Type': 'application/json'}

        llama_url = get_current_gpu_url()
        logger.info(f"Streaming LLM from {llama_url}")

        # Tokens are collected in a list and joined once at the end.
        spoken_parts = []

        # Stream LLM response and send tokens to TTS
        async with aiohttp.ClientSession() as http_session:
            async with http_session.post(
                f"{llama_url}/v1/chat/completions",
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=60),
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise RuntimeError(f"LLM error {response.status}: {error_text}")

                # Process streaming response line by line
                async for raw_line in response.content:
                    line = raw_line.decode('utf-8').strip()
                    if not line.startswith('data: '):
                        continue

                    data_str = line[6:]  # Remove 'data: ' prefix
                    if data_str == '[DONE]':
                        break

                    # Only the JSON parse is guarded; undecodable lines are skipped.
                    try:
                        data = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue

                    choices = data['choices'] if 'choices' in data else None
                    if not choices:
                        continue

                    # A chunk may carry no delta or a null one; treat both as empty.
                    delta = choices[0].get('delta') or {}
                    content = delta.get('content', '')
                    if content:
                        # Send token to TTS
                        await session.audio_source.send_token(content)
                        spoken_parts.append(content)

        # Send flush command to trigger synthesis of remaining tokens
        await session.audio_source.flush()

        # Show what Miku said
        full_response = "".join(spoken_parts).strip()
        await message.channel.send(f"🎤 Miku: *\"{full_response}\"*")
        logger.info(f"✓ Voice say complete: {full_response}")

        # The reaction must be a real emoji: an empty string is not a valid
        # reaction, and the resulting error would be reported below as a
        # failure after the reply had already been spoken.
        await message.add_reaction("✅")
    except Exception as e:
        logger.error(f"Failed to generate voice response: {e}", exc_info=True)
        await message.channel.send(f"❌ Error generating voice response: {e}")
async def _handle_listen(message, args):
    """
    Handle !miku listen command.

    Starts listening to one user's voice on the active session. The target
    must be in the same voice channel the bot is connected to.

    Usage:
        !miku listen - Start listening to command author
        !miku listen @user - Start listening to mentioned user

    Args:
        message: Discord message object that triggered the command
        args: Command arguments; when non-empty and the message has
            mentions, the first mention is the target
    """
    # Miku must be connected to a voice channel
    session = voice_manager.active_session
    if not (session and session.voice_client and session.voice_client.is_connected()):
        await message.channel.send("❌ I'm not in a voice channel! Use `!miku join` first.")
        return

    # First mentioned user if any arguments were given, otherwise the author
    target_user = message.mentions[0] if args and len(message.mentions) > 0 else message.author

    # Target must be in some voice channel...
    target_voice = target_user.voice
    if not (target_voice and target_voice.channel):
        await message.channel.send(f"{target_user.mention} is not in a voice channel!")
        return

    # ...and specifically in the one Miku is connected to
    if target_user.voice.channel.id != session.voice_client.channel.id:
        await message.channel.send(
            f"{target_user.mention} must be in the same voice channel as me!"
        )
        return

    try:
        await session.start_listening(target_user)

        confirmation = (
            f"👂 Now listening to {target_user.mention}'s voice! "
            "Speak to me and I'll respond. Use `!miku stop-listening` to stop."
        )
        await message.channel.send(confirmation)
        await message.add_reaction("👂")
        logger.info(f"Started listening to user {target_user.id} ({target_user.name})")
    except Exception as e:
        logger.error(f"Failed to start listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to start listening: {e}")
async def _handle_stop_listening(message, args):
    """
    Handle !miku stop-listening command.

    Stops listening to one user's voice on the active session.

    Usage:
        !miku stop-listening - Stop listening to command author
        !miku stop-listening @user - Stop listening to mentioned user

    Args:
        message: Discord message object that triggered the command
        args: Command arguments; when non-empty and the message has
            mentions, the first mention is the target
    """
    # A session must exist for there to be anything to stop
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # First mentioned user if any arguments were given, otherwise the author
    target_user = message.mentions[0] if args and len(message.mentions) > 0 else message.author

    try:
        # The session is addressed by user id here, not by the user object
        await session.stop_listening(target_user.id)

        await message.channel.send(f"🔇 Stopped listening to {target_user.mention}.")
        await message.add_reaction("🔇")
        logger.info(f"Stopped listening to user {target_user.id} ({target_user.name})")
    except Exception as e:
        logger.error(f"Failed to stop listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to stop listening: {e}")