Files
miku-discord/bot/commands/voice.py

374 lines
12 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# voice.py
"""
Voice channel commands for Miku Discord bot.
Handles the join, leave, voice-status, test, and say commands for voice chat sessions.
"""
import discord
import aiohttp
import json
from utils.voice_manager import voice_manager
from utils.logger import get_logger
from utils.llm import get_current_gpu_url
logger = get_logger('voice_commands')
async def handle_voice_command(message, cmd, args):
    """
    Route a voice-related command to its handler.

    Args:
        message: Discord message object
        cmd: Command name (join, leave, voice-status, test, say)
        args: Command arguments (only forwarded to join, test and say)
    """
    # Each entry is a thunk so the handler is only looked up and called once
    # the command name has actually matched.
    routes = {
        'join': lambda: _handle_join(message, args),
        'leave': lambda: _handle_leave(message),
        'voice-status': lambda: _handle_status(message),
        'test': lambda: _handle_test(message, args),
        'say': lambda: _handle_say(message, args),
    }

    start_handler = routes.get(cmd)
    if start_handler is None:
        await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
        return

    await start_handler()
async def _handle_join(message, args):
"""
Handle !miku join command.
Join voice channel and start session with resource locks.
"""
# Get voice channel
voice_channel = None
if args and args[0].startswith('<#'):
# Channel mentioned (e.g., !miku join #voice-chat)
try:
channel_id = int(args[0][2:-1])
voice_channel = message.guild.get_channel(channel_id)
if not isinstance(voice_channel, discord.VoiceChannel):
await message.channel.send("❌ That's not a voice channel!")
return
except (ValueError, AttributeError):
await message.channel.send("❌ Invalid channel!")
return
else:
# Use user's current voice channel
if message.author.voice and message.author.voice.channel:
voice_channel = message.author.voice.channel
else:
await message.channel.send(
"❌ You must be in a voice channel! "
"Or mention a voice channel like `!miku join #voice-chat`"
)
return
# Check permissions
if not voice_channel.permissions_for(message.guild.me).connect:
await message.channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
return
if not voice_channel.permissions_for(message.guild.me).speak:
await message.channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
return
# Start session
try:
await message.channel.send(f"🎤 Joining {voice_channel.mention}...")
await voice_manager.start_session(
message.guild.id,
voice_channel,
message.channel # Use current text channel for prompts
)
embed = discord.Embed(
title="🎤 Voice Chat Active",
description=f"I've joined {voice_channel.mention}!",
color=discord.Color.from_rgb(134, 206, 203) # Miku teal
)
embed.add_field(
name="How to use",
value=f"Send messages in {message.channel.mention} to make me speak!",
inline=False
)
embed.add_field(
name="⚠️ Resource Mode",
value=(
"• Text inference on AMD GPU only\n"
"• Vision model disabled\n"
"• Image generation disabled\n"
"• Other text channels paused"
),
inline=False
)
embed.set_footer(text="Use !miku leave to end the session")
await message.channel.send(embed=embed)
logger.info(f"Voice session started by {message.author} in {voice_channel.name}")
except Exception as e:
await message.channel.send(f"❌ Failed to join voice: {str(e)}")
logger.error(f"Failed to start voice session: {e}", exc_info=True)
async def _handle_leave(message):
    """
    Handle !miku leave command.

    Ends the active voice session through ``voice_manager`` and posts a
    summary embed. Nothing is ended when there is no active session or when
    the session belongs to a different guild than the invoking message.

    Args:
        message: Discord message object that invoked the command
    """
    reply = message.channel.send
    current = voice_manager.active_session

    if not current:
        await reply("❌ I'm not in a voice channel!")
        return

    # Only members of the guild that owns the session may end it
    if current.guild_id != message.guild.id:
        await reply("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Grab the name up front; it is needed after the session is gone
        left_channel = current.voice_channel.name
        await reply("👋 Leaving voice channel...")
        await voice_manager.end_session()

        released = "\n".join((
            "• Vision model available",
            "• Image generation available",
            "• Text channels resumed",
            "• All features restored",
        ))
        summary = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {left_channel}",
            color=discord.Color.from_rgb(134, 206, 203),
        )
        summary.add_field(name="✅ Resources Released", value=released, inline=False)
        await reply(embed=summary)
        logger.info(f"Voice session ended by {message.author}")
    except Exception as exc:
        await reply(f"⚠️ Error leaving voice: {exc}")
        logger.error(f"Failed to end voice session: {exc}", exc_info=True)
async def _handle_status(message):
    """
    Handle !miku voice-status command.

    Posts an embed describing the active voice session, a hint on how to
    start one when none is active, or a short notice when the active session
    belongs to another guild.

    Args:
        message: Discord message object that invoked the command
    """
    reply = message.channel.send
    current = voice_manager.active_session

    if not current:
        idle = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple(),
        )
        idle.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False,
        )
        await reply(embed=idle)
        return

    # Session details are only shown inside the guild that owns the session
    if current.guild_id != message.guild.id:
        await reply(" I'm in a voice channel in a different server.")
        return

    allocation = "\n".join((
        "**GPU Usage:**",
        "• AMD RX 6800: Text model + RVC",
        "• GTX 1660: Soprano TTS only",
        "",
        "**Blocked Features:**",
        "• ❌ Vision model",
        "• ❌ Image generation",
        "• ❌ Bipolar mode",
        "• ❌ Profile picture changes",
        "• ⏸️ Autonomous engine",
        "• ⏸️ Scheduled events",
        "• 📦 Other text channels (queued)",
    ))

    report = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=discord.Color.from_rgb(134, 206, 203),
    )
    report.add_field(name="Voice Channel", value=current.voice_channel.mention, inline=True)
    report.add_field(name="Prompt Channel", value=current.text_channel.mention, inline=True)
    report.add_field(name="📊 Resource Allocation", value=allocation, inline=False)
    report.set_footer(text="Use !miku leave to end the session")
    await reply(embed=report)
async def _handle_test(message, args):
    """
    Handle !miku test command.
    Test TTS audio playback in the current voice session.

    Requires an active voice session with a connected audio source. The text
    to speak is streamed to the session's audio source, and the invoking
    message gets a reaction once the text has been handed over.

    Args:
        message: Discord message object that invoked the command
        args: Words to speak; a default sentence is used when empty
    """
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return
    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Get test text from args or use default
    test_text = " ".join(args) if args else "Hello! This is a test of my voice chat system."

    try:
        await message.channel.send(f"🎤 Speaking: *\"{test_text}\"*")
        logger.info(f"Testing voice playback: {test_text}")
        # Stream text to TTS via the audio source
        await session.audio_source.stream_text(test_text)
        # The reaction has to be a real emoji. An empty string is not one, and
        # the resulting error would land in the handler below and report a
        # failure even though the audio had already been sent.
        await message.add_reaction("✅")
        logger.info("✓ Test audio sent to TTS")
    except Exception as e:
        logger.error(f"Failed to test voice playback: {e}", exc_info=True)
        await message.channel.send(f"❌ Error testing voice: {e}")
async def _handle_say(message, args):
"""
Handle !miku say command.
Send user message to LLM and speak the response in voice chat.
Phase 3: Text → LLM → Voice (STT deferred to later phase)
"""
# Validate args
if not args:
await message.channel.send("❌ Usage: `!miku say <your message>`")
return
# Check active voice session
session = voice_manager.active_session
if not session:
await message.channel.send("❌ No active voice session! Use `!miku join` first.")
return
if not session.audio_source:
await message.channel.send("❌ Audio source not connected!")
return
# Extract user message
user_message = " ".join(args)
try:
# Show processing indicator
await message.channel.send(f"💭 Processing: *\"{user_message}\"*")
logger.info(f"Voice say: user={message.author.name}, message={user_message}")
# Prepare LLM payload (based on query_llama logic)
from utils.llm import get_current_gpu_url
import globals
# Simple system prompt for voice responses
system_prompt = """You are Hatsune Miku, the virtual singer.
Respond naturally and concisely as Miku would in a voice conversation.
Keep responses short (1-3 sentences) since they will be spoken aloud."""
payload = {
"model": globals.TEXT_MODEL,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message}
],
"stream": True,
"temperature": 0.8,
"max_tokens": 200 # Shorter for voice
}
headers = {'Content-Type': 'application/json'}
llama_url = get_current_gpu_url()
logger.info(f"Streaming LLM from {llama_url}")
# Stream LLM response and send tokens to TTS
async with aiohttp.ClientSession() as http_session:
async with http_session.post(
f"{llama_url}/v1/chat/completions",
json=payload,
headers=headers,
timeout=aiohttp.ClientTimeout(total=60)
) as response:
if response.status != 200:
error_text = await response.text()
raise Exception(f"LLM error {response.status}: {error_text}")
# Process streaming response
full_response = ""
async for line in response.content:
line = line.decode('utf-8').strip()
if line.startswith('data: '):
data_str = line[6:] # Remove 'data: ' prefix
if data_str == '[DONE]':
break
try:
data = json.loads(data_str)
if 'choices' in data and len(data['choices']) > 0:
delta = data['choices'][0].get('delta', {})
content = delta.get('content', '')
if content:
# Send token to TTS
await session.audio_source.send_token(content)
full_response += content
except json.JSONDecodeError:
continue
# Send flush command to trigger synthesis of remaining tokens
await session.audio_source.flush()
# Show what Miku said
await message.channel.send(f"🎤 Miku: *\"{full_response.strip()}\"*")
logger.info(f"✓ Voice say complete: {full_response.strip()}")
await message.add_reaction("")
except Exception as e:
logger.error(f"Voice say failed: {e}", exc_info=True)
await message.channel.send(f"❌ Voice say failed: {str(e)}")