# voice.py
"""
Voice channel commands for Miku Discord bot.
Handles joining, leaving, and status commands for voice chat sessions.
"""

import discord
import aiohttp
import json
from utils.voice_manager import voice_manager
from utils.logger import get_logger
from utils.llm import get_current_gpu_url

logger = get_logger('voice_commands')

# Accent colour shared by every "session active / ended" embed (Miku teal).
_MIKU_TEAL = discord.Color.from_rgb(134, 206, 203)

# Server-sent-event framing used by the streaming chat-completions endpoint.
_SSE_PREFIX = 'data: '
_SSE_DONE = '[DONE]'


async def handle_voice_command(message, cmd, args):
    """
    Dispatch a voice-related command to its handler.

    Args:
        message: Discord message object that triggered the command
        cmd: Command name (join, leave, voice-status, test, say)
        args: Command arguments (list of whitespace-split tokens)
    """
    # Every handler below reads message.guild (and join reads
    # message.author.voice, which only exists on guild members), so a DM
    # would otherwise crash with an uncaught AttributeError.
    if message.guild is None:
        await message.channel.send("❌ Voice commands can only be used in a server!")
        return

    if cmd == 'join':
        await _handle_join(message, args)
    elif cmd == 'leave':
        await _handle_leave(message)
    elif cmd == 'voice-status':
        await _handle_status(message)
    elif cmd == 'test':
        await _handle_test(message, args)
    elif cmd == 'say':
        await _handle_say(message, args)
    else:
        await message.channel.send(f"❌ Unknown voice command: `{cmd}`")


async def _require_audio_session(message):
    """
    Return the active voice session if `message` is allowed to drive its audio.

    Sends the appropriate error to the channel and returns None when there is
    no session, when the session belongs to a different guild than the
    message, or when the session has no audio source attached.
    """
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return None

    # Same ownership rule as leave/voice-status: only the guild that owns the
    # session may make the bot speak in it.
    if session.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return None

    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return None

    return session


async def _handle_join(message, args):
    """
    Handle !miku join command.
    Join voice channel and start session with resource locks.

    The target is either a mentioned channel (first argument of the form
    `<#id>`) or, failing that, the voice channel the author is currently in.
    """
    # Resolve the target voice channel
    if args and args[0].startswith('<#'):
        # Channel mentioned (e.g., !miku join #voice-chat)
        try:
            channel_id = int(args[0][2:-1])
            voice_channel = message.guild.get_channel(channel_id)
        except (ValueError, AttributeError):
            await message.channel.send("❌ Invalid channel!")
            return
        # get_channel() returns None for unknown ids, which also lands here.
        if not isinstance(voice_channel, discord.VoiceChannel):
            await message.channel.send("❌ That's not a voice channel!")
            return
    else:
        # Use user's current voice channel
        voice_state = message.author.voice
        if not (voice_state and voice_state.channel):
            await message.channel.send(
                "❌ You must be in a voice channel! "
                "Or mention a voice channel like `!miku join #voice-chat`"
            )
            return
        voice_channel = voice_state.channel

    # Check permissions
    bot_permissions = voice_channel.permissions_for(message.guild.me)
    if not bot_permissions.connect:
        await message.channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
        return
    if not bot_permissions.speak:
        await message.channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
        return

    # Start session
    try:
        await message.channel.send(f"🎤 Joining {voice_channel.mention}...")

        await voice_manager.start_session(
            message.guild.id,
            voice_channel,
            message.channel  # Use current text channel for prompts
        )

        embed = discord.Embed(
            title="🎤 Voice Chat Active",
            description=f"I've joined {voice_channel.mention}!",
            color=_MIKU_TEAL
        )
        embed.add_field(
            name="How to use",
            value=f"Send messages in {message.channel.mention} to make me speak!",
            inline=False
        )
        embed.add_field(
            name="⚠️ Resource Mode",
            value=(
                "• Text inference on AMD GPU only\n"
                "• Vision model disabled\n"
                "• Image generation disabled\n"
                "• Other text channels paused"
            ),
            inline=False
        )
        embed.set_footer(text="Use !miku leave to end the session")

        await message.channel.send(embed=embed)
        logger.info(f"Voice session started by {message.author} in {voice_channel.name}")

    except Exception as e:
        # Command boundary: report the failure to the user and keep the bot alive.
        await message.channel.send(f"❌ Failed to join voice: {str(e)}")
        logger.error(f"Failed to start voice session: {e}", exc_info=True)


async def _handle_leave(message):
    """
    Handle !miku leave command.
    Leave voice channel and release all resources.
    """
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # Check if user is in the same guild as the active session
    if session.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Capture the name first; the session is gone once end_session() returns.
        voice_channel_name = session.voice_channel.name

        await message.channel.send("👋 Leaving voice channel...")
        await voice_manager.end_session()

        embed = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {voice_channel_name}",
            color=_MIKU_TEAL
        )
        embed.add_field(
            name="✅ Resources Released",
            value=(
                "• Vision model available\n"
                "• Image generation available\n"
                "• Text channels resumed\n"
                "• All features restored"
            ),
            inline=False
        )

        await message.channel.send(embed=embed)
        logger.info(f"Voice session ended by {message.author}")

    except Exception as e:
        await message.channel.send(f"⚠️ Error leaving voice: {str(e)}")
        logger.error(f"Failed to end voice session: {e}", exc_info=True)


async def _handle_status(message):
    """
    Handle !miku voice-status command.
    Show current voice session status.
    """
    session = voice_manager.active_session

    if not session:
        embed = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple()
        )
        embed.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False
        )
        await message.channel.send(embed=embed)
        return

    # Check if in same guild
    if session.guild_id != message.guild.id:
        await message.channel.send("ℹ️ I'm in a voice channel in a different server.")
        return

    embed = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=_MIKU_TEAL
    )
    embed.add_field(
        name="Voice Channel",
        value=session.voice_channel.mention,
        inline=True
    )
    embed.add_field(
        name="Prompt Channel",
        value=session.text_channel.mention,
        inline=True
    )
    embed.add_field(
        name="📊 Resource Allocation",
        value=(
            "**GPU Usage:**\n"
            "• AMD RX 6800: Text model + RVC\n"
            "• GTX 1660: Soprano TTS only\n\n"
            "**Blocked Features:**\n"
            "• ❌ Vision model\n"
            "• ❌ Image generation\n"
            "• ❌ Bipolar mode\n"
            "• ❌ Profile picture changes\n"
            "• ⏸️ Autonomous engine\n"
            "• ⏸️ Scheduled events\n"
            "• 📦 Other text channels (queued)"
        ),
        inline=False
    )
    embed.set_footer(text="Use !miku leave to end the session")

    await message.channel.send(embed=embed)


async def _handle_test(message, args):
    """
    Handle !miku test command.
    Test TTS audio playback in the current voice session.

    Speaks the joined arguments, or a default sentence when none are given.
    """
    session = await _require_audio_session(message)
    if session is None:
        return

    # Get test text from args or use default
    test_text = " ".join(args) if args else "Hello! This is a test of my voice chat system."

    try:
        await message.channel.send(f"🎤 Speaking: *\"{test_text}\"*")
        logger.info(f"Testing voice playback: {test_text}")

        # Stream text to TTS via the audio source
        await session.audio_source.stream_text(test_text)

        await message.add_reaction("✅")
        logger.info("✓ Test audio sent to TTS")

    except Exception as e:
        logger.error(f"Failed to test voice playback: {e}", exc_info=True)
        await message.channel.send(f"❌ Error testing voice: {e}")


def _extract_stream_token(data_str):
    """
    Return the text delta carried by one streamed JSON payload.

    Returns an empty string for malformed JSON, payloads without choices,
    and deltas without (or with null) content, so callers can simply skip
    falsy results.
    """
    try:
        data = json.loads(data_str)
    except json.JSONDecodeError:
        return ''

    if not isinstance(data, dict):
        return ''
    choices = data.get('choices')
    if not choices:
        return ''
    delta = choices[0].get('delta') or {}
    return delta.get('content') or ''


async def _stream_llm_to_tts(session, user_message):
    """
    Stream an LLM reply for `user_message` into the session's audio source.

    Each content token is forwarded to `session.audio_source.send_token` as it
    arrives; `flush()` is called once the stream ends.

    Returns:
        The full reply text (concatenation of all streamed tokens).

    Raises:
        RuntimeError: the LLM endpoint answered with a non-200 status.
    """
    # Imported at call time, as the original code did.
    # NOTE(review): presumably avoids an import cycle with `globals` — confirm.
    import globals

    # Simple system prompt for voice responses
    system_prompt = (
        "You are Hatsune Miku, the virtual singer. "
        "Respond naturally and concisely as Miku would in a voice conversation. \n"
        "Keep responses short (1-3 sentences) since they will be spoken aloud."
    )

    payload = {
        "model": globals.TEXT_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message}
        ],
        "stream": True,
        "temperature": 0.8,
        "max_tokens": 200  # Shorter for voice
    }
    headers = {'Content-Type': 'application/json'}

    llama_url = get_current_gpu_url()
    logger.info(f"Streaming LLM from {llama_url}")

    tokens = []
    async with aiohttp.ClientSession() as http_session:
        async with http_session.post(
            f"{llama_url}/v1/chat/completions",
            json=payload,
            headers=headers,
            timeout=aiohttp.ClientTimeout(total=60)
        ) as response:
            if response.status != 200:
                error_text = await response.text()
                raise RuntimeError(f"LLM error {response.status}: {error_text}")

            # Iterating the body yields one line at a time.
            async for raw_line in response.content:
                line = raw_line.decode('utf-8').strip()
                if not line.startswith(_SSE_PREFIX):
                    continue

                data_str = line[len(_SSE_PREFIX):]
                if data_str == _SSE_DONE:
                    break

                content = _extract_stream_token(data_str)
                if content:
                    # Send token to TTS
                    await session.audio_source.send_token(content)
                    tokens.append(content)

    # Send flush command to trigger synthesis of remaining tokens
    await session.audio_source.flush()

    return "".join(tokens)


async def _handle_say(message, args):
    """
    Handle !miku say command.
    Send user message to LLM and speak the response in voice chat.

    Phase 3: Text → LLM → Voice (STT deferred to later phase)
    """
    # Validate args
    if not args:
        await message.channel.send("❌ Usage: `!miku say <message>`")
        return

    # Check active voice session (exists, owned by this guild, audio attached)
    session = await _require_audio_session(message)
    if session is None:
        return

    # Extract user message
    user_message = " ".join(args)

    try:
        # Show processing indicator
        await message.channel.send(f"💭 Processing: *\"{user_message}\"*")
        logger.info(f"Voice say: user={message.author.name}, message={user_message}")

        # Stream LLM response and send tokens to TTS
        full_response = await _stream_llm_to_tts(session, user_message)

        # Show what Miku said
        await message.channel.send(f"🎤 Miku: *\"{full_response.strip()}\"*")
        logger.info(f"✓ Voice say complete: {full_response.strip()}")
        await message.add_reaction("✅")

    except Exception as e:
        logger.error(f"Voice say failed: {e}", exc_info=True)
        await message.channel.send(f"❌ Voice say failed: {str(e)}")