# miku-discord/bot/commands/voice.py

# voice.py
"""
Voice channel commands for Miku Discord bot.
Handles joining, leaving, and status commands for voice chat sessions.
"""

import discord
import aiohttp
import json
from utils.voice_manager import voice_manager
from utils.logger import get_logger
from utils.llm import get_current_gpu_url

logger = get_logger('voice_commands')


async def handle_voice_command(message, cmd, args):
    """
    Route a voice command to the coroutine that implements it.

    Args:
        message: Discord message object that triggered the command
        cmd: Command name (join, leave, voice-status, test, say, listen,
            stop-listening)
        args: Command arguments, forwarded to the handlers that accept them
    """
    # Each route is a thunk so that only the selected handler is looked up and
    # called. `leave` and `voice-status` do not take the argument list.
    routes = (
        ('join', lambda: _handle_join(message, args)),
        ('leave', lambda: _handle_leave(message)),
        ('voice-status', lambda: _handle_status(message)),
        ('test', lambda: _handle_test(message, args)),
        ('say', lambda: _handle_say(message, args)),
        ('listen', lambda: _handle_listen(message, args)),
        ('stop-listening', lambda: _handle_stop_listening(message, args)),
    )

    for name, invoke in routes:
        if cmd == name:
            await invoke()
            return

    # Nothing matched: tell the user which command was not recognised
    await message.channel.send(f"❌ Unknown voice command: `{cmd}`")


async def _handle_join(message, args):
    """
    Handle !miku join command.
    Pick the target voice channel, verify the bot may connect and speak there,
    then start a voice session that uses the invoking text channel for prompts.
    """
    if not (args and args[0].startswith('<#')):
        # No channel mention given: fall back to the caller's own voice channel
        caller_voice = message.author.voice
        if not (caller_voice and caller_voice.channel):
            await message.channel.send(
                "❌ You must be in a voice channel! "
                "Or mention a voice channel like `!miku join #voice-chat`"
            )
            return
        voice_channel = caller_voice.channel
    else:
        # Channel mentioned (e.g., !miku join #voice-chat): strip the
        # surrounding "<#" and ">" to get the numeric channel ID
        try:
            channel_id = int(args[0][2:-1])
            voice_channel = message.guild.get_channel(channel_id)

            if not isinstance(voice_channel, discord.VoiceChannel):
                await message.channel.send("❌ That's not a voice channel!")
                return
        except (ValueError, AttributeError):
            await message.channel.send("❌ Invalid channel!")
            return

    # The bot needs both connect and speak permission in the target channel
    if not voice_channel.permissions_for(message.guild.me).connect:
        await message.channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
        return

    if not voice_channel.permissions_for(message.guild.me).speak:
        await message.channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
        return

    try:
        await message.channel.send(f"🎤 Joining {voice_channel.mention}...")

        # The current text channel becomes the session's prompt channel
        await voice_manager.start_session(
            message.guild.id,
            voice_channel,
            message.channel
        )

        resource_notes = (
            "• Text inference on AMD GPU only\n"
            "• Vision model disabled\n"
            "• Image generation disabled\n"
            "• Other text channels paused"
        )
        miku_teal = discord.Color.from_rgb(134, 206, 203)

        embed = discord.Embed(
            title="🎤 Voice Chat Active",
            description=f"I've joined {voice_channel.mention}!",
            color=miku_teal
        )
        embed.add_field(
            name="How to use",
            value=f"Send messages in {message.channel.mention} to make me speak!",
            inline=False
        )
        embed.add_field(name="⚠️ Resource Mode", value=resource_notes, inline=False)
        embed.set_footer(text="Use !miku leave to end the session")

        await message.channel.send(embed=embed)

        logger.info(f"Voice session started by {message.author} in {voice_channel.name}")

    except Exception as e:
        # Report the failure to the user first, then log it with the traceback
        await message.channel.send(f"❌ Failed to join voice: {str(e)}")
        logger.error(f"Failed to start voice session: {e}", exc_info=True)


async def _handle_leave(message):
    """
    Handle !miku leave command.
    End the active voice session when it belongs to this server and confirm
    the result in the invoking channel.
    """
    active = voice_manager.active_session

    if not active:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # A session running in another server must not be ended from here
    if active.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Read the channel name before the session is torn down
        voice_channel_name = active.voice_channel.name

        await message.channel.send("👋 Leaving voice channel...")

        await voice_manager.end_session()

        released_summary = (
            "• Vision model available\n"
            "• Image generation available\n"
            "• Text channels resumed\n"
            "• All features restored"
        )

        embed = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {voice_channel_name}",
            color=discord.Color.from_rgb(134, 206, 203)
        )
        embed.add_field(name="✅ Resources Released", value=released_summary, inline=False)

        await message.channel.send(embed=embed)

        logger.info(f"Voice session ended by {message.author}")

    except Exception as e:
        # Report the failure to the user first, then log it with the traceback
        await message.channel.send(f"⚠️ Error leaving voice: {str(e)}")
        logger.error(f"Failed to end voice session: {e}", exc_info=True)


async def _handle_status(message):
    """
    Handle !miku voice-status command.
    Reply with an embed describing the current voice session, or a hint on how
    to start one when no session is active.
    """
    session = voice_manager.active_session

    if not session:
        idle_embed = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple()
        )
        idle_embed.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False
        )
        await message.channel.send(embed=idle_embed)
        return

    # Details are only shown in the server that owns the session
    if session.guild_id != message.guild.id:
        await message.channel.send("ℹ️ I'm in a voice channel in a different server.")
        return

    allocation_summary = (
        "**GPU Usage:**\n"
        "• AMD RX 6800: Text model + RVC\n"
        "• GTX 1660: Soprano TTS only\n\n"
        "**Blocked Features:**\n"
        "• ❌ Vision model\n"
        "• ❌ Image generation\n"
        "• ❌ Bipolar mode\n"
        "• ❌ Profile picture changes\n"
        "• ⏸️ Autonomous engine\n"
        "• ⏸️ Scheduled events\n"
        "• 📦 Other text channels (queued)"
    )

    embed = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=discord.Color.from_rgb(134, 206, 203)
    )

    # Voice and prompt channels sit side by side as inline fields
    channel_fields = (
        ("Voice Channel", session.voice_channel),
        ("Prompt Channel", session.text_channel),
    )
    for label, channel in channel_fields:
        embed.add_field(name=label, value=channel.mention, inline=True)

    embed.add_field(name="📊 Resource Allocation", value=allocation_summary, inline=False)
    embed.set_footer(text="Use !miku leave to end the session")

    await message.channel.send(embed=embed)


async def _handle_test(message, args):
    """
    Handle !miku test command.
    Speak a test phrase through the active session's audio source; the phrase
    is the joined arguments, or a built-in greeting when none are given.
    """
    session = voice_manager.active_session

    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return

    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Caller-supplied words take precedence over the default phrase
    if args:
        test_text = " ".join(args)
    else:
        test_text = "Hello! This is a test of my voice chat system."

    try:
        await message.channel.send(f"🎤 Speaking: *\"{test_text}\"*")
        logger.info(f"Testing voice playback: {test_text}")

        # Hand the whole phrase to the audio source in one call
        await session.audio_source.stream_text(test_text)

        await message.add_reaction("✅")
        logger.info("✓ Test audio sent to TTS")

    except Exception as e:
        # Log with the traceback first, then tell the user what went wrong
        logger.error(f"Failed to test voice playback: {e}", exc_info=True)
        await message.channel.send(f"❌ Error testing voice: {e}")


# Upper bound on how much of an LLM error body is echoed back to Discord.
# Discord rejects messages longer than 2000 characters, so an unbounded body
# (e.g. an HTML error page) would make the error report itself fail.
_MAX_LLM_ERROR_CHARS = 500


def _extract_stream_token(raw_line):
    """
    Decode one line of an OpenAI-style streaming chat-completions response.

    Args:
        raw_line: One raw line (bytes) read from the HTTP response body

    Returns:
        None when the line is the `[DONE]` end-of-stream marker; otherwise the
        text content carried by the line, or '' when it carries none (blank
        line, non-data line, malformed JSON, or a chunk without text content).

    Raises:
        UnicodeDecodeError: If the line is not valid UTF-8.
    """
    line = raw_line.decode('utf-8').strip()
    if not line.startswith('data:'):
        return ''

    # The space after "data:" is optional in the event-stream format
    data_str = line[5:].lstrip()
    if data_str == '[DONE]':
        return None

    try:
        data = json.loads(data_str)
    except json.JSONDecodeError:
        return ''

    # Check every level so an unexpected JSON shape skips this line instead of
    # raising and aborting the whole spoken reply
    if not isinstance(data, dict):
        return ''
    choices = data.get('choices')
    if not isinstance(choices, list) or not choices or not isinstance(choices[0], dict):
        return ''
    delta = choices[0].get('delta')
    if not isinstance(delta, dict):
        return ''
    content = delta.get('content')
    return content if isinstance(content, str) else ''


async def _handle_say(message, args):
    """
    Handle !miku say command.
    Send user message to LLM and speak the response in voice chat.

    Phase 3: Text → LLM → Voice (STT deferred to later phase)

    Args:
        message: Discord message object that triggered the command
        args: Words of the user's message; joined with single spaces

    Failures after validation are logged and reported in the invoking channel
    rather than raised.
    """
    # Validate args
    if not args:
        await message.channel.send("❌ Usage: `!miku say <your message>`")
        return

    # Check active voice session
    session = voice_manager.active_session
    if not session:
        await message.channel.send("❌ No active voice session! Use `!miku join` first.")
        return

    if not session.audio_source:
        await message.channel.send("❌ Audio source not connected!")
        return

    # Extract user message
    user_message = " ".join(args)

    try:
        # Show processing indicator
        await message.channel.send(f"💭 Processing: *\"{user_message}\"*")
        logger.info(f"Voice say: user={message.author.name}, message={user_message}")

        # Project module named `globals`; kept as a function-scope import so
        # the builtin is shadowed only inside this function
        import globals

        # Simple system prompt for voice responses
        system_prompt = """You are Hatsune Miku, the virtual singer.
Respond naturally and concisely as Miku would in a voice conversation.
Keep responses short (1-3 sentences) since they will be spoken aloud."""

        payload = {
            "model": globals.TEXT_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message}
            ],
            "stream": True,
            "temperature": 0.8,
            "max_tokens": 200  # Shorter for voice
        }

        headers = {'Content-Type': 'application/json'}
        llama_url = get_current_gpu_url()

        logger.info(f"Streaming LLM from {llama_url}")

        # Stream LLM response and send tokens to TTS
        spoken_tokens = []
        async with aiohttp.ClientSession() as http_session:
            async with http_session.post(
                f"{llama_url}/v1/chat/completions",
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=60)
            ) as response:
                if response.status != 200:
                    # Bound the body so the error reply below stays sendable
                    error_text = (await response.text())[:_MAX_LLM_ERROR_CHARS]
                    raise RuntimeError(f"LLM error {response.status}: {error_text}")

                # Forward each text token to TTS as soon as it arrives
                async for raw_line in response.content:
                    token = _extract_stream_token(raw_line)
                    if token is None:
                        break
                    if token:
                        await session.audio_source.send_token(token)
                        spoken_tokens.append(token)

                # Send flush command to trigger synthesis of remaining tokens
                await session.audio_source.flush()

                # Show what Miku said
                full_response = "".join(spoken_tokens)
                await message.channel.send(f"🎤 Miku: *\"{full_response.strip()}\"*")
                logger.info(f"✓ Voice say complete: {full_response.strip()}")
                await message.add_reaction("✅")

    except Exception as e:
        logger.error(f"Failed to generate voice response: {e}", exc_info=True)
        await message.channel.send(f"❌ Error generating voice response: {e}")


async def _handle_listen(message, args):
    """
    Handle !miku listen command.
    Begin speech-to-text listening for one user in Miku's voice channel.

    Usage:
        !miku listen - Start listening to command author
        !miku listen @user - Start listening to mentioned user
    """
    session = voice_manager.active_session

    # Listening needs a session whose voice client exists and is connected
    if not (session and session.voice_client and session.voice_client.is_connected()):
        await message.channel.send("❌ I'm not in a voice channel! Use `!miku join` first.")
        return

    # With arguments and at least one mention, the first mention is the
    # target; otherwise the command author is
    target_user = message.mentions[0] if args and message.mentions else message.author

    member_voice = target_user.voice
    if not (member_voice and member_voice.channel):
        await message.channel.send(f"❌ {target_user.mention} is not in a voice channel!")
        return

    # The target has to share Miku's voice channel
    if member_voice.channel.id != session.voice_client.channel.id:
        await message.channel.send(
            f"❌ {target_user.mention} must be in the same voice channel as me!"
        )
        return

    try:
        await session.start_listening(target_user)

        confirmation = (
            f"👂 Now listening to {target_user.mention}'s voice! "
            f"Speak to me and I'll respond. Use `!miku stop-listening` to stop."
        )
        await message.channel.send(confirmation)
        await message.add_reaction("👂")
        logger.info(f"Started listening to user {target_user.id} ({target_user.name})")

    except Exception as e:
        # Log with the traceback first, then tell the user what went wrong
        logger.error(f"Failed to start listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to start listening: {str(e)}")


async def _handle_stop_listening(message, args):
    """
    Handle !miku stop-listening command.
    Stop speech-to-text listening for one user in the active voice session.

    Usage:
        !miku stop-listening - Stop listening to command author
        !miku stop-listening @user - Stop listening to mentioned user
    """
    session = voice_manager.active_session

    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # With arguments and at least one mention, the first mention is the
    # target; otherwise the command author is
    target_user = message.mentions[0] if args and message.mentions else message.author

    try:
        # The session is told to stop by user ID, not by member object
        await session.stop_listening(target_user.id)
        await message.channel.send(f"🔇 Stopped listening to {target_user.mention}.")
        await message.add_reaction("🔇")
        logger.info(f"Stopped listening to user {target_user.id} ({target_user.name})")

    except Exception as e:
        # Log with the traceback first, then tell the user what went wrong
        logger.error(f"Failed to stop listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to stop listening: {str(e)}")