# bot/utils/voice_manager.py

# voice_manager.py
"""
Voice session manager for Miku Discord bot.
Handles Discord voice channel connections, resource locking, and feature blocking during voice sessions.

During a voice session:
- GPU switches to AMD for text inference only
- Vision model is blocked (keeps GTX 1660 for TTS)
- Image generation is blocked
- Bipolar mode interactions are disabled
- Profile picture switching is locked
- Autonomous engine is paused
- Scheduled events are paused
- Text channels are paused (messages queued; the queue is currently discarded on resume)
- Figurine notifier is paused
"""

import asyncio
import json
import os
from typing import Optional
import discord
import globals
from utils.logger import get_logger

# Module-level logger used by VoiceSessionManager and VoiceSession below.
logger = get_logger('voice_manager')


class VoiceSessionManager:
    """
    Singleton manager for voice chat sessions.
    Ensures only one voice session active at a time and manages all resource locks.

    Starting a session applies a fixed sequence of locks (GPU state file, vision
    model, image generation, text channels, bipolar mode, profile picture,
    autonomous engine, scheduler, figurine notifier). Ending a session, or a
    failed start, runs the matching release steps. Release steps are isolated
    from each other so that one failing step cannot leave the remaining
    features locked.

    NOTE(review): the GPU state file written on start is not reverted by any
    release step - confirm whether it should be restored when a session ends.
    """

    _instance = None

    def __new__(cls):
        # Cache the first instance; every later VoiceSessionManager() call
        # returns that same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # __init__ runs on every VoiceSessionManager() call, so guard against
        # resetting the state of the already-initialized singleton.
        if self._initialized:
            return

        self.active_session: Optional['VoiceSession'] = None
        self.session_lock = asyncio.Lock()
        self._initialized = True
        logger.info("VoiceSessionManager initialized")

    async def start_session(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
        """
        Start a voice session with full resource locking.

        Args:
            guild_id: Discord guild ID
            voice_channel: Voice channel to join
            text_channel: Text channel for voice prompts

        Raises:
            Exception: If session already active or resources can't be locked
        """
        async with self.session_lock:
            if self.active_session:
                raise Exception("Voice session already active")

            logger.info(f"Starting voice session in {voice_channel.name} (guild {guild_id})")

            try:
                # 1. Switch to AMD GPU for text inference
                await self._switch_to_amd_gpu()

                # 2. Block vision model loading
                await self._block_vision_model()

                # 3. Disable image generation (ComfyUI)
                await self._disable_image_generation()

                # 4. Pause text channel inference (queue messages)
                await self._pause_text_channels()

                # 5. Disable bipolar mode interactions (Miku/Evil Miku arguments)
                await self._disable_bipolar_mode()

                # 6. Disable profile picture switching
                await self._disable_profile_picture_switching()

                # 7. Pause autonomous engine
                await self._pause_autonomous_engine()

                # 8. Pause scheduled events
                await self._pause_scheduled_events()

                # 9. Pause figurine notifier
                await self._pause_figurine_notifier()

                # 10. Create the session object
                # Note: Actual voice connection will be implemented in Phase 2
                self.active_session = VoiceSession(guild_id, voice_channel, text_channel)

                logger.info("✓ Voice session started successfully")

            except Exception as e:
                logger.error(f"Failed to start voice session: {e}", exc_info=True)
                # Roll back whatever was locked before the failure, then let
                # the caller see the original error.
                await self._cleanup_failed_start()
                raise

    async def end_session(self):
        """
        End voice session and release all resources.

        Every release step is attempted even if an earlier one fails, and the
        active session is always cleared afterwards.

        Raises:
            Exception: The first error raised by a release step, re-raised
                after all remaining steps have run.
        """
        async with self.session_lock:
            if not self.active_session:
                logger.warning("No active voice session to end")
                return

            logger.info("Ending voice session...")

            # Disconnecting from voice is a Phase 2 item; Phase 1 only releases
            # the resource locks.
            failures = await self._release_all_resources("Error during session cleanup")

            # Clear the session whether or not a step failed: every step has
            # been attempted, so nothing is left that a retry could release.
            self.active_session = None

            if failures:
                raise failures[0]

            logger.info("✓ Voice session ended successfully, all resources released")

    async def _release_all_resources(self, error_prefix: str) -> list:
        """
        Run every release step, isolating failures from one another.

        Args:
            error_prefix: Text placed before the error in the log line of a
                failing step.

        Returns:
            The exceptions raised by failing steps, in step order (empty when
            every step succeeded).
        """
        release_steps = (
            self._resume_text_channels,
            self._unblock_vision_model,
            self._enable_image_generation,
            self._enable_bipolar_mode,
            self._enable_profile_picture_switching,
            self._resume_autonomous_engine,
            self._resume_scheduled_events,
            self._resume_figurine_notifier,
        )

        failures = []
        for step in release_steps:
            try:
                await step()
            except Exception as e:
                # Keep going: a failure here must not leave later features locked.
                logger.error(f"{error_prefix}: {e}", exc_info=True)
                failures.append(e)
        return failures

    def _run_optional_hook(self, module_name: str, hook_path: str, success_message: str,
                           *, tolerate_attribute_error: bool = True) -> None:
        """
        Call a zero-argument hook from a module that may not provide it yet.

        A missing module or a missing hook is logged as a warning and skipped.

        Args:
            module_name: Absolute module name, e.g. "utils.bipolar_mode".
            hook_path: Dotted attribute path inside the module that resolves to
                the callable, e.g. "pause_bipolar_interactions" or
                "profile_picture_manager.lock_switching".
            success_message: Logged at info level after the hook returns.
            tolerate_attribute_error: When True, an AttributeError raised by
                the hook itself is logged and swallowed; when False it
                propagates to the caller.
        """
        import importlib  # function-scope import: not part of the module header

        module_label = module_name.rpartition(".")[2]
        try:
            module = importlib.import_module(module_name)
        except ModuleNotFoundError:
            logger.warning(f"{module_label} module not found, skipping")
            return
        except ImportError as e:
            # The module exists but failed while importing.
            logger.warning(f"{module_label} module could not be imported ({e}), skipping")
            return

        # Resolve the hook explicitly, so "not implemented yet" is reported
        # only when the attribute is really absent.
        target = module
        for attr_name in hook_path.split("."):
            target = getattr(target, attr_name, None)
            if target is None:
                logger.warning(f"{hook_path} not implemented yet, skipping")
                return

        try:
            target()
        except AttributeError as e:
            if not tolerate_attribute_error:
                raise
            logger.warning(f"{hook_path} raised AttributeError ({e}), skipping", exc_info=True)
            return

        logger.info(success_message)

    # ==================== Resource Locking Methods ====================

    async def _switch_to_amd_gpu(self):
        """
        Switch text inference to AMD GPU (RX 6800).

        Writes {"current_gpu": "amd", "reason": "voice_session"} to
        memory/gpu_state.json, creating the directory if needed.

        Raises:
            Exception: Any error from creating the directory or writing the
                file is logged and re-raised.
        """
        try:
            gpu_state_file = os.path.join("memory", "gpu_state.json")
            os.makedirs("memory", exist_ok=True)

            with open(gpu_state_file, "w", encoding="utf-8") as f:
                json.dump({"current_gpu": "amd", "reason": "voice_session"}, f)

            logger.info("✓ Switched to AMD GPU for text inference")
        except Exception as e:
            logger.error(f"Failed to switch GPU: {e}")
            raise

    async def _block_vision_model(self):
        """Prevent vision model from loading during voice session"""
        globals.VISION_MODEL_BLOCKED = True
        logger.info("✓ Vision model blocked")

    async def _unblock_vision_model(self):
        """Allow vision model to load after voice session"""
        globals.VISION_MODEL_BLOCKED = False
        logger.info("✓ Vision model unblocked")

    async def _disable_image_generation(self):
        """Block ComfyUI image generation during voice session"""
        globals.IMAGE_GENERATION_BLOCKED = True
        globals.IMAGE_GENERATION_BLOCK_MESSAGE = (
            "🎤 I can't draw right now, I'm talking in voice chat! "
            "Ask me again after I leave the voice channel."
        )
        logger.info("✓ Image generation disabled")

    async def _enable_image_generation(self):
        """Re-enable image generation after voice session"""
        globals.IMAGE_GENERATION_BLOCKED = False
        globals.IMAGE_GENERATION_BLOCK_MESSAGE = None
        logger.info("✓ Image generation re-enabled")

    async def _pause_text_channels(self):
        """Queue text messages instead of processing during voice session"""
        globals.VOICE_SESSION_ACTIVE = True
        globals.TEXT_MESSAGE_QUEUE = []
        logger.info("✓ Text channels paused (messages will be queued)")

    async def _resume_text_channels(self):
        """
        Mark the voice session inactive and drop any queued text messages.

        Queue processing is not implemented yet, so queued messages are
        discarded with a warning.
        """
        globals.VOICE_SESSION_ACTIVE = False

        # The queue attribute may never have been created when this runs as
        # cleanup for a start that failed before _pause_text_channels.
        queued_count = len(getattr(globals, "TEXT_MESSAGE_QUEUE", None) or [])

        if queued_count > 0:
            logger.info(f"Resuming text channels, {queued_count} messages queued")
            # TODO: Process queue in Phase 2 (need message handler integration)
            # For now, just clear the queue
            globals.TEXT_MESSAGE_QUEUE = []
            logger.warning(f"Discarded {queued_count} queued messages (queue processing not yet implemented)")
        else:
            logger.info("✓ Text channels resumed (no queued messages)")

    async def _disable_bipolar_mode(self):
        """Prevent Miku/Evil Miku arguments during voice session"""
        self._run_optional_hook(
            "utils.bipolar_mode",
            "pause_bipolar_interactions",
            "✓ Bipolar mode interactions disabled",
        )

    async def _enable_bipolar_mode(self):
        """Re-enable Miku/Evil Miku arguments after voice session"""
        self._run_optional_hook(
            "utils.bipolar_mode",
            "resume_bipolar_interactions",
            "✓ Bipolar mode interactions re-enabled",
        )

    async def _disable_profile_picture_switching(self):
        """Lock profile picture during voice session"""
        self._run_optional_hook(
            "utils.profile_picture_manager",
            "profile_picture_manager.lock_switching",
            "✓ Profile picture switching disabled",
            # Errors raised inside lock_switching propagate to the caller.
            tolerate_attribute_error=False,
        )

    async def _enable_profile_picture_switching(self):
        """Unlock profile picture after voice session"""
        self._run_optional_hook(
            "utils.profile_picture_manager",
            "profile_picture_manager.unlock_switching",
            "✓ Profile picture switching re-enabled",
            # Errors raised inside unlock_switching propagate to the caller.
            tolerate_attribute_error=False,
        )

    async def _pause_autonomous_engine(self):
        """Pause autonomous message generation during voice session"""
        self._run_optional_hook(
            "utils.autonomous",
            "pause_autonomous_system",
            "✓ Autonomous engine paused",
        )

    async def _resume_autonomous_engine(self):
        """Resume autonomous message generation after voice session"""
        self._run_optional_hook(
            "utils.autonomous",
            "resume_autonomous_system",
            "✓ Autonomous engine resumed",
        )

    async def _pause_scheduled_events(self):
        """Pause all scheduled jobs during voice session (best-effort: errors are logged, not raised)"""
        try:
            globals.scheduler.pause()
            logger.info("✓ Scheduled events paused")
        except Exception as e:
            logger.error(f"Failed to pause scheduler: {e}")

    async def _resume_scheduled_events(self):
        """Resume scheduled jobs after voice session (best-effort: errors are logged, not raised)"""
        try:
            globals.scheduler.resume()
            logger.info("✓ Scheduled events resumed")
        except Exception as e:
            logger.error(f"Failed to resume scheduler: {e}")

    async def _pause_figurine_notifier(self):
        """Pause figurine notifications during voice session"""
        try:
            # Assuming figurine notifier is a scheduled job
            globals.scheduler.pause_job('figurine_notifier')
            logger.info("✓ Figurine notifier paused")
        except Exception as e:
            # Job might not exist, that's okay
            logger.debug(f"Could not pause figurine notifier (may not exist): {e}")

    async def _resume_figurine_notifier(self):
        """Resume figurine notifications after voice session"""
        try:
            globals.scheduler.resume_job('figurine_notifier')
            logger.info("✓ Figurine notifier resumed")
        except Exception as e:
            # Job might not exist, that's okay
            logger.debug(f"Could not resume figurine notifier (may not exist): {e}")

    async def _cleanup_failed_start(self):
        """
        Cleanup resources if session start fails.

        Runs every release step regardless of which lock step failed; errors
        are logged and never raised, so the original start error stays the one
        the caller sees.
        """
        logger.warning("Cleaning up after failed session start...")
        await self._release_all_resources("Error during cleanup")

class VoiceSession:
    """
    State holder for a single voice chat session.

    Phase 1 only records which guild and channels the session belongs to;
    connecting to voice is left for Phase 2.
    """

    def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
        # A new session starts without a voice client and is not marked active.
        self.voice_client: Optional[discord.VoiceClient] = None
        self.active = False

        # Where this session lives.
        self.guild_id, self.voice_channel, self.text_channel = (
            guild_id,
            voice_channel,
            text_channel,
        )

        logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")

    # Phase 2: Implement voice connection, audio streaming, TTS integration


# Global singleton instance, created when this module is imported. Because
# VoiceSessionManager.__new__ caches its instance, any later
# VoiceSessionManager() call returns this same object.
voice_manager = VoiceSessionManager()
Untested Phase 1 (Foundation & Resource management) of voice chat integration 2026-01-16 13:01:08 +02:00			`# voice_manager.py`
			`"""`
			`Voice session manager for Miku Discord bot.`
			`Handles Discord voice channel connections, resource locking, and feature blocking during voice sessions.`

			`During a voice session:`
			`- GPU switches to AMD for text inference only`
			`- Vision model is blocked (keeps GTX 1660 for TTS)`
			`- Image generation is blocked`
			`- Bipolar mode interactions are disabled`
			`- Profile picture switching is locked`
			`- Autonomous engine is paused`
			`- Scheduled events are paused`
			`- Text channels are paused (messages queued)`
			`"""`

			`import asyncio`
			`import json`
			`import os`
			`from typing import Optional`
			`import discord`
			`import globals`
			`from utils.logger import get_logger`

			`logger = get_logger('voice_manager')`


			`class VoiceSessionManager:`
			`"""`
			`Singleton manager for voice chat sessions.`
			`Ensures only one voice session active at a time and manages all resource locks.`
			`"""`

			`_instance = None`

			`def __new__(cls):`
			`if cls._instance is None:`
			`cls._instance = super().__new__(cls)`
			`cls._instance._initialized = False`
			`return cls._instance`

			`def __init__(self):`
			`if self._initialized:`
			`return`

			`self.active_session: Optional['VoiceSession'] = None`
			`self.session_lock = asyncio.Lock()`
			`self._initialized = True`
			`logger.info("VoiceSessionManager initialized")`

			`async def start_session(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):`
			`"""`
			`Start a voice session with full resource locking.`

			`Args:`
			`guild_id: Discord guild ID`
			`voice_channel: Voice channel to join`
			`text_channel: Text channel for voice prompts`

			`Raises:`
			`Exception: If session already active or resources can't be locked`
			`"""`
			`async with self.session_lock:`
			`if self.active_session:`
			`raise Exception("Voice session already active")`

			`logger.info(f"Starting voice session in {voice_channel.name} (guild {guild_id})")`

			`try:`
			`# 1. Switch to AMD GPU for text inference`
			`await self._switch_to_amd_gpu()`

			`# 2. Block vision model loading`
			`await self._block_vision_model()`

			`# 3. Disable image generation (ComfyUI)`
			`await self._disable_image_generation()`

			`# 4. Pause text channel inference (queue messages)`
			`await self._pause_text_channels()`

			`# 5. Disable bipolar mode interactions (Miku/Evil Miku arguments)`
			`await self._disable_bipolar_mode()`

			`# 6. Disable profile picture switching`
			`await self._disable_profile_picture_switching()`

			`# 7. Pause autonomous engine`
			`await self._pause_autonomous_engine()`

			`# 8. Pause scheduled events`
			`await self._pause_scheduled_events()`

			`# 9. Pause figurine notifier`
			`await self._pause_figurine_notifier()`

			`# 10. Create and connect voice session`
			`self.active_session = VoiceSession(guild_id, voice_channel, text_channel)`
			`# Note: Actual voice connection will be implemented in Phase 2`

			`logger.info(f"✓ Voice session started successfully")`

			`except Exception as e:`
			`logger.error(f"Failed to start voice session: {e}", exc_info=True)`
			`# Cleanup on failure`
			`await self._cleanup_failed_start()`
			`raise`

			`async def end_session(self):`
			`"""`
			`End voice session and release all resources.`
			`"""`
			`async with self.session_lock:`
			`if not self.active_session:`
			`logger.warning("No active voice session to end")`
			`return`

			`logger.info("Ending voice session...")`

			`try:`
			`# 1. Disconnect from voice (Phase 2 implementation)`
			`# await self.active_session.disconnect()`

			`# 2. Resume text channel inference`
			`await self._resume_text_channels()`

			`# 3. Unblock vision model`
			`await self._unblock_vision_model()`

			`# 4. Re-enable image generation`
			`await self._enable_image_generation()`

			`# 5. Re-enable bipolar mode interactions`
			`await self._enable_bipolar_mode()`

			`# 6. Re-enable profile picture switching`
			`await self._enable_profile_picture_switching()`

			`# 7. Resume autonomous engine`
			`await self._resume_autonomous_engine()`

			`# 8. Resume scheduled events`
			`await self._resume_scheduled_events()`

			`# 9. Resume figurine notifier`
			`await self._resume_figurine_notifier()`

			`# 10. Clear active session`
			`self.active_session = None`

			`logger.info("✓ Voice session ended successfully, all resources released")`

			`except Exception as e:`
			`logger.error(f"Error during session cleanup: {e}", exc_info=True)`
			`# Force clear session even on error`
			`self.active_session = None`
			`raise`

			`# ==================== Resource Locking Methods ====================`

			`async def _switch_to_amd_gpu(self):`
			`"""Switch text inference to AMD GPU (RX 6800)"""`
			`try:`
			`gpu_state_file = os.path.join("memory", "gpu_state.json")`
			`os.makedirs("memory", exist_ok=True)`

			`with open(gpu_state_file, "w") as f:`
			`json.dump({"current_gpu": "amd", "reason": "voice_session"}, f)`

			`logger.info("✓ Switched to AMD GPU for text inference")`
			`except Exception as e:`
			`logger.error(f"Failed to switch GPU: {e}")`
			`raise`

			`async def _block_vision_model(self):`
			`"""Prevent vision model from loading during voice session"""`
			`globals.VISION_MODEL_BLOCKED = True`
			`logger.info("✓ Vision model blocked")`

			`async def _unblock_vision_model(self):`
			`"""Allow vision model to load after voice session"""`
			`globals.VISION_MODEL_BLOCKED = False`
			`logger.info("✓ Vision model unblocked")`

			`async def _disable_image_generation(self):`
			`"""Block ComfyUI image generation during voice session"""`
			`globals.IMAGE_GENERATION_BLOCKED = True`
			`globals.IMAGE_GENERATION_BLOCK_MESSAGE = (`
			`"🎤 I can't draw right now, I'm talking in voice chat! "`
			`"Ask me again after I leave the voice channel."`
			`)`
			`logger.info("✓ Image generation disabled")`

			`async def _enable_image_generation(self):`
			`"""Re-enable image generation after voice session"""`
			`globals.IMAGE_GENERATION_BLOCKED = False`
			`globals.IMAGE_GENERATION_BLOCK_MESSAGE = None`
			`logger.info("✓ Image generation re-enabled")`

			`async def _pause_text_channels(self):`
			`"""Queue text messages instead of processing during voice session"""`
			`globals.VOICE_SESSION_ACTIVE = True`
			`globals.TEXT_MESSAGE_QUEUE = []`
			`logger.info("✓ Text channels paused (messages will be queued)")`

			`async def _resume_text_channels(self):`
			`"""Process queued messages after voice session"""`
			`globals.VOICE_SESSION_ACTIVE = False`
			`queued_count = len(globals.TEXT_MESSAGE_QUEUE)`

			`if queued_count > 0:`
			`logger.info(f"Resuming text channels, {queued_count} messages queued")`
			`# TODO: Process queue in Phase 2 (need message handler integration)`
			`# For now, just clear the queue`
			`globals.TEXT_MESSAGE_QUEUE = []`
			`logger.warning(f"Discarded {queued_count} queued messages (queue processing not yet implemented)")`
			`else:`
			`logger.info("✓ Text channels resumed (no queued messages)")`

			`async def _disable_bipolar_mode(self):`
			`"""Prevent Miku/Evil Miku arguments during voice session"""`
			`try:`
			`from utils.bipolar_mode import pause_bipolar_interactions`
			`pause_bipolar_interactions()`
			`logger.info("✓ Bipolar mode interactions disabled")`
			`except ImportError:`
			`logger.warning("bipolar_mode module not found, skipping")`
			`except AttributeError:`
			`logger.warning("pause_bipolar_interactions not implemented yet, skipping")`

			`async def _enable_bipolar_mode(self):`
			`"""Re-enable Miku/Evil Miku arguments after voice session"""`
			`try:`
			`from utils.bipolar_mode import resume_bipolar_interactions`
			`resume_bipolar_interactions()`
			`logger.info("✓ Bipolar mode interactions re-enabled")`
			`except ImportError:`
			`logger.warning("bipolar_mode module not found, skipping")`
			`except AttributeError:`
			`logger.warning("resume_bipolar_interactions not implemented yet, skipping")`

			`async def _disable_profile_picture_switching(self):`
			`"""Lock profile picture during voice session"""`
			`try:`
			`from utils.profile_picture_manager import profile_picture_manager`
			`if hasattr(profile_picture_manager, 'lock_switching'):`
			`profile_picture_manager.lock_switching()`
			`logger.info("✓ Profile picture switching disabled")`
			`else:`
			`logger.warning("profile_picture_manager.lock_switching not implemented yet, skipping")`
			`except ImportError:`
			`logger.warning("profile_picture_manager module not found, skipping")`

			`async def _enable_profile_picture_switching(self):`
			`"""Unlock profile picture after voice session"""`
			`try:`
			`from utils.profile_picture_manager import profile_picture_manager`
			`if hasattr(profile_picture_manager, 'unlock_switching'):`
			`profile_picture_manager.unlock_switching()`
			`logger.info("✓ Profile picture switching re-enabled")`
			`else:`
			`logger.warning("profile_picture_manager.unlock_switching not implemented yet, skipping")`
			`except ImportError:`
			`logger.warning("profile_picture_manager module not found, skipping")`

			`async def _pause_autonomous_engine(self):`
			`"""Pause autonomous message generation during voice session"""`
			`try:`
			`from utils.autonomous import pause_autonomous_system`
			`pause_autonomous_system()`
			`logger.info("✓ Autonomous engine paused")`
			`except ImportError:`
			`logger.warning("autonomous module not found, skipping")`
			`except AttributeError:`
			`logger.warning("pause_autonomous_system not implemented yet, skipping")`

			`async def _resume_autonomous_engine(self):`
			`"""Resume autonomous message generation after voice session"""`
			`try:`
			`from utils.autonomous import resume_autonomous_system`
			`resume_autonomous_system()`
			`logger.info("✓ Autonomous engine resumed")`
			`except ImportError:`
			`logger.warning("autonomous module not found, skipping")`
			`except AttributeError:`
			`logger.warning("resume_autonomous_system not implemented yet, skipping")`

			`async def _pause_scheduled_events(self):`
			`"""Pause all scheduled jobs during voice session"""`
			`try:`
			`globals.scheduler.pause()`
			`logger.info("✓ Scheduled events paused")`
			`except Exception as e:`
			`logger.error(f"Failed to pause scheduler: {e}")`

			`async def _resume_scheduled_events(self):`
			`"""Resume scheduled jobs after voice session"""`
			`try:`
			`globals.scheduler.resume()`
			`logger.info("✓ Scheduled events resumed")`
			`except Exception as e:`
			`logger.error(f"Failed to resume scheduler: {e}")`

			`async def _pause_figurine_notifier(self):`
			`"""Pause figurine notifications during voice session"""`
			`try:`
			`# Assuming figurine notifier is a scheduled job`
			`globals.scheduler.pause_job('figurine_notifier')`
			`logger.info("✓ Figurine notifier paused")`
			`except Exception as e:`
			`# Job might not exist, that's okay`
			`logger.debug(f"Could not pause figurine notifier (may not exist): {e}")`

			`async def _resume_figurine_notifier(self):`
			`"""Resume figurine notifications after voice session"""`
			`try:`
			`globals.scheduler.resume_job('figurine_notifier')`
			`logger.info("✓ Figurine notifier resumed")`
			`except Exception as e:`
			`# Job might not exist, that's okay`
			`logger.debug(f"Could not resume figurine notifier (may not exist): {e}")`

			`async def _cleanup_failed_start(self):`
			`"""Cleanup resources if session start fails"""`
			`logger.warning("Cleaning up after failed session start...")`
			`try:`
			`await self._unblock_vision_model()`
			`await self._enable_image_generation()`
			`await self._resume_text_channels()`
			`await self._enable_bipolar_mode()`
			`await self._enable_profile_picture_switching()`
			`await self._resume_autonomous_engine()`
			`await self._resume_scheduled_events()`
			`await self._resume_figurine_notifier()`
			`except Exception as e:`
			`logger.error(f"Error during cleanup: {e}")`


			`class VoiceSession:`
			`"""`
			`Represents an active voice chat session.`
			`Phase 1: Basic structure only, voice connection in Phase 2.`
			`"""`

			`def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):`
			`self.guild_id = guild_id`
			`self.voice_channel = voice_channel`
			`self.text_channel = text_channel`
			`self.voice_client: Optional[discord.VoiceClient] = None`
			`self.active = False`

			`logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")`

			`# Phase 2: Implement voice connection, audio streaming, TTS integration`


			`# Global singleton instance`
			`voice_manager = VoiceSessionManager()`