Untested Phase 1 (Foundation & Resource management) of voice chat integration

2026-01-16 13:01:08 +02:00
parent 353c9c9583
commit 911f11ee9f
9 changed files with 2288 additions and 0 deletions
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -125,6 +125,19 @@ async def on_message(message):
    if message.author == globals.client.user:
        return
    
+    # Check for voice commands first (!miku join, !miku leave, !miku voice-status)
+    if not isinstance(message.channel, discord.DMChannel) and message.content.strip().lower().startswith('!miku '):
+        from commands.voice import handle_voice_command
+        
+        parts = message.content.strip().split()
+        if len(parts) >= 2:
+            cmd = parts[1].lower()
+            args = parts[2:] if len(parts) > 2 else []
+            
+            if cmd in ['join', 'leave', 'voice-status']:
+                await handle_voice_command(message, cmd, args)
+                return
+    
    # Skip processing if a bipolar argument is in progress in this channel
    if not isinstance(message.channel, discord.DMChannel):
        from utils.bipolar_mode import is_argument_in_progress
@@ -196,6 +209,14 @@ async def on_message(message):
                logger.error(f"Failed to fetch replied message for context: {e}")

        async with message.channel.typing():
+            # Check if vision model is blocked (voice session active)
+            if message.attachments and globals.VISION_MODEL_BLOCKED:
+                await message.channel.send(
+                    "🎤 I can't look at images or videos right now, I'm talking in voice chat! "
+                    "Send it again after I leave the voice channel."
+                )
+                return
+            
            # If message has an image, video, or GIF attachment
            if message.attachments:
                for attachment in message.attachments:
@@ -504,6 +525,13 @@ async def on_message(message):
            if is_image_request and image_prompt:
                logger.info(f"🎨 Image generation request detected: '{image_prompt}' from {message.author.display_name}")
                
+                # Block image generation during voice sessions
+                if globals.IMAGE_GENERATION_BLOCKED:
+                    await message.channel.send(globals.IMAGE_GENERATION_BLOCK_MESSAGE)
+                    await message.add_reaction('🎤')
+                    logger.info("🚫 Image generation blocked - voice session active")
+                    return
+                
                # Handle the image generation workflow
                success = await handle_image_generation_request(message, image_prompt)
                if success:
--- a/bot/commands/voice.py
+++ b/bot/commands/voice.py
@@ -0,0 +1,229 @@
+# voice.py
+"""
+Voice channel commands for Miku Discord bot.
+Handles joining, leaving, and status commands for voice chat sessions.
+"""
+
+import discord
+from utils.voice_manager import voice_manager
+from utils.logger import get_logger
+
+logger = get_logger('voice_commands')
+
+
+async def handle_voice_command(message, cmd, args):
+    """
+    Route an already-parsed voice command to its handler.
+
+    Args:
+        message: Discord message object that carried the command
+        cmd: Command name (join, leave, voice-status)
+        args: Tokens that followed the command name (only `join` uses them)
+    """
+    if cmd == 'join':
+        await _handle_join(message, args)
+        return
+
+    if cmd == 'leave':
+        await _handle_leave(message)
+        return
+
+    if cmd == 'voice-status':
+        await _handle_status(message)
+        return
+
+    # Anything else is reported back to the channel instead of being ignored
+    await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
+
+
+async def _handle_join(message, args):
+    """
+    Handle !miku join command.
+
+    Picks the target voice channel (a `<#id>` mention in args[0] if given,
+    otherwise the channel the author is connected to), checks that the bot
+    may connect and speak there, then asks voice_manager to start a session
+    that uses the invoking text channel for prompts. Errors raised while
+    starting the session are reported to the channel and logged, not re-raised.
+    """
+    if args and args[0].startswith('<#'):
+        # Channel mentioned (e.g., !miku join #voice-chat): drop "<#" and ">"
+        mention = args[0]
+        try:
+            voice_channel = message.guild.get_channel(int(mention[2:-1]))
+
+            # get_channel may hand back a non-voice channel (or nothing at all)
+            if not isinstance(voice_channel, discord.VoiceChannel):
+                await message.channel.send("❌ That's not a voice channel!")
+                return
+        except (ValueError, AttributeError):
+            await message.channel.send("❌ Invalid channel!")
+            return
+
+    elif message.author.voice and message.author.voice.channel:
+        # No mention: follow the author into their current voice channel
+        voice_channel = message.author.voice.channel
+
+    else:
+        await message.channel.send(
+            "❌ You must be in a voice channel! "
+            "Or mention a voice channel like `!miku join #voice-chat`"
+        )
+        return
+
+    # Both connect and speak are required before a session is attempted
+    bot_permissions = voice_channel.permissions_for(message.guild.me)
+
+    if not bot_permissions.connect:
+        await message.channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
+        return
+
+    if not bot_permissions.speak:
+        await message.channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
+        return
+
+    try:
+        await message.channel.send(f"🎤 Joining {voice_channel.mention}...")
+
+        # The current text channel becomes the prompt channel for the session
+        await voice_manager.start_session(message.guild.id, voice_channel, message.channel)
+
+        resource_notes = "\n".join([
+            "• Text inference on AMD GPU only",
+            "• Vision model disabled",
+            "• Image generation disabled",
+            "• Other text channels paused",
+        ])
+
+        embed = discord.Embed(
+            title="🎤 Voice Chat Active",
+            description=f"I've joined {voice_channel.mention}!",
+            color=discord.Color.from_rgb(134, 206, 203),  # Miku teal
+        )
+        embed.add_field(
+            name="How to use",
+            value=f"Send messages in {message.channel.mention} to make me speak!",
+            inline=False,
+        )
+        embed.add_field(name="⚠️ Resource Mode", value=resource_notes, inline=False)
+        embed.set_footer(text="Use !miku leave to end the session")
+
+        await message.channel.send(embed=embed)
+
+        logger.info(f"Voice session started by {message.author} in {voice_channel.name}")
+
+    except Exception as e:
+        await message.channel.send(f"❌ Failed to join voice: {e}")
+        logger.error(f"Failed to start voice session: {e}", exc_info=True)
+
+
+async def _handle_leave(message):
+    """
+    Handle !miku leave command.
+
+    Ends the active voice session through voice_manager and confirms in the
+    invoking channel. Refuses when there is no session, or when the session
+    belongs to a different guild than the one the command came from.
+    """
+    session = voice_manager.active_session
+
+    if not session:
+        await message.channel.send("❌ I'm not in a voice channel!")
+        return
+
+    # Only the guild that owns the session may end it
+    if session.guild_id != message.guild.id:
+        await message.channel.send("❌ I'm in a voice channel in a different server!")
+        return
+
+    try:
+        # Read the name first: the session object is cleared by end_session()
+        left_channel_name = session.voice_channel.name
+
+        await message.channel.send("👋 Leaving voice channel...")
+
+        await voice_manager.end_session()
+
+        released_notes = "\n".join([
+            "• Vision model available",
+            "• Image generation available",
+            "• Text channels resumed",
+            "• All features restored",
+        ])
+
+        embed = discord.Embed(
+            title="👋 Voice Chat Ended",
+            description=f"Left {left_channel_name}",
+            color=discord.Color.from_rgb(134, 206, 203),
+        )
+        embed.add_field(name="✅ Resources Released", value=released_notes, inline=False)
+
+        await message.channel.send(embed=embed)
+
+        logger.info(f"Voice session ended by {message.author}")
+
+    except Exception as e:
+        await message.channel.send(f"⚠️ Error leaving voice: {e}")
+        logger.error(f"Failed to end voice session: {e}", exc_info=True)
+
+
+async def _handle_status(message):
+    """
+    Handle !miku voice-status command.
+
+    Replies with one of three things: a "no session" embed, a short notice
+    when the session belongs to another guild, or a full status embed listing
+    the voice channel, the prompt channel, and the features currently blocked.
+    """
+    session = voice_manager.active_session
+
+    if not session:
+        idle_embed = discord.Embed(
+            title="🔇 No Active Voice Session",
+            description="I'm not currently in a voice channel.",
+            color=discord.Color.greyple(),
+        )
+        idle_embed.add_field(
+            name="To start",
+            value="Use `!miku join` while in a voice channel",
+            inline=False,
+        )
+        await message.channel.send(embed=idle_embed)
+        return
+
+    # Session details are only shown inside the guild that owns the session
+    if session.guild_id != message.guild.id:
+        await message.channel.send("ℹ️ I'm in a voice channel in a different server.")
+        return
+
+    allocation_text = "\n".join([
+        "**GPU Usage:**",
+        "• AMD RX 6800: Text model + RVC",
+        "• GTX 1660: Soprano TTS only",
+        "",
+        "**Blocked Features:**",
+        "• ❌ Vision model",
+        "• ❌ Image generation",
+        "• ❌ Bipolar mode",
+        "• ❌ Profile picture changes",
+        "• ⏸️ Autonomous engine",
+        "• ⏸️ Scheduled events",
+        "• 📦 Other text channels (queued)",
+    ])
+
+    embed = discord.Embed(
+        title="🎤 Voice Session Active",
+        description="Currently in voice chat",
+        color=discord.Color.from_rgb(134, 206, 203),
+    )
+    embed.add_field(name="Voice Channel", value=session.voice_channel.mention, inline=True)
+    embed.add_field(name="Prompt Channel", value=session.text_channel.mention, inline=True)
+    embed.add_field(name="📊 Resource Allocation", value=allocation_text, inline=False)
+    embed.set_footer(text="Use !miku leave to end the session")
+
+    await message.channel.send(embed=embed)
--- a/bot/globals.py
+++ b/bot/globals.py
@@ -96,3 +96,12 @@ LAST_FULL_PROMPT = ""
 # Persona Dialogue System (conversations between Miku and Evil Miku)
 LAST_PERSONA_DIALOGUE_TIME = 0  # Timestamp of last dialogue for cooldown

+# Voice Chat Session State
+# Set to True by the voice manager when text channels are paused, False when resumed.
+VOICE_SESSION_ACTIVE = False
+# Queue for messages received during voice session.
+# NOTE(review): the voice manager only resets this list; confirm where messages get appended.
+TEXT_MESSAGE_QUEUE = []
+
+# Feature Blocking Flags (set during voice session)
+# Checked by on_message before handling attachments.
+VISION_MODEL_BLOCKED = False
+# Checked by on_message before starting an image generation request.
+IMAGE_GENERATION_BLOCKED = False
+# User-facing refusal text; a string while generation is blocked, None otherwise.
+IMAGE_GENERATION_BLOCK_MESSAGE = None
+
--- a/bot/requirements.txt
+++ b/bot/requirements.txt
@@ -20,3 +20,5 @@ numpy
 scikit-learn
 transformers
 torch
+PyNaCl>=1.5.0
+websockets>=12.0
--- a/bot/utils/autonomous.py
+++ b/bot/utils/autonomous.py
@@ -17,12 +17,34 @@ logger = get_logger('autonomous')
 _last_action_execution = {}  # guild_id -> timestamp
 _MIN_ACTION_INTERVAL = 30  # Minimum 30 seconds between autonomous actions

+# Pause state for voice sessions
+_autonomous_paused = False
+
+
+def pause_autonomous_system():
+    """
+    Pause autonomous message generation (called during voice sessions).
+
+    Sets the module-level pause flag; while it is set, autonomous_tick_v2
+    returns immediately without doing anything.
+    """
+    global _autonomous_paused
+    _autonomous_paused = True
+    logger.info("Autonomous system paused")
+
+
+def resume_autonomous_system():
+    """
+    Resume autonomous message generation (called after voice sessions).
+
+    Clears the module-level pause flag so autonomous_tick_v2 proceeds to its
+    normal checks again.
+    """
+    global _autonomous_paused
+    _autonomous_paused = False
+    logger.info("Autonomous system resumed")
+

 async def autonomous_tick_v2(guild_id: int):
    """
    New autonomous tick that uses context-aware decision making.
    Replaces the random 10% chance with intelligent decision.
    """
+    # Check if autonomous is paused (voice session)
+    if _autonomous_paused:
+        logger.debug(f"[V2] Autonomous system paused (voice session active)")
+        return
+    
    # Rate limiting check
    now = time.time()
    if guild_id in _last_action_execution:
--- a/bot/utils/bipolar_mode.py
+++ b/bot/utils/bipolar_mode.py
@@ -28,6 +28,31 @@ MIN_EXCHANGES = 4  # Minimum number of back-and-forth exchanges before ending ca
 ARGUMENT_TRIGGER_CHANCE = 0.15  # 15% chance for the other Miku to break through
 DELAY_BETWEEN_MESSAGES = (2.0, 5.0)  # Random delay between argument messages (seconds)

+# Pause state for voice sessions
+_bipolar_interactions_paused = False
+
+# ============================================================================
+# VOICE SESSION PAUSE/RESUME
+# ============================================================================
+
+def pause_bipolar_interactions():
+    """
+    Pause all bipolar interactions (called during voice sessions).
+
+    Sets the module-level pause flag; maybe_trigger_argument returns False
+    while it is set.
+    """
+    global _bipolar_interactions_paused
+    _bipolar_interactions_paused = True
+    logger.info("Bipolar interactions paused")
+
+
+def resume_bipolar_interactions():
+    """
+    Resume bipolar interactions (called after voice sessions).
+
+    Clears the module-level pause flag so maybe_trigger_argument is no longer
+    short-circuited by it.
+    """
+    global _bipolar_interactions_paused
+    _bipolar_interactions_paused = False
+    logger.info("Bipolar interactions resumed")
+
+
+def is_bipolar_paused():
+    """
+    Check if bipolar interactions are currently paused.
+
+    Returns:
+        The current value of the module-level pause flag (True while paused).
+    """
+    return _bipolar_interactions_paused
+
 # ============================================================================
 # STATE PERSISTENCE
 # ============================================================================
@@ -1039,6 +1064,11 @@ async def maybe_trigger_argument(channel: discord.TextChannel, client, context:
    if not globals.BIPOLAR_MODE:
        return False
    
+    # Check if bipolar interactions are paused (voice session)
+    if is_bipolar_paused():
+        logger.debug("Bipolar argument blocked (voice session active)")
+        return False
+    
    if is_argument_in_progress(channel.id):
        return False
    
--- a/bot/utils/profile_picture_manager.py
+++ b/bot/utils/profile_picture_manager.py
@@ -47,6 +47,17 @@ class ProfilePictureManager:
    
    def __init__(self):
+        """Ensure the picture directory exists and start with switching unlocked."""
        self._ensure_directories()
+        # Lock for voice session: True while locked; checked before a picture change is attempted
+        self.switching_locked = False
+    
+    def lock_switching(self):
+        """
+        Lock profile picture changes during voice session.
+
+        Sets `switching_locked`; code that checks the flag returns a failure
+        result instead of changing the picture.
+        """
+        self.switching_locked = True
+        logger.info("Profile picture switching locked")
+        
+    def unlock_switching(self):
+        """
+        Unlock profile picture changes after voice session.
+
+        Clears `switching_locked` so picture changes are no longer refused
+        because of the lock.
+        """
+        self.switching_locked = False
+        logger.info("Profile picture switching unlocked")
    
    def _ensure_directories(self):
        """Ensure profile picture directory exists"""
@@ -247,6 +258,16 @@ class ProfilePictureManager:
        Returns:
            Dict with status and metadata
        """
+        # Check if switching is locked (voice session active)
+        if self.switching_locked:
+            logger.info("Profile picture change blocked (voice session active)")
+            return {
+                "success": False,
+                "source": None,
+                "error": "Profile picture switching locked during voice session",
+                "metadata": {}
+            }
+        
        result = {
            "success": False,
            "source": None,
--- a/bot/utils/voice_manager.py
+++ b/bot/utils/voice_manager.py
@@ -0,0 +1,358 @@
+# voice_manager.py
+"""
+Voice session manager for Miku Discord bot.
+Handles Discord voice channel connections, resource locking, and feature blocking during voice sessions.
+
+During a voice session:
+- GPU switches to AMD for text inference only
+- Vision model is blocked (keeps GTX 1660 for TTS)
+- Image generation is blocked
+- Bipolar mode interactions are disabled
+- Profile picture switching is locked
+- Autonomous engine is paused
+- Scheduled events are paused
+- Text channels are paused (messages queued)
+"""
+
+import asyncio
+import json
+import os
+from typing import Optional
+import discord
+import globals
+from utils.logger import get_logger
+
+logger = get_logger('voice_manager')
+
+
+class VoiceSessionManager:
+    """
+    Singleton manager for voice chat sessions.
+
+    Ensures only one voice session is active at a time and owns every
+    resource lock taken for it: the GPU state file, the feature-blocking
+    flags in `globals`, and the paused background systems. Everything taken
+    in `start_session` is handed back by `end_session` (or by the failed-start
+    cleanup), including the previous contents of the GPU state file.
+    """
+
+    _instance = None
+
+    # State file that is overwritten for the duration of a session
+    _GPU_STATE_DIR = "memory"
+    _GPU_STATE_FILE = os.path.join("memory", "gpu_state.json")
+
+    def __new__(cls):
+        # Always return the same object; _initialized keeps __init__ from
+        # resetting state when the class is "constructed" again.
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self):
+        if self._initialized:
+            return
+
+        self.active_session: Optional['VoiceSession'] = None
+        # Serializes start/end so two commands cannot interleave lock changes
+        self.session_lock = asyncio.Lock()
+        # What gpu_state.json held before the session overwrote it
+        # (None when there was no readable file)
+        self._previous_gpu_state = None
+        # True only while our override is on disk and still has to be undone
+        self._gpu_state_overridden = False
+        self._initialized = True
+        logger.info("VoiceSessionManager initialized")
+
+    async def start_session(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
+        """
+        Start a voice session with full resource locking.
+
+        Args:
+            guild_id: Discord guild ID
+            voice_channel: Voice channel to join
+            text_channel: Text channel for voice prompts
+
+        Raises:
+            RuntimeError: If a session is already active
+            Exception: Whatever a locking step raised; all locks taken so far
+                are released before it propagates
+        """
+        async with self.session_lock:
+            if self.active_session:
+                raise RuntimeError("Voice session already active")
+
+            logger.info(f"Starting voice session in {voice_channel.name} (guild {guild_id})")
+
+            try:
+                # 1. Switch to AMD GPU for text inference
+                await self._switch_to_amd_gpu()
+
+                # 2. Block vision model loading
+                await self._block_vision_model()
+
+                # 3. Disable image generation (ComfyUI)
+                await self._disable_image_generation()
+
+                # 4. Pause text channel inference (queue messages)
+                await self._pause_text_channels()
+
+                # 5. Disable bipolar mode interactions (Miku/Evil Miku arguments)
+                await self._disable_bipolar_mode()
+
+                # 6. Disable profile picture switching
+                await self._disable_profile_picture_switching()
+
+                # 7. Pause autonomous engine
+                await self._pause_autonomous_engine()
+
+                # 8. Pause scheduled events
+                await self._pause_scheduled_events()
+
+                # 9. Pause figurine notifier
+                await self._pause_figurine_notifier()
+
+                # 10. Create and connect voice session
+                self.active_session = VoiceSession(guild_id, voice_channel, text_channel)
+                # Note: Actual voice connection will be implemented in Phase 2
+
+                logger.info("✓ Voice session started successfully")
+
+            except Exception as e:
+                logger.error(f"Failed to start voice session: {e}", exc_info=True)
+                # Cleanup on failure
+                await self._cleanup_failed_start()
+                raise
+
+    async def end_session(self):
+        """
+        End voice session and release all resources.
+
+        Every release step is attempted even if an earlier one fails, and the
+        session is always cleared. Does nothing (beyond a warning) when no
+        session is active.
+
+        Raises:
+            Exception: The first error raised by a release step, re-raised
+                after all remaining steps have run
+        """
+        async with self.session_lock:
+            if not self.active_session:
+                logger.warning("No active voice session to end")
+                return
+
+            logger.info("Ending voice session...")
+
+            # Disconnecting from voice is Phase 2 work; nothing is connected yet.
+            try:
+                first_error = await self._release_all_resources()
+            finally:
+                # Force clear session even on error
+                self.active_session = None
+
+            if first_error is not None:
+                logger.error(f"Error during session cleanup: {first_error}")
+                raise first_error
+
+            logger.info("✓ Voice session ended successfully, all resources released")
+
+    async def _release_all_resources(self):
+        """
+        Run every release step, continuing past failures.
+
+        Returns:
+            The first exception raised by a step, or None if all succeeded.
+        """
+        release_steps = (
+            self._resume_text_channels,
+            self._unblock_vision_model,
+            self._enable_image_generation,
+            self._enable_bipolar_mode,
+            self._enable_profile_picture_switching,
+            self._resume_autonomous_engine,
+            self._resume_scheduled_events,
+            self._resume_figurine_notifier,
+            self._restore_gpu_state,
+        )
+
+        first_error = None
+        for step in release_steps:
+            try:
+                await step()
+            except Exception as e:
+                # One stuck step must not leave the other features locked
+                logger.error(f"Release step {step.__name__} failed: {e}", exc_info=True)
+                if first_error is None:
+                    first_error = e
+        return first_error
+
+    # ==================== Resource Locking Methods ====================
+
+    async def _switch_to_amd_gpu(self):
+        """
+        Switch text inference to AMD GPU (RX 6800).
+
+        Overwrites the GPU state file with the voice-session override and
+        remembers what the file held before so it can be put back later.
+
+        Raises:
+            Exception: If the state file cannot be written
+        """
+        try:
+            os.makedirs(self._GPU_STATE_DIR, exist_ok=True)
+
+            previous_state = None
+            try:
+                with open(self._GPU_STATE_FILE, "r", encoding="utf-8") as f:
+                    previous_state = json.load(f)
+            except FileNotFoundError:
+                # No earlier state: restoring will remove our file again
+                pass
+            except (OSError, ValueError) as e:
+                logger.warning(f"Could not read previous GPU state, it will not be restored: {e}")
+
+            # Record before writing so a half-written file is still rolled back
+            self._previous_gpu_state = previous_state
+            self._gpu_state_overridden = True
+
+            with open(self._GPU_STATE_FILE, "w", encoding="utf-8") as f:
+                json.dump({"current_gpu": "amd", "reason": "voice_session"}, f)
+
+            logger.info("✓ Switched to AMD GPU for text inference")
+        except Exception as e:
+            logger.error(f"Failed to switch GPU: {e}")
+            raise
+
+    async def _restore_gpu_state(self):
+        """
+        Undo the voice-session GPU override.
+
+        Writes back the state captured by `_switch_to_amd_gpu`, or removes the
+        state file when none existed before. Does nothing if no override is
+        outstanding.
+
+        Raises:
+            OSError: If the state file cannot be rewritten or removed
+        """
+        if not self._gpu_state_overridden:
+            return
+
+        try:
+            if self._previous_gpu_state is None:
+                try:
+                    os.remove(self._GPU_STATE_FILE)
+                except FileNotFoundError:
+                    pass
+            else:
+                with open(self._GPU_STATE_FILE, "w", encoding="utf-8") as f:
+                    json.dump(self._previous_gpu_state, f)
+            logger.info("✓ GPU state restored")
+        except OSError as e:
+            logger.error(f"Failed to restore GPU state: {e}")
+            raise
+        finally:
+            # Never replay a stale snapshot on a later session
+            self._previous_gpu_state = None
+            self._gpu_state_overridden = False
+
+    async def _block_vision_model(self):
+        """Prevent vision model from loading during voice session"""
+        globals.VISION_MODEL_BLOCKED = True
+        logger.info("✓ Vision model blocked")
+
+    async def _unblock_vision_model(self):
+        """Allow vision model to load after voice session"""
+        globals.VISION_MODEL_BLOCKED = False
+        logger.info("✓ Vision model unblocked")
+
+    async def _disable_image_generation(self):
+        """Block ComfyUI image generation during voice session"""
+        globals.IMAGE_GENERATION_BLOCKED = True
+        # Shown to users who ask for an image while the block is in place
+        globals.IMAGE_GENERATION_BLOCK_MESSAGE = (
+            "🎤 I can't draw right now, I'm talking in voice chat! "
+            "Ask me again after I leave the voice channel."
+        )
+        logger.info("✓ Image generation disabled")
+
+    async def _enable_image_generation(self):
+        """Re-enable image generation after voice session"""
+        globals.IMAGE_GENERATION_BLOCKED = False
+        globals.IMAGE_GENERATION_BLOCK_MESSAGE = None
+        logger.info("✓ Image generation re-enabled")
+
+    async def _pause_text_channels(self):
+        """Queue text messages instead of processing during voice session"""
+        globals.VOICE_SESSION_ACTIVE = True
+        globals.TEXT_MESSAGE_QUEUE = []
+        logger.info("✓ Text channels paused (messages will be queued)")
+
+    async def _resume_text_channels(self):
+        """Clear the paused state; queued messages are currently discarded, not replayed"""
+        globals.VOICE_SESSION_ACTIVE = False
+        queued_count = len(globals.TEXT_MESSAGE_QUEUE)
+
+        if queued_count > 0:
+            logger.info(f"Resuming text channels, {queued_count} messages queued")
+            # TODO: Process queue in Phase 2 (need message handler integration)
+            # For now, just clear the queue
+            globals.TEXT_MESSAGE_QUEUE = []
+            logger.warning(f"Discarded {queued_count} queued messages (queue processing not yet implemented)")
+        else:
+            logger.info("✓ Text channels resumed (no queued messages)")
+
+    async def _disable_bipolar_mode(self):
+        """Prevent Miku/Evil Miku arguments during voice session"""
+        try:
+            from utils.bipolar_mode import pause_bipolar_interactions
+            pause_bipolar_interactions()
+            logger.info("✓ Bipolar mode interactions disabled")
+        except ImportError:
+            logger.warning("bipolar_mode module not found, skipping")
+        except AttributeError:
+            logger.warning("pause_bipolar_interactions not implemented yet, skipping")
+
+    async def _enable_bipolar_mode(self):
+        """Re-enable Miku/Evil Miku arguments after voice session"""
+        try:
+            from utils.bipolar_mode import resume_bipolar_interactions
+            resume_bipolar_interactions()
+            logger.info("✓ Bipolar mode interactions re-enabled")
+        except ImportError:
+            logger.warning("bipolar_mode module not found, skipping")
+        except AttributeError:
+            logger.warning("resume_bipolar_interactions not implemented yet, skipping")
+
+    async def _disable_profile_picture_switching(self):
+        """Lock profile picture during voice session"""
+        try:
+            from utils.profile_picture_manager import profile_picture_manager
+            if hasattr(profile_picture_manager, 'lock_switching'):
+                profile_picture_manager.lock_switching()
+                logger.info("✓ Profile picture switching disabled")
+            else:
+                logger.warning("profile_picture_manager.lock_switching not implemented yet, skipping")
+        except ImportError:
+            logger.warning("profile_picture_manager module not found, skipping")
+
+    async def _enable_profile_picture_switching(self):
+        """Unlock profile picture after voice session"""
+        try:
+            from utils.profile_picture_manager import profile_picture_manager
+            if hasattr(profile_picture_manager, 'unlock_switching'):
+                profile_picture_manager.unlock_switching()
+                logger.info("✓ Profile picture switching re-enabled")
+            else:
+                logger.warning("profile_picture_manager.unlock_switching not implemented yet, skipping")
+        except ImportError:
+            logger.warning("profile_picture_manager module not found, skipping")
+
+    async def _pause_autonomous_engine(self):
+        """Pause autonomous message generation during voice session"""
+        try:
+            from utils.autonomous import pause_autonomous_system
+            pause_autonomous_system()
+            logger.info("✓ Autonomous engine paused")
+        except ImportError:
+            logger.warning("autonomous module not found, skipping")
+        except AttributeError:
+            logger.warning("pause_autonomous_system not implemented yet, skipping")
+
+    async def _resume_autonomous_engine(self):
+        """Resume autonomous message generation after voice session"""
+        try:
+            from utils.autonomous import resume_autonomous_system
+            resume_autonomous_system()
+            logger.info("✓ Autonomous engine resumed")
+        except ImportError:
+            logger.warning("autonomous module not found, skipping")
+        except AttributeError:
+            logger.warning("resume_autonomous_system not implemented yet, skipping")
+
+    async def _pause_scheduled_events(self):
+        """Pause all scheduled jobs during voice session (best effort: failures are logged only)"""
+        try:
+            globals.scheduler.pause()
+            logger.info("✓ Scheduled events paused")
+        except Exception as e:
+            logger.error(f"Failed to pause scheduler: {e}")
+
+    async def _resume_scheduled_events(self):
+        """Resume scheduled jobs after voice session (best effort: failures are logged only)"""
+        try:
+            globals.scheduler.resume()
+            logger.info("✓ Scheduled events resumed")
+        except Exception as e:
+            logger.error(f"Failed to resume scheduler: {e}")
+
+    async def _pause_figurine_notifier(self):
+        """Pause figurine notifications during voice session"""
+        try:
+            # Assuming figurine notifier is a scheduled job
+            globals.scheduler.pause_job('figurine_notifier')
+            logger.info("✓ Figurine notifier paused")
+        except Exception as e:
+            # Job might not exist, that's okay
+            logger.debug(f"Could not pause figurine notifier (may not exist): {e}")
+
+    async def _resume_figurine_notifier(self):
+        """Resume figurine notifications after voice session"""
+        try:
+            globals.scheduler.resume_job('figurine_notifier')
+            logger.info("✓ Figurine notifier resumed")
+        except Exception as e:
+            # Job might not exist, that's okay
+            logger.debug(f"Could not resume figurine notifier (may not exist): {e}")
+
+    async def _cleanup_failed_start(self):
+        """
+        Cleanup resources if session start fails.
+
+        Releases every lock regardless of how far the start got (releasing one
+        that was never taken is harmless) and never raises, so the original
+        start error is the one the caller sees.
+        """
+        logger.warning("Cleaning up after failed session start...")
+        # Individual failures are already logged by the release loop
+        await self._release_all_resources()
+
+
+class VoiceSession:
+    """
+    Represents an active voice chat session.
+    Phase 1: Basic structure only, voice connection in Phase 2.
+
+    Holds the guild, voice channel and prompt text channel the session was
+    started with; the voice client slot is created empty.
+    """
+    
+    def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
+        """
+        Record the session's channels; no connection is made here.
+
+        Args:
+            guild_id: Discord guild ID the session belongs to
+            voice_channel: Voice channel the session is for
+            text_channel: Text channel used for voice prompts
+        """
+        self.guild_id = guild_id
+        self.voice_channel = voice_channel
+        self.text_channel = text_channel
+        # Stays None in this class; nothing here connects a voice client
+        self.voice_client: Optional[discord.VoiceClient] = None
+        # Initialised to False and not changed anywhere in this class
+        self.active = False
+        
+        logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")
+    
+    # Phase 2: Implement voice connection, audio streaming, TTS integration
+
+
+# Global singleton instance
+voice_manager = VoiceSessionManager()
--- a/readmes/VOICE_CHAT_IMPLEMENTATION_PLAN.md
+++ b/readmes/VOICE_CHAT_IMPLEMENTATION_PLAN.md