Untested Phase 1 (Foundation & Resource management) of voice chat integration
This commit is contained in:
28
bot/bot.py
28
bot/bot.py
@@ -125,6 +125,19 @@ async def on_message(message):
|
|||||||
if message.author == globals.client.user:
|
if message.author == globals.client.user:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Check for voice commands first (!miku join, !miku leave, !miku voice-status)
|
||||||
|
if not isinstance(message.channel, discord.DMChannel) and message.content.strip().lower().startswith('!miku '):
|
||||||
|
from commands.voice import handle_voice_command
|
||||||
|
|
||||||
|
parts = message.content.strip().split()
|
||||||
|
if len(parts) >= 2:
|
||||||
|
cmd = parts[1].lower()
|
||||||
|
args = parts[2:] if len(parts) > 2 else []
|
||||||
|
|
||||||
|
if cmd in ['join', 'leave', 'voice-status']:
|
||||||
|
await handle_voice_command(message, cmd, args)
|
||||||
|
return
|
||||||
|
|
||||||
# Skip processing if a bipolar argument is in progress in this channel
|
# Skip processing if a bipolar argument is in progress in this channel
|
||||||
if not isinstance(message.channel, discord.DMChannel):
|
if not isinstance(message.channel, discord.DMChannel):
|
||||||
from utils.bipolar_mode import is_argument_in_progress
|
from utils.bipolar_mode import is_argument_in_progress
|
||||||
@@ -196,6 +209,14 @@ async def on_message(message):
|
|||||||
logger.error(f"Failed to fetch replied message for context: {e}")
|
logger.error(f"Failed to fetch replied message for context: {e}")
|
||||||
|
|
||||||
async with message.channel.typing():
|
async with message.channel.typing():
|
||||||
|
# Check if vision model is blocked (voice session active)
|
||||||
|
if message.attachments and globals.VISION_MODEL_BLOCKED:
|
||||||
|
await message.channel.send(
|
||||||
|
"🎤 I can't look at images or videos right now, I'm talking in voice chat! "
|
||||||
|
"Send it again after I leave the voice channel."
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
# If message has an image, video, or GIF attachment
|
# If message has an image, video, or GIF attachment
|
||||||
if message.attachments:
|
if message.attachments:
|
||||||
for attachment in message.attachments:
|
for attachment in message.attachments:
|
||||||
@@ -504,6 +525,13 @@ async def on_message(message):
|
|||||||
if is_image_request and image_prompt:
|
if is_image_request and image_prompt:
|
||||||
logger.info(f"🎨 Image generation request detected: '{image_prompt}' from {message.author.display_name}")
|
logger.info(f"🎨 Image generation request detected: '{image_prompt}' from {message.author.display_name}")
|
||||||
|
|
||||||
|
# Block image generation during voice sessions
|
||||||
|
if globals.IMAGE_GENERATION_BLOCKED:
|
||||||
|
await message.channel.send(globals.IMAGE_GENERATION_BLOCK_MESSAGE)
|
||||||
|
await message.add_reaction('🎤')
|
||||||
|
logger.info("🚫 Image generation blocked - voice session active")
|
||||||
|
return
|
||||||
|
|
||||||
# Handle the image generation workflow
|
# Handle the image generation workflow
|
||||||
success = await handle_image_generation_request(message, image_prompt)
|
success = await handle_image_generation_request(message, image_prompt)
|
||||||
if success:
|
if success:
|
||||||
|
|||||||
229
bot/commands/voice.py
Normal file
229
bot/commands/voice.py
Normal file
@@ -0,0 +1,229 @@
|
|||||||
|
# voice.py
|
||||||
|
"""
|
||||||
|
Voice channel commands for Miku Discord bot.
|
||||||
|
Handles joining, leaving, and status commands for voice chat sessions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import discord
|
||||||
|
from utils.voice_manager import voice_manager
|
||||||
|
from utils.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger('voice_commands')
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_voice_command(message, cmd, args):
    """
    Route a `!miku` voice sub-command to its handler.

    Args:
        message: Discord message object that carried the command
        cmd: Sub-command name; 'join', 'leave' and 'voice-status' are recognised
        args: Remaining command tokens (only the 'join' handler receives them)

    Any other sub-command is answered with an "unknown command" reply in the
    channel the message came from.
    """
    if cmd == 'join':
        await _handle_join(message, args)
        return

    if cmd == 'leave':
        await _handle_leave(message)
        return

    if cmd == 'voice-status':
        await _handle_status(message)
        return

    # Nothing matched: tell the user instead of failing silently
    await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
|
||||||
|
|
||||||
|
|
||||||
|
async def _handle_join(message, args):
    """
    Handle !miku join command.

    Picks the target voice channel (an explicit `<#id>` mention in args[0],
    otherwise the channel the author is currently connected to), checks that
    the bot may connect and speak there, then asks the voice manager to start
    a session that uses the invoking text channel for prompts.

    Problems (bad channel, missing permissions, session start failure) are
    reported as replies in message.channel.
    """
    mentioned = bool(args) and args[0].startswith('<#')

    if mentioned:
        # Explicit target: a channel mention arrives as `<#1234567890>`
        try:
            target = message.guild.get_channel(int(args[0][2:-1]))

            if not isinstance(target, discord.VoiceChannel):
                await message.channel.send("❌ That's not a voice channel!")
                return
        except (ValueError, AttributeError):
            await message.channel.send("❌ Invalid channel!")
            return
    else:
        # No mention: follow the author into the channel they are sitting in
        author_voice = message.author.voice
        target = author_voice.channel if author_voice else None

        if not target:
            await message.channel.send(
                "❌ You must be in a voice channel! "
                "Or mention a voice channel like `!miku join #voice-chat`"
            )
            return

    # Both permissions are needed before a session is worth starting
    bot_permissions = target.permissions_for(message.guild.me)

    if not bot_permissions.connect:
        await message.channel.send(f"❌ I don't have permission to join {target.mention}!")
        return

    if not bot_permissions.speak:
        await message.channel.send(f"❌ I don't have permission to speak in {target.mention}!")
        return

    try:
        await message.channel.send(f"🎤 Joining {target.mention}...")

        # The invoking text channel becomes the prompt channel of the session
        await voice_manager.start_session(message.guild.id, target, message.channel)

        resource_notes = "\n".join((
            "• Text inference on AMD GPU only",
            "• Vision model disabled",
            "• Image generation disabled",
            "• Other text channels paused",
        ))

        embed = discord.Embed(
            title="🎤 Voice Chat Active",
            description=f"I've joined {target.mention}!",
            color=discord.Color.from_rgb(134, 206, 203),  # Miku teal
        )
        embed.add_field(
            name="How to use",
            value=f"Send messages in {message.channel.mention} to make me speak!",
            inline=False,
        )
        embed.add_field(name="⚠️ Resource Mode", value=resource_notes, inline=False)
        embed.set_footer(text="Use !miku leave to end the session")

        await message.channel.send(embed=embed)

        logger.info(f"Voice session started by {message.author} in {target.name}")

    except Exception as e:
        # Report the reason to the user and keep the traceback in the log
        await message.channel.send(f"❌ Failed to join voice: {e}")
        logger.error(f"Failed to start voice session: {e}", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
async def _handle_leave(message):
    """
    Handle !miku leave command.

    Ends the active voice session through the voice manager and confirms
    with an embed. Refuses when there is no session, or when the session
    belongs to a different guild than the one the command was sent in.
    """
    session = voice_manager.active_session

    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # Only the guild that owns the session may end it
    if session.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Grab the name first: the session object is gone once it has ended
        left_channel_name = session.voice_channel.name

        await message.channel.send("👋 Leaving voice channel...")

        await voice_manager.end_session()

        released = "\n".join((
            "• Vision model available",
            "• Image generation available",
            "• Text channels resumed",
            "• All features restored",
        ))

        embed = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {left_channel_name}",
            color=discord.Color.from_rgb(134, 206, 203),
        )
        embed.add_field(name="✅ Resources Released", value=released, inline=False)

        await message.channel.send(embed=embed)

        logger.info(f"Voice session ended by {message.author}")

    except Exception as e:
        await message.channel.send(f"⚠️ Error leaving voice: {e}")
        logger.error(f"Failed to end voice session: {e}", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
async def _handle_status(message):
    """
    Handle !miku voice-status command.

    Replies with an embed describing the active voice session (voice channel,
    prompt channel, resource allocation). When no session exists, replies with
    a hint on how to start one. A session owned by a different guild is only
    acknowledged, not described.
    """
    session = voice_manager.active_session

    if not session:
        idle_embed = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple(),
        )
        idle_embed.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False,
        )
        await message.channel.send(embed=idle_embed)
        return

    # Details are only shown inside the guild that owns the session
    if session.guild_id != message.guild.id:
        await message.channel.send("ℹ️ I'm in a voice channel in a different server.")
        return

    # The empty entry produces the blank line between the two sections
    allocation = "\n".join((
        "**GPU Usage:**",
        "• AMD RX 6800: Text model + RVC",
        "• GTX 1660: Soprano TTS only",
        "",
        "**Blocked Features:**",
        "• ❌ Vision model",
        "• ❌ Image generation",
        "• ❌ Bipolar mode",
        "• ❌ Profile picture changes",
        "• ⏸️ Autonomous engine",
        "• ⏸️ Scheduled events",
        "• 📦 Other text channels (queued)",
    ))

    embed = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=discord.Color.from_rgb(134, 206, 203),
    )
    embed.add_field(name="Voice Channel", value=session.voice_channel.mention, inline=True)
    embed.add_field(name="Prompt Channel", value=session.text_channel.mention, inline=True)
    embed.add_field(name="📊 Resource Allocation", value=allocation, inline=False)
    embed.set_footer(text="Use !miku leave to end the session")

    await message.channel.send(embed=embed)
|
||||||
@@ -96,3 +96,12 @@ LAST_FULL_PROMPT = ""
|
|||||||
# Persona Dialogue System (conversations between Miku and Evil Miku)
|
# Persona Dialogue System (conversations between Miku and Evil Miku)
|
||||||
LAST_PERSONA_DIALOGUE_TIME = 0 # Timestamp of last dialogue for cooldown
|
LAST_PERSONA_DIALOGUE_TIME = 0 # Timestamp of last dialogue for cooldown
|
||||||
|
|
||||||
|
# Voice Chat Session State
|
||||||
|
VOICE_SESSION_ACTIVE = False
|
||||||
|
TEXT_MESSAGE_QUEUE = [] # Queue for messages received during voice session
|
||||||
|
|
||||||
|
# Feature Blocking Flags (set during voice session)
|
||||||
|
VISION_MODEL_BLOCKED = False
|
||||||
|
IMAGE_GENERATION_BLOCKED = False
|
||||||
|
IMAGE_GENERATION_BLOCK_MESSAGE = None
|
||||||
|
|
||||||
|
|||||||
@@ -20,3 +20,5 @@ numpy
|
|||||||
scikit-learn
|
scikit-learn
|
||||||
transformers
|
transformers
|
||||||
torch
|
torch
|
||||||
|
PyNaCl>=1.5.0
|
||||||
|
websockets>=12.0
|
||||||
|
|||||||
@@ -17,12 +17,34 @@ logger = get_logger('autonomous')
|
|||||||
_last_action_execution = {} # guild_id -> timestamp
|
_last_action_execution = {} # guild_id -> timestamp
|
||||||
_MIN_ACTION_INTERVAL = 30 # Minimum 30 seconds between autonomous actions
|
_MIN_ACTION_INTERVAL = 30 # Minimum 30 seconds between autonomous actions
|
||||||
|
|
||||||
|
# Pause state for voice sessions
|
||||||
|
_autonomous_paused = False
|
||||||
|
|
||||||
|
|
||||||
|
def pause_autonomous_system() -> None:
    """
    Pause autonomous message generation (called during voice sessions).

    Raises the module-level pause flag; autonomous_tick_v2 returns early
    while the flag is set.
    """
    global _autonomous_paused

    _autonomous_paused = True
    logger.info("Autonomous system paused")
|
||||||
|
|
||||||
|
|
||||||
|
def resume_autonomous_system() -> None:
    """
    Resume autonomous message generation (called after voice sessions).

    Clears the module-level pause flag so autonomous_tick_v2 proceeds to its
    normal checks again.
    """
    global _autonomous_paused

    _autonomous_paused = False
    logger.info("Autonomous system resumed")
|
||||||
|
|
||||||
|
|
||||||
async def autonomous_tick_v2(guild_id: int):
|
async def autonomous_tick_v2(guild_id: int):
|
||||||
"""
|
"""
|
||||||
New autonomous tick that uses context-aware decision making.
|
New autonomous tick that uses context-aware decision making.
|
||||||
Replaces the random 10% chance with intelligent decision.
|
Replaces the random 10% chance with intelligent decision.
|
||||||
"""
|
"""
|
||||||
|
# Check if autonomous is paused (voice session)
|
||||||
|
if _autonomous_paused:
|
||||||
|
logger.debug(f"[V2] Autonomous system paused (voice session active)")
|
||||||
|
return
|
||||||
|
|
||||||
# Rate limiting check
|
# Rate limiting check
|
||||||
now = time.time()
|
now = time.time()
|
||||||
if guild_id in _last_action_execution:
|
if guild_id in _last_action_execution:
|
||||||
|
|||||||
@@ -28,6 +28,31 @@ MIN_EXCHANGES = 4 # Minimum number of back-and-forth exchanges before ending ca
|
|||||||
ARGUMENT_TRIGGER_CHANCE = 0.15 # 15% chance for the other Miku to break through
|
ARGUMENT_TRIGGER_CHANCE = 0.15 # 15% chance for the other Miku to break through
|
||||||
DELAY_BETWEEN_MESSAGES = (2.0, 5.0) # Random delay between argument messages (seconds)
|
DELAY_BETWEEN_MESSAGES = (2.0, 5.0) # Random delay between argument messages (seconds)
|
||||||
|
|
||||||
|
# Pause state for voice sessions
|
||||||
|
_bipolar_interactions_paused = False
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# VOICE SESSION PAUSE/RESUME
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def pause_bipolar_interactions() -> None:
    """
    Pause all bipolar interactions (called during voice sessions).

    Raises the module-level pause flag that is_bipolar_paused() reports.
    """
    global _bipolar_interactions_paused

    _bipolar_interactions_paused = True
    logger.info("Bipolar interactions paused")
|
||||||
|
|
||||||
|
|
||||||
|
def resume_bipolar_interactions() -> None:
    """
    Resume bipolar interactions (called after voice sessions).

    Clears the module-level pause flag that is_bipolar_paused() reports.
    """
    global _bipolar_interactions_paused

    _bipolar_interactions_paused = False
    logger.info("Bipolar interactions resumed")
|
||||||
|
|
||||||
|
|
||||||
|
def is_bipolar_paused() -> bool:
    """Return the current value of the bipolar pause flag (True while paused)."""
    paused = _bipolar_interactions_paused
    return paused
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# STATE PERSISTENCE
|
# STATE PERSISTENCE
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -1039,6 +1064,11 @@ async def maybe_trigger_argument(channel: discord.TextChannel, client, context:
|
|||||||
if not globals.BIPOLAR_MODE:
|
if not globals.BIPOLAR_MODE:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Check if bipolar interactions are paused (voice session)
|
||||||
|
if is_bipolar_paused():
|
||||||
|
logger.debug("Bipolar argument blocked (voice session active)")
|
||||||
|
return False
|
||||||
|
|
||||||
if is_argument_in_progress(channel.id):
|
if is_argument_in_progress(channel.id):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
@@ -47,6 +47,17 @@ class ProfilePictureManager:
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._ensure_directories()
|
self._ensure_directories()
|
||||||
|
self.switching_locked = False # Lock for voice session
|
||||||
|
|
||||||
|
def lock_switching(self) -> None:
    """
    Lock profile picture changes during voice session.

    Sets self.switching_locked; the change routine checks this flag and
    returns a failure result while it is set.
    """
    self.switching_locked = True
    logger.info("Profile picture switching locked")
|
||||||
|
|
||||||
|
def unlock_switching(self) -> None:
    """
    Unlock profile picture changes after voice session.

    Clears self.switching_locked so profile picture changes are allowed again.
    """
    self.switching_locked = False
    logger.info("Profile picture switching unlocked")
|
||||||
|
|
||||||
def _ensure_directories(self):
|
def _ensure_directories(self):
|
||||||
"""Ensure profile picture directory exists"""
|
"""Ensure profile picture directory exists"""
|
||||||
@@ -247,6 +258,16 @@ class ProfilePictureManager:
|
|||||||
Returns:
|
Returns:
|
||||||
Dict with status and metadata
|
Dict with status and metadata
|
||||||
"""
|
"""
|
||||||
|
# Check if switching is locked (voice session active)
|
||||||
|
if self.switching_locked:
|
||||||
|
logger.info("Profile picture change blocked (voice session active)")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"source": None,
|
||||||
|
"error": "Profile picture switching locked during voice session",
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"success": False,
|
"success": False,
|
||||||
"source": None,
|
"source": None,
|
||||||
|
|||||||
358
bot/utils/voice_manager.py
Normal file
358
bot/utils/voice_manager.py
Normal file
@@ -0,0 +1,358 @@
|
|||||||
|
# voice_manager.py
|
||||||
|
"""
|
||||||
|
Voice session manager for Miku Discord bot.
|
||||||
|
Handles Discord voice channel connections, resource locking, and feature blocking during voice sessions.
|
||||||
|
|
||||||
|
During a voice session:
|
||||||
|
- GPU switches to AMD for text inference only
|
||||||
|
- Vision model is blocked (keeps GTX 1660 for TTS)
|
||||||
|
- Image generation is blocked
|
||||||
|
- Bipolar mode interactions are disabled
|
||||||
|
- Profile picture switching is locked
|
||||||
|
- Autonomous engine is paused
|
||||||
|
- Scheduled events are paused
|
||||||
|
- Text channels are paused (messages queued)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from typing import Optional
|
||||||
|
import discord
|
||||||
|
import globals
|
||||||
|
from utils.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger('voice_manager')
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSessionManager:
    """
    Singleton manager for voice chat sessions.

    Ensures only one voice session is active at a time and owns every resource
    lock taken for it: the GPU state file, the vision / image-generation
    flags in `globals`, the text-channel queue flag, bipolar interactions,
    profile picture switching, the autonomous engine and the scheduler.

    Everything acquired by start_session() is released by end_session(), or by
    _cleanup_failed_start() when the start fails. That includes the GPU state
    file, which is put back to its pre-session content.
    """

    _instance = None

    # State file rewritten while a voice session is active
    _GPU_STATE_DIR = "memory"
    _GPU_STATE_FILE = os.path.join(_GPU_STATE_DIR, "gpu_state.json")

    def __new__(cls):
        # Classic singleton: build the instance once, hand it out afterwards
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # __init__ runs on every VoiceSessionManager() call; only act once
        if self._initialized:
            return

        self.active_session: Optional['VoiceSession'] = None
        self.session_lock = asyncio.Lock()
        # Raw text of the GPU state file before the session overwrote it
        # (None when the file did not exist), plus a marker that we wrote it
        self._previous_gpu_state: Optional[str] = None
        self._gpu_state_switched = False
        self._initialized = True
        logger.info("VoiceSessionManager initialized")

    async def start_session(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
        """
        Start a voice session with full resource locking.

        Args:
            guild_id: Discord guild ID
            voice_channel: Voice channel to join
            text_channel: Text channel for voice prompts

        Raises:
            Exception: If session already active or resources can't be locked
        """
        async with self.session_lock:
            if self.active_session:
                raise Exception("Voice session already active")

            logger.info(f"Starting voice session in {voice_channel.name} (guild {guild_id})")

            try:
                # 1. Switch to AMD GPU for text inference
                await self._switch_to_amd_gpu()

                # 2. Block vision model loading
                await self._block_vision_model()

                # 3. Disable image generation (ComfyUI)
                await self._disable_image_generation()

                # 4. Pause text channel inference (queue messages)
                await self._pause_text_channels()

                # 5. Disable bipolar mode interactions (Miku/Evil Miku arguments)
                await self._disable_bipolar_mode()

                # 6. Disable profile picture switching
                await self._disable_profile_picture_switching()

                # 7. Pause autonomous engine
                await self._pause_autonomous_engine()

                # 8. Pause scheduled events
                await self._pause_scheduled_events()

                # 9. Pause figurine notifier
                await self._pause_figurine_notifier()

                # 10. Create and connect voice session
                self.active_session = VoiceSession(guild_id, voice_channel, text_channel)
                # Note: Actual voice connection will be implemented in Phase 2

                logger.info("✓ Voice session started successfully")

            except Exception as e:
                logger.error(f"Failed to start voice session: {e}", exc_info=True)
                # Cleanup on failure
                await self._cleanup_failed_start()
                raise

    async def end_session(self):
        """
        End voice session and release all resources.

        Every release step is attempted even when an earlier one fails, so a
        single failure cannot leave later resources locked. The session is
        always cleared.

        Raises:
            Exception: The first error raised by a release step, re-raised
                after all steps have run.
        """
        async with self.session_lock:
            if not self.active_session:
                logger.warning("No active voice session to end")
                return

            logger.info("Ending voice session...")

            # 1. Disconnect from voice (Phase 2 implementation)
            # await self.active_session.disconnect()

            # 2-10. Release every lock taken by start_session()
            first_error = await self._release_all_resources()

            # Clear the session whether or not the release was clean
            self.active_session = None

            if first_error is not None:
                logger.error(f"Error during session cleanup: {first_error}")
                raise first_error

            logger.info("✓ Voice session ended successfully, all resources released")

    async def _release_all_resources(self) -> Optional[Exception]:
        """
        Run every release step, isolating failures from each other.

        Returns:
            The first exception raised by a step, or None when all succeeded.
        """
        release_steps = (
            self._resume_text_channels,
            self._unblock_vision_model,
            self._enable_image_generation,
            self._enable_bipolar_mode,
            self._enable_profile_picture_switching,
            self._resume_autonomous_engine,
            self._resume_scheduled_events,
            self._resume_figurine_notifier,
            self._restore_gpu_state,
        )

        first_error = None
        for step in release_steps:
            try:
                await step()
            except Exception as e:
                logger.error(f"Release step {step.__name__} failed: {e}", exc_info=True)
                if first_error is None:
                    first_error = e
        return first_error

    # ==================== Resource Locking Methods ====================

    async def _switch_to_amd_gpu(self):
        """
        Switch text inference to AMD GPU (RX 6800).

        Snapshots the existing state file first so _restore_gpu_state() can
        put it back when the session ends.
        """
        try:
            os.makedirs(self._GPU_STATE_DIR, exist_ok=True)

            try:
                with open(self._GPU_STATE_FILE, "r") as f:
                    previous = f.read()
            except FileNotFoundError:
                previous = None

            with open(self._GPU_STATE_FILE, "w") as f:
                json.dump({"current_gpu": "amd", "reason": "voice_session"}, f)

            # Only remember the snapshot once our own write has succeeded
            self._previous_gpu_state = previous
            self._gpu_state_switched = True

            logger.info("✓ Switched to AMD GPU for text inference")
        except Exception as e:
            logger.error(f"Failed to switch GPU: {e}")
            raise

    async def _restore_gpu_state(self):
        """
        Put the GPU state file back the way it was before the session.

        Does nothing when this manager never switched the state, or when the
        file no longer carries the voice-session marker (something else
        rewrote it during the session, and that choice is left alone).
        """
        if not self._gpu_state_switched:
            return

        previous = self._previous_gpu_state
        self._gpu_state_switched = False
        self._previous_gpu_state = None

        try:
            with open(self._GPU_STATE_FILE, "r") as f:
                current = json.load(f)
        except FileNotFoundError:
            current = None
        except ValueError:
            # json.JSONDecodeError is a ValueError: unreadable, so not ours
            current = None

        if not isinstance(current, dict) or current.get("reason") != "voice_session":
            logger.info("GPU state changed during voice session, leaving it untouched")
            return

        if previous is None:
            # No state file existed before the session, so remove ours
            os.remove(self._GPU_STATE_FILE)
        else:
            with open(self._GPU_STATE_FILE, "w") as f:
                f.write(previous)

        logger.info("✓ GPU state restored")

    async def _block_vision_model(self):
        """Prevent vision model from loading during voice session"""
        globals.VISION_MODEL_BLOCKED = True
        logger.info("✓ Vision model blocked")

    async def _unblock_vision_model(self):
        """Allow vision model to load after voice session"""
        globals.VISION_MODEL_BLOCKED = False
        logger.info("✓ Vision model unblocked")

    async def _disable_image_generation(self):
        """Block ComfyUI image generation during voice session"""
        globals.IMAGE_GENERATION_BLOCKED = True
        globals.IMAGE_GENERATION_BLOCK_MESSAGE = (
            "🎤 I can't draw right now, I'm talking in voice chat! "
            "Ask me again after I leave the voice channel."
        )
        logger.info("✓ Image generation disabled")

    async def _enable_image_generation(self):
        """Re-enable image generation after voice session"""
        globals.IMAGE_GENERATION_BLOCKED = False
        globals.IMAGE_GENERATION_BLOCK_MESSAGE = None
        logger.info("✓ Image generation re-enabled")

    async def _pause_text_channels(self):
        """Queue text messages instead of processing during voice session"""
        globals.VOICE_SESSION_ACTIVE = True
        globals.TEXT_MESSAGE_QUEUE = []
        logger.info("✓ Text channels paused (messages will be queued)")

    async def _resume_text_channels(self):
        """Clear the voice-session flag and drop any queued messages"""
        globals.VOICE_SESSION_ACTIVE = False
        queued_count = len(globals.TEXT_MESSAGE_QUEUE)

        if queued_count > 0:
            logger.info(f"Resuming text channels, {queued_count} messages queued")
            # TODO: Process queue in Phase 2 (need message handler integration)
            # For now, just clear the queue
            globals.TEXT_MESSAGE_QUEUE = []
            logger.warning(f"Discarded {queued_count} queued messages (queue processing not yet implemented)")
        else:
            logger.info("✓ Text channels resumed (no queued messages)")

    async def _disable_bipolar_mode(self):
        """Prevent Miku/Evil Miku arguments during voice session"""
        try:
            from utils.bipolar_mode import pause_bipolar_interactions
            pause_bipolar_interactions()
            logger.info("✓ Bipolar mode interactions disabled")
        except ImportError:
            logger.warning("bipolar_mode module not found, skipping")
        except AttributeError:
            logger.warning("pause_bipolar_interactions not implemented yet, skipping")

    async def _enable_bipolar_mode(self):
        """Re-enable Miku/Evil Miku arguments after voice session"""
        try:
            from utils.bipolar_mode import resume_bipolar_interactions
            resume_bipolar_interactions()
            logger.info("✓ Bipolar mode interactions re-enabled")
        except ImportError:
            logger.warning("bipolar_mode module not found, skipping")
        except AttributeError:
            logger.warning("resume_bipolar_interactions not implemented yet, skipping")

    async def _disable_profile_picture_switching(self):
        """Lock profile picture during voice session"""
        try:
            from utils.profile_picture_manager import profile_picture_manager
            if hasattr(profile_picture_manager, 'lock_switching'):
                profile_picture_manager.lock_switching()
                logger.info("✓ Profile picture switching disabled")
            else:
                logger.warning("profile_picture_manager.lock_switching not implemented yet, skipping")
        except ImportError:
            logger.warning("profile_picture_manager module not found, skipping")

    async def _enable_profile_picture_switching(self):
        """Unlock profile picture after voice session"""
        try:
            from utils.profile_picture_manager import profile_picture_manager
            if hasattr(profile_picture_manager, 'unlock_switching'):
                profile_picture_manager.unlock_switching()
                logger.info("✓ Profile picture switching re-enabled")
            else:
                logger.warning("profile_picture_manager.unlock_switching not implemented yet, skipping")
        except ImportError:
            logger.warning("profile_picture_manager module not found, skipping")

    async def _pause_autonomous_engine(self):
        """Pause autonomous message generation during voice session"""
        try:
            from utils.autonomous import pause_autonomous_system
            pause_autonomous_system()
            logger.info("✓ Autonomous engine paused")
        except ImportError:
            logger.warning("autonomous module not found, skipping")
        except AttributeError:
            logger.warning("pause_autonomous_system not implemented yet, skipping")

    async def _resume_autonomous_engine(self):
        """Resume autonomous message generation after voice session"""
        try:
            from utils.autonomous import resume_autonomous_system
            resume_autonomous_system()
            logger.info("✓ Autonomous engine resumed")
        except ImportError:
            logger.warning("autonomous module not found, skipping")
        except AttributeError:
            logger.warning("resume_autonomous_system not implemented yet, skipping")

    async def _pause_scheduled_events(self):
        """Pause all scheduled jobs during voice session"""
        try:
            globals.scheduler.pause()
            logger.info("✓ Scheduled events paused")
        except Exception as e:
            # Best effort: a scheduler problem does not abort the session
            logger.error(f"Failed to pause scheduler: {e}")

    async def _resume_scheduled_events(self):
        """Resume scheduled jobs after voice session"""
        try:
            globals.scheduler.resume()
            logger.info("✓ Scheduled events resumed")
        except Exception as e:
            logger.error(f"Failed to resume scheduler: {e}")

    async def _pause_figurine_notifier(self):
        """Pause figurine notifications during voice session"""
        try:
            # Assuming figurine notifier is a scheduled job
            globals.scheduler.pause_job('figurine_notifier')
            logger.info("✓ Figurine notifier paused")
        except Exception as e:
            # Job might not exist, that's okay
            logger.debug(f"Could not pause figurine notifier (may not exist): {e}")

    async def _resume_figurine_notifier(self):
        """Resume figurine notifications after voice session"""
        try:
            globals.scheduler.resume_job('figurine_notifier')
            logger.info("✓ Figurine notifier resumed")
        except Exception as e:
            # Job might not exist, that's okay
            logger.debug(f"Could not resume figurine notifier (may not exist): {e}")

    async def _cleanup_failed_start(self):
        """
        Cleanup resources if session start fails.

        Releases everything start_session() may have acquired, including the
        GPU state file. Errors are logged and never raised, so the original
        start failure is what reaches the caller.
        """
        logger.warning("Cleaning up after failed session start...")
        # A half-created session must not block the next start attempt
        self.active_session = None
        error = await self._release_all_resources()
        if error is not None:
            logger.error(f"Error during cleanup: {error}")
|
||||||
|
|
||||||
|
|
||||||
|
class VoiceSession:
    """
    State holder for one active voice chat session.

    Phase 1: records which guild and channels the session belongs to; the
    voice client stays None and `active` stays False until the actual voice
    connection is implemented in Phase 2.
    """

    def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
        # Connection placeholders, filled in once Phase 2 connects to voice
        self.voice_client: Optional[discord.VoiceClient] = None
        self.active = False

        # Where the session lives: guild, voice channel and prompt text channel
        self.guild_id = guild_id
        self.voice_channel = voice_channel
        self.text_channel = text_channel

        logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")

    # Phase 2: Implement voice connection, audio streaming, TTS integration
|
||||||
|
|
||||||
|
|
||||||
|
# Global singleton instance
|
||||||
|
voice_manager = VoiceSessionManager()
|
||||||
1589
readmes/VOICE_CHAT_IMPLEMENTATION_PLAN.md
Normal file
1589
readmes/VOICE_CHAT_IMPLEMENTATION_PLAN.md
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user