MEDIUM: Optimize Memory Usage #31

Open
opened 2026-02-16 22:55:35 +02:00 by Koko210 · 0 comments
Owner

Bot accumulates memory over time due to unclosed resources and unnecessary data retention, causing crashes after extended runtime.

Where It Occurs

  • bot/bot.py - Message history storage
  • bot/utils/voice_audio.py - Audio data buffers
  • cat-plugins/cat_client.py - Response caching
  • bot/utils/llm.py - LLM response storage
  • bot/stt_client.py - Audio transcript caching

Why This Is a Problem

  1. Memory Leaks: Unclosed resources accumulate memory
  2. Crashes: Out-of-memory errors after hours/days
  3. Poor Scaling: More users = more memory usage
  4. Slow Degradation: Performance degrades as memory fills

What Can Go Wrong

Scenario 1: Message History Accumulation

  1. Bot stores all messages in memory
  2. After 24 hours, stored 100,000+ messages
  3. Memory usage grows to 2GB+
  4. System starts swapping
  5. Bot becomes unresponsive
  6. OOM killer kills bot process

Scenario 2: Unclosed HTTP Sessions

  1. New aiohttp.ClientSession() for each request
  2. Sessions never closed
  3. Connections pile up
  4. File descriptors exhausted
  5. Bot can't make new HTTP requests
  6. Bot fails completely

Scenario 3: Audio Data Not Cleared

  1. Voice chat sessions create audio buffers
  2. After user leaves, buffers not cleared
  3. Memory grows with each voice session
  4. After 50+ voice sessions, memory exhausted
  5. Bot crashes

Proposed Fix

Implement memory optimization strategies:

# bot/utils/memory_manager.py - NEW FILE
import asyncio
import gc
import inspect
import logging
import time
import weakref
from collections import OrderedDict
from functools import wraps
from typing import Any, Dict, Optional

import aiohttp  # third-party: required by ResourceManager below
import psutil

# Module-wide logger, named after this module per the stdlib logging convention.
logger = logging.getLogger(__name__)

class LRUConfig:
    """Configuration bundle consumed by :class:`LRUCache`.

    Attributes:
        max_size: Maximum number of entries the cache holds before evicting.
        ttl: Entry lifetime in seconds; older entries are treated as stale.
    """

    def __init__(self, max_size: int = 1000, ttl: float = 3600.0):
        # Plain value holder; LRUCache copies these fields at construction.
        self.max_size = max_size
        self.ttl = ttl

class LRUCache:
    """LRU cache with TTL support"""
    
    def __init__(self, config: LRUConfig):
        self.max_size = config.max_size
        self.ttl = config.ttl
        self.cache: OrderedDict = OrderedDict()
    
    def get(self, key: str) -> Optional[Any]:
        if key not in self.cache:
            return None
        
        entry = self.cache[key]
        
        # Check TTL
        if time.time() - entry['timestamp'] > self.ttl:
            del self.cache[key]
            return None
        
        # Move to end (most recently used)
        self.cache.move_to_end(key)
        return entry['value']
    
    def set(self, key: str, value: Any):
        # Remove oldest if at capacity
        if len(self.cache) >= self.max_size:
            self.cache.popitem(last=False)
        
        self.cache[key] = {
            'value': value,
            'timestamp': time.time()
        }
        self.cache.move_to_end(key)
    
    def clear(self):
        self.cache.clear()

# Global caches — shared module-level singletons.
# NOTE(review): sizes/TTLs appear tuned per data kind (messages 1h,
# responses 2h, audio buffers 5min) — confirm against actual usage patterns.
message_cache = LRUCache(LRUConfig(max_size=1000, ttl=3600))
response_cache = LRUCache(LRUConfig(max_size=500, ttl=7200))
audio_buffer_cache = LRUCache(LRUConfig(max_size=100, ttl=300))

def track_memory_usage(operation_name: str):
    """Decorator that logs process-RSS growth caused by the wrapped callable.

    Works on both coroutine functions and plain functions. (The original
    always produced an async wrapper, which silently broke any sync
    function it decorated — callers would receive an un-awaited coroutine.)
    A warning is logged whenever a single call grows RSS by more than 10 MB.

    Args:
        operation_name: Label used in the warning message.
    """
    def decorator(func):
        def _rss_mb() -> float:
            # Resident set size of the current process, in MB.
            return psutil.Process().memory_info().rss / 1024 / 1024

        def _report(before: float) -> None:
            after = _rss_mb()
            delta = after - before
            if delta > 10:  # only flag significant growth (> 10MB)
                logger.warning(
                    f"Memory: {operation_name} - "
                    f"Before: {before:.2f}MB, "
                    f"After: {after:.2f}MB, "
                    f"Delta: {delta:.2f}MB"
                )

        if inspect.iscoroutinefunction(func):
            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                before = _rss_mb()
                try:
                    return await func(*args, **kwargs)
                finally:
                    # Report even when the call raises.
                    _report(before)
            return async_wrapper

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            before = _rss_mb()
            try:
                return func(*args, **kwargs)
            finally:
                _report(before)
        return sync_wrapper
    return decorator

def cleanup_memory():
    """Empty all module-level caches, force a GC pass, and log the RSS delta."""
    def _rss_mb() -> float:
        # Current process resident set size in MB.
        return psutil.Process().memory_info().rss / 1024 / 1024

    before = _rss_mb()

    # Drop everything the global caches are holding on to.
    for cache in (message_cache, response_cache, audio_buffer_cache):
        cache.clear()

    # Collect whatever the cache clears just made unreachable.
    gc.collect()

    after = _rss_mb()
    logger.info(f"Memory cleanup: {before:.2f}MB -> {after:.2f}MB (freed {before-after:.2f}MB)")

# Usage in bot.py - Limit message history
MAX_HISTORY_PER_CHANNEL = 100  # hard cap per channel; oldest entries dropped first
channel_histories: Dict[int, list] = {}  # channel_id -> list of message dicts

async def store_message(message):
    """Record *message* in its channel's bounded in-memory history.

    Keeps at most MAX_HISTORY_PER_CHANNEL entries per channel, evicting the
    oldest first. (This is FIFO eviction, not LRU as the original comment
    claimed — entries are never re-ordered on access.)

    Fixes over the original snippet: ``time`` was used without being
    imported; ``setdefault`` replaces the check-then-insert dance.
    """
    history = channel_histories.setdefault(message.channel.id, [])
    history.append({
        'content': message.content,
        'timestamp': time.time(),
        'author': message.author.id,
    })

    # Trim from the front so only the newest MAX_HISTORY_PER_CHANNEL survive.
    del history[:-MAX_HISTORY_PER_CHANNEL]

# Usage in stt_client.py - Clear audio buffers
@track_memory_usage('stt_transcription')
async def transcribe_audio(audio_data):
    """Transcribe *audio_data*, releasing our reference to it afterwards.

    Fixes over the original snippet: ``del audio_data`` only unbound the
    local parameter name — the caller's reference kept the buffer alive, so
    nothing was actually freed — and a per-call ``gc.collect()`` added real
    latency for no benefit (CPython reclaims unreferenced buffers via
    refcounting as soon as the last reference drops).
    """
    try:
        return await stt_api.transcribe(audio_data)
    finally:
        # Drop our reference promptly; the buffer is freed once the caller
        # releases theirs. No forced GC pass needed.
        audio_data = None

# Periodic memory cleanup
# NOTE(review): `tasks` is discord.ext.tasks, which this snippet never
# imports — `from discord.ext import tasks` must exist where this is used.
@tasks.loop(minutes=30)
async def cleanup_memory_periodically():
    """Every 30 minutes, clear the module caches and force a GC pass."""
    # Delegates all actual work to cleanup_memory() above.
    logger.info("Running periodic memory cleanup...")
    cleanup_memory()

# Close resources properly
class ResourceManager:
    """Manage HTTP sessions and other resources"""
    
    def __init__(self):
        self.http_session = None
    
    async def get_session(self):
        if self.http_session is None or self.http_session.closed:
            self.http_session = aiohttp.ClientSession()
        return self.http_session
    
    async def close(self):
        if self.http_session and not self.http_session.closed:
            await self.http_session.close()

# Single shared manager instance for the whole bot process.
resource_manager = ResourceManager()

# In bot.py cleanup
@bot.event
async def on_close():
    """Clean up resources on shutdown"""
    # NOTE(review): discord.py's documented shutdown path is Client.close();
    # confirm an `on_close` event is actually dispatched — otherwise this
    # handler never runs and the HTTP session leaks on shutdown.
    await resource_manager.close()
    cleanup_memory()

Severity

MEDIUM - Memory leaks cause crashes and poor long-term reliability.

Files Affected

bot/bot.py, bot/utils/voice_audio.py, cat-plugins/cat_client.py, bot/utils/llm.py, bot/stt_client.py, new file: bot/utils/memory_manager.py

Bot accumulates memory over time due to unclosed resources and unnecessary data retention, causing crashes after extended runtime. ## Where It Occurs - bot/bot.py - Message history storage - bot/utils/voice_audio.py - Audio data buffers - cat-plugins/cat_client.py - Response caching - bot/utils/llm.py - LLM response storage - bot/stt_client.py - Audio transcript caching ## Why This Is a Problem 1. Memory Leaks: Unclosed resources accumulate memory 2. Crashes: Out-of-memory errors after hours/days 3. Poor Scaling: More users = more memory usage 4. Slow Degradation: Performance degrades as memory fills ## What Can Go Wrong ### Scenario 1: Message History Accumulation 1. Bot stores all messages in memory 2. After 24 hours, stored 100,000+ messages 3. Memory usage grows to 2GB+ 4. System starts swapping 5. Bot becomes unresponsive 6. OOM killer kills bot process ### Scenario 2: Unclosed HTTP Sessions 1. New aiohttp.ClientSession() for each request 2. Sessions never closed 3. Connections pile up 4. File descriptors exhausted 5. Bot can't make new HTTP requests 6. Bot fails completely ### Scenario 3: Audio Data Not Cleared 1. Voice chat sessions create audio buffers 2. After user leaves, buffers not cleared 3. Memory grows with each voice session 4. After 50+ voice sessions, memory exhausted 5. 
Bot crashes ## Proposed Fix Implement memory optimization strategies: ```python # bot/utils/memory_manager.py - NEW FILE import weakref import gc from functools import wraps from typing import Any, Optional, Dict from collections import OrderedDict import logging import psutil logger = logging.getLogger(__name__) class LRUConfig: def __init__(self, max_size: int = 1000, ttl: float = 3600.0): self.max_size = max_size self.ttl = ttl # Time-to-live in seconds class LRUCache: """LRU cache with TTL support""" def __init__(self, config: LRUConfig): self.max_size = config.max_size self.ttl = config.ttl self.cache: OrderedDict = OrderedDict() def get(self, key: str) -> Optional[Any]: if key not in self.cache: return None entry = self.cache[key] # Check TTL if time.time() - entry['timestamp'] > self.ttl: del self.cache[key] return None # Move to end (most recently used) self.cache.move_to_end(key) return entry['value'] def set(self, key: str, value: Any): # Remove oldest if at capacity if len(self.cache) >= self.max_size: self.cache.popitem(last=False) self.cache[key] = { 'value': value, 'timestamp': time.time() } self.cache.move_to_end(key) def clear(self): self.cache.clear() # Global caches message_cache = LRUCache(LRUConfig(max_size=1000, ttl=3600)) response_cache = LRUCache(LRUConfig(max_size=500, ttl=7200)) audio_buffer_cache = LRUCache(LRUConfig(max_size=100, ttl=300)) def track_memory_usage(operation_name: str): """Decorator to track memory usage of operations""" def decorator(func): @wraps(func) async def async_wrapper(*args, **kwargs): process = psutil.Process() before = process.memory_info().rss / 1024 / 1024 # MB try: result = await func(*args, **kwargs) return result finally: after = process.memory_info().rss / 1024 / 1024 # MB delta = after - before if delta > 10: # Log if delta > 10MB logger.warning( f"Memory: {operation_name} - " f"Before: {before:.2f}MB, " f"After: {after:.2f}MB, " f"Delta: {delta:.2f}MB" ) return async_wrapper return decorator def 
cleanup_memory(): """Force garbage collection and log results""" before = psutil.Process().memory_info().rss / 1024 / 1024 # Clear caches message_cache.clear() response_cache.clear() audio_buffer_cache.clear() # Force garbage collection gc.collect() after = psutil.Process().memory_info().rss / 1024 / 1024 logger.info(f"Memory cleanup: {before:.2f}MB -> {after:.2f}MB (freed {before-after:.2f}MB)") # Usage in bot.py - Limit message history MAX_HISTORY_PER_CHANNEL = 100 channel_histories = {} async def store_message(message): """Store message with LRU eviction""" channel_id = message.channel.id if channel_id not in channel_histories: channel_histories[channel_id] = [] history = channel_histories[channel_id] history.append({ 'content': message.content, 'timestamp': time.time(), 'author': message.author.id }) # Enforce LRU if len(history) > MAX_HISTORY_PER_CHANNEL: history.pop(0) # Remove oldest # Usage in stt_client.py - Clear audio buffers @track_memory_usage('stt_transcription') async def transcribe_audio(audio_data): """Transcribe audio with cleanup""" try: result = await stt_api.transcribe(audio_data) return result finally: # Explicitly clear audio data del audio_data gc.collect() # Periodic memory cleanup @tasks.loop(minutes=30) async def cleanup_memory_periodically(): """Periodically clean up memory""" logger.info("Running periodic memory cleanup...") cleanup_memory() # Close resources properly class ResourceManager: """Manage HTTP sessions and other resources""" def __init__(self): self.http_session = None async def get_session(self): if self.http_session is None or self.http_session.closed: self.http_session = aiohttp.ClientSession() return self.http_session async def close(self): if self.http_session and not self.http_session.closed: await self.http_session.close() resource_manager = ResourceManager() # In bot.py cleanup @bot.event async def on_close(): """Clean up resources on shutdown""" await resource_manager.close() cleanup_memory() ``` ## Severity MEDIUM 
- Memory leaks cause crashes and poor long-term reliability. ## Files Affected bot/bot.py, bot/utils/voice_audio.py, cat-plugins/cat_client.py, bot/utils/llm.py, bot/stt_client.py, new file: bot/utils/memory_manager.py
Sign in to join this conversation.
No Label
1 Participants
Notifications
Due Date
No due date set.
Dependencies

No dependencies set.

Reference: Koko210/miku-discord#31