MEDIUM: Add Performance Logging #30

Open
opened 2026-02-16 22:53:49 +02:00 by Koko210 · 0 comments
Owner

Critical operations lack performance logging, making it difficult to identify bottlenecks and optimize bot responsiveness.

Where It Occurs

  • bot/bot.py - Message handling, voice events
  • bot/utils/llm.py - LLM API calls
  • bot/stt_client.py - Speech-to-text processing
  • bot/utils/voice_audio.py - Audio processing
  • cat-plugins/cat_client.py - Cheshire Cat calls

Why This Is a Problem

  1. No Visibility: Can't identify slow operations
  2. Poor Debugging: No metrics to diagnose performance issues
  3. No Optimization: Don't know where to focus efforts
  4. Silent Degradation: Performance degrades unnoticed

What Can Go Wrong

Scenario 1: Slow LLM Responses

  1. Users complain bot is slow
  2. No way to measure LLM API latency
  3. Don't know if the LLM is slow or if there's a network issue
  4. Can't set appropriate timeouts
  5. Can't optimize prompt to reduce latency

Scenario 2: Memory Leaks

  1. Bot becomes unresponsive after several hours
  2. No logging of memory usage
  3. Can't identify if memory is growing
  4. Don't know which operation is causing leak
  5. Hard to debug and fix

Scenario 3: Voice Chat Lag

  1. Voice responses become delayed over time
  2. No metrics on STT processing time
  3. No metrics on audio generation time
  4. Don't know where the bottleneck is
  5. Can't optimize for better UX

Proposed Fix

Implement comprehensive performance logging:

# bot/utils/performance.py - NEW FILE
import time
import logging
import psutil
from functools import wraps
from typing import Callable, Optional
from contextlib import contextmanager
from collections import defaultdict
import asyncio

# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# Performance metrics storage
# Maps operation name -> list of sample dicts with keys 'duration_ms',
# 'memory_delta_mb', 'timestamp' (see PerformanceTracker.__exit__).
# NOTE(review): this grows without bound over the process lifetime — consider
# capping per-operation history (e.g. collections.deque with maxlen).
performance_metrics = defaultdict(list)

class PerformanceTracker:
    """Context manager that records duration and RSS memory delta of a block.

    On exit, one sample dict is appended to the module-level
    ``performance_metrics`` under ``operation_name`` and a summary line is
    logged at INFO level. Exceptions raised inside the ``with`` block are
    still recorded and then propagated (``__exit__`` returns None).
    """

    def __init__(self, operation_name: str):
        self.operation_name = operation_name
        self.start_time: Optional[float] = None
        self.start_memory: Optional[float] = None

    def __enter__(self):
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump backwards (NTP adjustments), which would produce negative or
        # wildly wrong durations.
        self.start_time = time.perf_counter()
        self.start_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        end_time = time.perf_counter()
        end_memory = psutil.Process().memory_info().rss / 1024 / 1024  # MB

        duration = (end_time - self.start_time) * 1000  # ms
        memory_delta = end_memory - self.start_memory  # MB

        performance_metrics[self.operation_name].append({
            'duration_ms': duration,
            'memory_delta_mb': memory_delta,
            # Wall-clock timestamp of the sample (not used for the duration).
            'timestamp': time.time()
        })

        # Lazy %-style args avoid formatting cost when INFO is disabled.
        logger.info(
            "Performance: %s - Duration: %.2fms, Memory Delta: %.2fMB",
            self.operation_name, duration, memory_delta
        )

def log_performance(operation_name: str):
    """Decorator that runs a function inside a ``PerformanceTracker``.

    The tracked operation is labelled ``"<operation_name>.<function name>"``.
    Works transparently for both coroutine functions and plain functions;
    only the wrapper matching the function's kind is created.
    """
    def decorator(func: Callable):
        label = f"{operation_name}.{func.__name__}"

        if asyncio.iscoroutinefunction(func):
            @wraps(func)
            async def wrapper(*args, **kwargs):
                with PerformanceTracker(label):
                    return await func(*args, **kwargs)
        else:
            @wraps(func)
            def wrapper(*args, **kwargs):
                with PerformanceTracker(label):
                    return func(*args, **kwargs)

        return wrapper

    return decorator

@contextmanager
def track_operation(operation_name: str):
    """Track an ad-hoc code span under ``operation_name``.

    Thin generator-based wrapper around ``PerformanceTracker`` so call sites
    can use a plain ``with track_operation("name"):`` block.
    """
    tracker = PerformanceTracker(operation_name)
    with tracker:
        yield

def get_performance_stats(operation_name: Optional[str] = None) -> dict:
    """Return aggregate statistics for recorded performance samples.

    Args:
        operation_name: Restrict stats to one operation; when None, aggregate
            across all recorded operations.

    Returns:
        Empty dict when no samples exist; otherwise a dict with 'count',
        'duration' (min/max/avg/p50/p95/p99 in ms) and 'memory'
        (min/max/avg RSS delta in MB).
    """
    if operation_name:
        metrics = performance_metrics.get(operation_name, [])
    else:
        metrics = [m for ops in performance_metrics.values() for m in ops]

    if not metrics:
        return {}

    # Sort once and index, instead of re-sorting for every percentile.
    durations = sorted(m['duration_ms'] for m in metrics)
    memory_deltas = [m['memory_delta_mb'] for m in metrics]
    n = len(durations)

    def pct(q: float) -> float:
        # Nearest-rank style index, clamped defensively to the last element.
        return durations[min(int(n * q), n - 1)]

    return {
        'count': n,
        'duration': {
            'min': durations[0],
            'max': durations[-1],
            'avg': sum(durations) / n,
            'p50': pct(0.50),
            'p95': pct(0.95),
            'p99': pct(0.99),
        },
        'memory': {
            'min': min(memory_deltas),
            'max': max(memory_deltas),
            'avg': sum(memory_deltas) / len(memory_deltas),
        }
    }

def log_system_health():
    """Log current process CPU, memory, thread, and file-descriptor usage."""
    process = psutil.Process()
    # interval=0.1 blocks briefly but yields a meaningful CPU sample instead
    # of the meaningless 0.0 a first non-blocking call returns.
    cpu_percent = process.cpu_percent(interval=0.1)
    memory_info = process.memory_info()

    # psutil.Process.num_fds() exists only on POSIX; calling it on Windows
    # raises AttributeError, so fall back gracefully.
    open_files = process.num_fds() if hasattr(process, 'num_fds') else 'n/a'

    # Lazy %-style args avoid formatting cost when INFO is disabled.
    logger.info(
        "System Health - CPU: %s%%, Memory: %.2fMB, Threads: %s, Open Files: %s",
        cpu_percent, memory_info.rss / 1024 / 1024,
        process.num_threads(), open_files
    )

# Usage examples
# Decorated call is tracked as operation "llm.call_llm_api".
@log_performance('llm')
async def call_llm_api(prompt: str):
    # Example only: aiohttp and LLM_URL are assumed to be imported/defined in
    # the real module (bot/utils/llm.py) — not shown here.
    async with aiohttp.ClientSession() as session:
        async with session.post(LLM_URL, json={"prompt": prompt}) as resp:
            return await resp.json()

# In bot.py message handler
# Example only: `bot` and `generate_response` come from bot/bot.py.
@bot.event
async def on_message(message):
    # Wrap the whole handler so end-to-end latency (generation + send) is
    # captured under one operation name.
    with track_operation('message_processing'):
        # Process message
        response = await generate_response(message)
        await message.channel.send(response)

# In stt_client.py
async def transcribe_audio(audio_data):
    # Example only: stt_api is the module's STT backend (not shown here).
    # The tracker records how long the transcription call takes.
    with track_operation('stt_transcription'):
        result = await stt_api.transcribe(audio_data)
        return result

# Periodic health logging
# Presumably discord.py's tasks extension (`from discord.ext import tasks`) —
# TODO confirm the bot framework in use.
@tasks.loop(minutes=5)
async def log_health_periodically():
    log_system_health()
    
    # Log performance stats for key operations
    for operation in ['llm.call_llm_api', 'stt_transcription', 'message_processing']:
        stats = get_performance_stats(operation)
        if stats:
            logger.info(f"Performance Stats for {operation}: {stats}")

Severity

MEDIUM - Lack of performance logging makes debugging and optimization difficult.

Files Affected

bot/bot.py, bot/utils/llm.py, bot/stt_client.py, bot/utils/voice_audio.py, cat-plugins/cat_client.py, new file: bot/utils/performance.py

Critical operations lack performance logging, making it difficult to identify bottlenecks and optimize bot responsiveness. ## Where It Occurs - bot/bot.py - Message handling, voice events - bot/utils/llm.py - LLM API calls - bot/stt_client.py - Speech-to-text processing - bot/utils/voice_audio.py - Audio processing - cat-plugins/cat_client.py - Cheshire Cat calls ## Why This Is a Problem 1. No Visibility: Can't identify slow operations 2. Poor Debugging: No metrics to diagnose performance issues 3. No Optimization: Don't know where to focus efforts 4. Silent Degradation: Performance degrades unnoticed ## What Can Go Wrong ### Scenario 1: Slow LLM Responses 1. Users complain bot is slow 2. No way to measure LLM API latency 3. Don't know if LLM is slow or if there's network issue 4. Can't set appropriate timeouts 5. Can't optimize prompt to reduce latency ### Scenario 2: Memory Leaks 1. Bot becomes unresponsive after several hours 2. No logging of memory usage 3. Can't identify if memory is growing 4. Don't know which operation is causing leak 5. Hard to debug and fix ### Scenario 3: Voice Chat Lag 1. Voice responses become delayed over time 2. No metrics on STT processing time 3. No metrics on audio generation time 4. Don't know where bottleneck is 5. 
Can't optimize for better UX ## Proposed Fix Implement comprehensive performance logging: ```python # bot/utils/performance.py - NEW FILE import time import logging import psutil from functools import wraps from typing import Callable, Optional from contextlib import contextmanager from collections import defaultdict import asyncio logger = logging.getLogger(__name__) # Performance metrics storage performance_metrics = defaultdict(list) class PerformanceTracker: def __init__(self, operation_name: str): self.operation_name = operation_name self.start_time: Optional[float] = None self.start_memory: Optional[float] = None def __enter__(self): self.start_time = time.time() self.start_memory = psutil.Process().memory_info().rss / 1024 / 1024 # MB return self def __exit__(self, exc_type, exc_val, exc_tb): end_time = time.time() end_memory = psutil.Process().memory_info().rss / 1024 / 1024 # MB duration = (end_time - self.start_time) * 1000 # ms memory_delta = end_memory - self.start_memory # MB performance_metrics[self.operation_name].append({ 'duration_ms': duration, 'memory_delta_mb': memory_delta, 'timestamp': time.time() }) logger.info( f"Performance: {self.operation_name} - " f"Duration: {duration:.2f}ms, " f"Memory Delta: {memory_delta:.2f}MB" ) def log_performance(operation_name: str): """Decorator to log performance of functions""" def decorator(func: Callable): @wraps(func) async def async_wrapper(*args, **kwargs): with PerformanceTracker(f"{operation_name}.{func.__name__}"): return await func(*args, **kwargs) @wraps(func) def sync_wrapper(*args, **kwargs): with PerformanceTracker(f"{operation_name}.{func.__name__}"): return func(*args, **kwargs) if asyncio.iscoroutinefunction(func): return async_wrapper else: return sync_wrapper return decorator @contextmanager def track_operation(operation_name: str): """Context manager to track custom operations""" with PerformanceTracker(operation_name): yield def get_performance_stats(operation_name: Optional[str] = None) 
-> dict: """Get performance statistics""" if operation_name: metrics = performance_metrics.get(operation_name, []) else: metrics = [m for ops in performance_metrics.values() for m in ops] if not metrics: return {} durations = [m['duration_ms'] for m in metrics] memory_deltas = [m['memory_delta_mb'] for m in metrics] return { 'count': len(metrics), 'duration': { 'min': min(durations), 'max': max(durations), 'avg': sum(durations) / len(durations), 'p50': sorted(durations)[len(durations)//2], 'p95': sorted(durations)[int(len(durations)*0.95)], 'p99': sorted(durations)[int(len(durations)*0.99)], }, 'memory': { 'min': min(memory_deltas), 'max': max(memory_deltas), 'avg': sum(memory_deltas) / len(memory_deltas), } } def log_system_health(): """Log current system health metrics""" process = psutil.Process() cpu_percent = process.cpu_percent(interval=0.1) memory_info = process.memory_info() logger.info( f"System Health - " f"CPU: {cpu_percent}%, " f"Memory: {memory_info.rss / 1024 / 1024:.2f}MB, " f"Threads: {process.num_threads()}, " f"Open Files: {process.num_fds()}" ) # Usage examples @log_performance('llm') async def call_llm_api(prompt: str): async with aiohttp.ClientSession() as session: async with session.post(LLM_URL, json={"prompt": prompt}) as resp: return await resp.json() # In bot.py message handler @bot.event async def on_message(message): with track_operation('message_processing'): # Process message response = await generate_response(message) await message.channel.send(response) # In stt_client.py async def transcribe_audio(audio_data): with track_operation('stt_transcription'): result = await stt_api.transcribe(audio_data) return result # Periodic health logging @tasks.loop(minutes=5) async def log_health_periodically(): log_system_health() # Log performance stats for key operations for operation in ['llm.call_llm_api', 'stt_transcription', 'message_processing']: stats = get_performance_stats(operation) if stats: logger.info(f"Performance Stats for 
{operation}: {stats}") ``` ## Severity MEDIUM - Lack of performance logging makes debugging and optimization difficult. ## Files Affected bot/bot.py, bot/utils/llm.py, bot/stt_client.py, bot/utils/voice_audio.py, cat-plugins/cat_client.py, new file: bot/utils/performance.py
Sign in to join this conversation.
No Label
1 Participants
Notifications
Due Date
No due date set.
Dependencies

No dependencies set.

Reference: Koko210/miku-discord#30