#!/usr/bin/env python3
"""
WebSocket Client Example for Soprano + RVC Streaming API

This demonstrates how to use the WebSocket endpoint from a Discord bot
to stream audio as LLM tokens arrive.

Usage:
    python websocket_client_example.py "Hello! How are you today?"
"""

import asyncio
import json
import sys
from typing import AsyncIterator, Iterator

import numpy as np
import sounddevice as sd
import websockets

DEFAULT_SERVER_URL = "ws://localhost:8765/ws/stream"

# Playback format used by this client: mono float32 samples at 48 kHz.
SAMPLE_RATE = 48000
FLOAT32_BYTES = 4

# How long to wait for audio after sending a token / after sending the flush.
TOKEN_RECV_TIMEOUT = 2.0
FLUSH_RECV_TIMEOUT = 1.0


def _spaced_tokens(text: str) -> Iterator[str]:
    """Yield whitespace-separated words; every word but the first gets a leading space."""
    for index, word in enumerate(text.split()):
        yield word if index == 0 else " " + word


def _token_message(token: str, pitch_shift: int) -> str:
    """Serialize one token request as the JSON text sent to the server."""
    return json.dumps({"token": token, "pitch_shift": pitch_shift})


def _flush_message() -> str:
    """Serialize the request that asks the server to flush its buffer."""
    return json.dumps({"flush": True})


async def _play_audio(audio_queue: asyncio.Queue, sample_rate: int) -> None:
    """
    Play queued audio chunks on the default output device until a None sentinel.

    Args:
        audio_queue: Queue of received WebSocket messages; None marks the end.
        sample_rate: Sample rate of the output stream in Hz.
    """
    loop = asyncio.get_running_loop()
    stream = sd.OutputStream(
        samplerate=sample_rate,
        channels=1,
        dtype="float32",
    )
    stream.start()
    try:
        while True:
            message = await audio_queue.get()
            if message is None:  # Sentinel for end
                break

            # A text frame cannot be audio; np.frombuffer would raise on it.
            if isinstance(message, str):
                print(f" (ignoring non-audio message: {message!r})")
                continue

            # np.frombuffer rejects buffers that are not a whole number of
            # float32 samples, so drop any trailing partial sample.
            remainder = len(message) % FLOAT32_BYTES
            if remainder:
                message = message[:-remainder]

            # Convert bytes back to numpy array
            audio_data = np.frombuffer(message, dtype=np.float32)

            # stream.write() blocks until the data is written; run it in a
            # worker thread so the event loop keeps sending and receiving.
            await loop.run_in_executor(None, stream.write, audio_data)
            print(f" ♪ Playing {len(audio_data)} samples")
    finally:
        stream.stop()
        stream.close()


async def stream_tts(
    text: str,
    server_url: str = DEFAULT_SERVER_URL,
    pitch_shift: int = 0,
) -> None:
    """
    Stream TTS audio token-by-token, simulating LLM token streaming.

    The text is split on whitespace and sent one word at a time. After each
    token the client waits briefly for one audio message and queues it for
    playback; once all tokens are sent it asks the server to flush and plays
    whatever arrives until the server goes quiet.

    Args:
        text: The text to synthesize
        server_url: WebSocket server URL
        pitch_shift: Pitch adjustment sent with every token
            (-12 to +12 semitones)
    """
    print(f"Connecting to {server_url}...")

    async with websockets.connect(server_url) as websocket:
        print("Connected! Streaming tokens...")

        audio_queue: asyncio.Queue = asyncio.Queue()
        playback_task = asyncio.create_task(_play_audio(audio_queue, SAMPLE_RATE))

        try:
            # Simulate token-by-token streaming.
            # In real Discord bot, these come from llamacpp streaming response
            for token_with_space in _spaced_tokens(text):
                await websocket.send(_token_message(token_with_space, pitch_shift))
                print(f"→ Sent token: '{token_with_space}'")

                # The server may not have audio ready for every token, so
                # wait only briefly and move on if nothing arrives.
                try:
                    audio_bytes = await asyncio.wait_for(
                        websocket.recv(),
                        timeout=TOKEN_RECV_TIMEOUT,
                    )
                except asyncio.TimeoutError:
                    print(" (no audio yet, continuing...)")
                else:
                    await audio_queue.put(audio_bytes)

            # Flush remaining buffer
            print("→ Flushing buffer...")
            await websocket.send(_flush_message())

            # Receive remaining audio chunks; a quiet period ends the drain.
            try:
                while True:
                    audio_bytes = await asyncio.wait_for(
                        websocket.recv(),
                        timeout=FLUSH_RECV_TIMEOUT,
                    )
                    await audio_queue.put(audio_bytes)
            except asyncio.TimeoutError:
                print(" (flush complete)")
        except websockets.exceptions.ConnectionClosed:
            # The server went away mid-stream: stop talking to it but still
            # play whatever audio was already received.
            print(" (connection closed by server)")
        finally:
            # Always signal end of audio so the playback task terminates and
            # releases the output device, even when streaming failed.
            audio_queue.put_nowait(None)
            await playback_task

        print("✓ Done!")


async def stream_tts_simple(
    text: str,
    server_url: str = DEFAULT_SERVER_URL,
    pitch_shift: int = 0,
) -> AsyncIterator[bytes]:
    """
    Simplified version for Discord bot integration.

    Async generator that sends the text word by word and yields each message
    received from the server exactly as it arrives (no decoding or playback).
    Assumes the server answers with binary audio frames.

    Args:
        text: The text to synthesize
        server_url: WebSocket server URL
        pitch_shift: Pitch adjustment sent with every token
            (-12 to +12 semitones)

    Yields:
        Audio chunks as they're generated.

    Raises:
        websockets.exceptions.ConnectionClosed: If the connection is closed
            while sending or receiving; it is propagated to the consumer.
    """
    async with websockets.connect(server_url) as websocket:
        # Send tokens (in real bot, these come from LLM stream)
        for token_with_space in _spaced_tokens(text):
            await websocket.send(_token_message(token_with_space, pitch_shift))

            # Yield audio chunks as they arrive; skip tokens that produce none
            # within the timeout.
            try:
                audio_bytes = await asyncio.wait_for(
                    websocket.recv(),
                    timeout=TOKEN_RECV_TIMEOUT,
                )
            except asyncio.TimeoutError:
                continue
            yield audio_bytes

        # Flush and get remaining audio
        await websocket.send(_flush_message())

        while True:
            try:
                audio_bytes = await asyncio.wait_for(
                    websocket.recv(),
                    timeout=FLUSH_RECV_TIMEOUT,
                )
            except asyncio.TimeoutError:
                break
            yield audio_bytes


# Example Discord.py integration
"""
# In your Discord bot cog:

import asyncio
import io
import json

import discord
import websockets
from discord.ext import commands

class VoiceCog(commands.Cog):
    async def speak_streaming(self, ctx, text: str):
        '''Stream TTS to Discord voice channel'''

        # Connect to voice if not already connected
        if not ctx.voice_client:
            await ctx.author.voice.channel.connect()

        vc = ctx.voice_client

        # Stream audio via WebSocket
        async with websockets.connect('ws://localhost:8765/ws/stream') as ws:
            # Simulate token streaming (replace with actual LLM streaming)
            tokens = text.split()

            for token in tokens:
                # Send token
                await ws.send(json.dumps({
                    "token": " " + token,
                    "pitch_shift": 0
                }))

                # Receive audio
                try:
                    audio_bytes = await asyncio.wait_for(ws.recv(), timeout=2.0)

                    # Convert to Discord audio format
                    # NOTE: discord.PCMAudio expects raw 16-bit 48 kHz stereo
                    # PCM. If the server sends float32 mono (as stream_tts
                    # above assumes), convert the samples before playing.
                    audio_source = discord.PCMAudio(io.BytesIO(audio_bytes))

                    # Play (non-blocking)
                    # NOTE: a chunk that arrives while another is still
                    # playing is dropped here; queue chunks in a real bot.
                    if not vc.is_playing():
                        vc.play(audio_source)

                except asyncio.TimeoutError:
                    continue

            # Flush
            await ws.send(json.dumps({"flush": True}))
"""


def main() -> None:
    """Synthesize the command-line text, or a built-in sample when none is given."""
    if len(sys.argv) < 2:
        text = "Hello! This is a test of the WebSocket streaming API. It should feel very natural!"
    else:
        text = " ".join(sys.argv[1:])

    print(f"Text: {text}\n")

    asyncio.run(stream_tts(text))


if __name__ == "__main__":
    main()