add: absorb soprano_to_rvc as regular subdirectory
Voice conversion pipeline (Soprano TTS → RVC) with Docker support. Previously tracked as bare gitlink; removed .git/ directories and absorbed into main repo for unified tracking. Includes: Soprano TTS, RVC WebUI integration, Docker configs, WebSocket API, and benchmark scripts. Updated .gitignore to exclude large model weights (*.pth, *.pt, *.onnx, *.index). 287 files (3.1GB of ML weights properly excluded via gitignore).
This commit is contained in:
208
soprano_to_rvc/websocket_client_example.py
Executable file
208
soprano_to_rvc/websocket_client_example.py
Executable file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
WebSocket Client Example for Soprano + RVC Streaming API
|
||||
|
||||
This demonstrates how to use the WebSocket endpoint from a Discord bot
|
||||
to stream audio as LLM tokens arrive.
|
||||
|
||||
Usage:
|
||||
python websocket_client_example.py "Hello! How are you today?"
|
||||
"""
|
||||
import asyncio
|
||||
import websockets
|
||||
import json
|
||||
import sys
|
||||
import numpy as np
|
||||
import sounddevice as sd
|
||||
|
||||
async def stream_tts(text: str, server_url: str = "ws://localhost:8765/ws/stream"):
    """
    Stream TTS audio token-by-token, simulating LLM token streaming.

    The text is split on whitespace and each word is sent to the server as a
    JSON message ``{"token": ..., "pitch_shift": 0}``. After every send the
    client waits up to 2 seconds for one audio message and queues it for
    playback. Once all tokens are sent, a ``{"flush": true}`` message is sent
    and remaining audio is drained until no message arrives for 1 second.

    Received payloads are interpreted as float32 samples and played on a
    mono 48 kHz output stream.
    NOTE(review): assumes the server emits raw float32 mono PCM at 48 kHz in
    binary frames -- confirm against the server implementation.

    If the server closes the connection at any point, sending stops, the
    already-queued audio is still played, and the function returns normally.

    Args:
        text: The text to synthesize
        server_url: WebSocket server URL
    """
    print(f"Connecting to {server_url}...")

    async with websockets.connect(server_url) as websocket:
        print("Connected! Streaming tokens...")

        # Simulate token-by-token streaming
        # In real Discord bot, these come from llamacpp streaming response
        tokens = text.split()  # Simple word-by-word tokenization

        # Audio playback setup
        sample_rate = 48000
        audio_queue = asyncio.Queue()

        async def play_audio():
            """Play queued audio chunks until the ``None`` sentinel arrives."""
            loop = asyncio.get_running_loop()
            stream = sd.OutputStream(
                samplerate=sample_rate,
                channels=1,
                dtype='float32'
            )
            stream.start()

            try:
                while True:
                    audio_bytes = await audio_queue.get()
                    if audio_bytes is None:  # Sentinel for end
                        break

                    # Convert bytes back to numpy array
                    audio_data = np.frombuffer(audio_bytes, dtype=np.float32)
                    # stream.write() blocks until the device has consumed the
                    # buffer; run it in a worker thread so the event loop can
                    # keep sending tokens and receiving audio meanwhile.
                    await loop.run_in_executor(None, stream.write, audio_data)
                    print(f" ♪ Playing {len(audio_data)} samples")
            finally:
                stream.stop()
                stream.close()

        # Start playback task
        playback_task = asyncio.create_task(play_audio())

        try:
            connection_open = True

            # Send tokens
            for i, token in enumerate(tokens):
                # Add space except for first token
                token_with_space = token if i == 0 else " " + token

                message = {
                    "token": token_with_space,
                    "pitch_shift": 0  # Adjust pitch if needed (-12 to +12 semitones)
                }

                try:
                    await websocket.send(json.dumps(message))
                    print(f"→ Sent token: '{token_with_space}'")

                    # Receive and queue audio; give up after the timeout so the
                    # next token can be sent even if no audio is ready yet.
                    audio_bytes = await asyncio.wait_for(
                        websocket.recv(),
                        timeout=2.0
                    )
                    await audio_queue.put(audio_bytes)
                except asyncio.TimeoutError:
                    print(" (no audio yet, continuing...)")
                except websockets.exceptions.ConnectionClosed:
                    connection_open = False
                    break

            # Flushing only makes sense while the socket is still open;
            # sending on a closed connection would raise.
            if connection_open:
                try:
                    # Flush remaining buffer
                    print("→ Flushing buffer...")
                    await websocket.send(json.dumps({"flush": True}))

                    # Receive remaining audio chunks until the server goes quiet
                    while True:
                        audio_bytes = await asyncio.wait_for(
                            websocket.recv(),
                            timeout=1.0
                        )
                        await audio_queue.put(audio_bytes)
                except asyncio.TimeoutError:
                    print(" (flush complete)")
                except websockets.exceptions.ConnectionClosed:
                    print(" (connection closed by server)")
        finally:
            # Always signal end of audio, even on errors, so the playback task
            # terminates and the output stream is closed. The queue is
            # unbounded, so put_nowait cannot fail here.
            audio_queue.put_nowait(None)
            await playback_task

        print("✓ Done!")
|
||||
|
||||
async def stream_tts_simple(text: str, server_url: str = "ws://localhost:8765/ws/stream"):
    """
    Simplified version for Discord bot integration.

    Async generator: sends ``text`` word by word as
    ``{"token": ..., "pitch_shift": 0}`` JSON messages and yields each audio
    message received from the server as it arrives. After the last token a
    ``{"flush": true}`` message is sent and remaining audio is yielded until
    no message arrives for 1 second.

    If the server closes the connection, the generator simply ends instead
    of raising, matching how ``stream_tts`` treats a closed connection.

    Args:
        text: The text to synthesize
        server_url: WebSocket server URL

    Yields:
        Each message payload exactly as returned by ``websocket.recv()``.
    """
    async with websockets.connect(server_url) as websocket:
        # Send tokens (in real bot, these come from LLM stream)
        tokens = text.split()

        try:
            for i, token in enumerate(tokens):
                # Add space except for first token
                token_with_space = token if i == 0 else " " + token

                await websocket.send(json.dumps({
                    "token": token_with_space,
                    "pitch_shift": 0
                }))

                # Yield audio chunks as they arrive; if nothing shows up within
                # the timeout, move on to the next token.
                try:
                    audio_bytes = await asyncio.wait_for(
                        websocket.recv(),
                        timeout=2.0
                    )
                except asyncio.TimeoutError:
                    continue
                yield audio_bytes

            # Flush and get remaining audio
            await websocket.send(json.dumps({"flush": True}))

            while True:
                try:
                    audio_bytes = await asyncio.wait_for(
                        websocket.recv(),
                        timeout=1.0
                    )
                except asyncio.TimeoutError:
                    # Server went quiet: treat the flush as complete.
                    break
                yield audio_bytes
        except websockets.exceptions.ConnectionClosed:
            # The server hung up; there is no more audio to deliver.
            return
|
||||
|
||||
# Example Discord.py integration
|
||||
"""
|
||||
# In your Discord bot cog:
|
||||
|
||||
import asyncio
import io
import discord
from discord.ext import commands
import websockets
import json
|
||||
|
||||
class VoiceCog(commands.Cog):
|
||||
async def speak_streaming(self, ctx, text: str):
|
||||
'''Stream TTS to Discord voice channel'''
|
||||
|
||||
# Connect to voice if not already connected
|
||||
if not ctx.voice_client:
|
||||
await ctx.author.voice.channel.connect()
|
||||
|
||||
vc = ctx.voice_client
|
||||
|
||||
# Stream audio via WebSocket
|
||||
async with websockets.connect('ws://localhost:8765/ws/stream') as ws:
|
||||
# Simulate token streaming (replace with actual LLM streaming)
|
||||
tokens = text.split()
|
||||
|
||||
for token in tokens:
|
||||
# Send token
|
||||
await ws.send(json.dumps({
|
||||
"token": " " + token,
|
||||
"pitch_shift": 0
|
||||
}))
|
||||
|
||||
# Receive audio
|
||||
try:
|
||||
audio_bytes = await asyncio.wait_for(ws.recv(), timeout=2.0)
|
||||
|
||||
# Convert to Discord audio format
|
||||
audio_source = discord.PCMAudio(io.BytesIO(audio_bytes))
|
||||
|
||||
# Play (non-blocking)
|
||||
if not vc.is_playing():
|
||||
vc.play(audio_source)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
|
||||
# Flush
|
||||
await ws.send(json.dumps({"flush": True}))
|
||||
"""
|
||||
|
||||
if __name__ == "__main__":
    # Everything after the script name is the text to speak; with no
    # arguments, fall back to a built-in demo sentence.
    cli_words = sys.argv[1:]
    if cli_words:
        text = " ".join(cli_words)
    else:
        text = "Hello! This is a test of the WebSocket streaming API. It should feel very natural!"

    print(f"Text: {text}\n")
    asyncio.run(stream_tts(text))
|
||||
Reference in New Issue
Block a user