Voice conversion pipeline (Soprano TTS → RVC) with Docker support. Previously tracked as bare gitlink; removed .git/ directories and absorbed into main repo for unified tracking. Includes: Soprano TTS, RVC WebUI integration, Docker configs, WebSocket API, and benchmark scripts. Updated .gitignore to exclude large model weights (*.pth, *.pt, *.onnx, *.index). 287 files (3.1GB of ML weights properly excluded via gitignore).
72 lines
2.7 KiB
Python
72 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Quick WebSocket performance test for measuring realtime factor"""
|
|
|
|
import asyncio
|
|
import websockets
|
|
import json
|
|
import time
|
|
|
|
def _pcm_duration_seconds(total_bytes, sample_rate=48000, bytes_per_sample=4):
    """Return the playback length, in seconds, of ``total_bytes`` of mono PCM.

    The defaults (4 bytes per sample, 48000 samples per second) are the stream
    format this script has always assumed: PCM float32, 48 kHz, mono.
    """
    return total_bytes / bytes_per_sample / sample_rate


async def _drain_until_idle(websocket, idle_timeout):
    """Consume messages until none arrives for ``idle_timeout`` seconds.

    Returns:
        ``(chunk_count, total_bytes, first_chunk_at, last_chunk_at)``. The two
        timestamps are ``time.perf_counter()`` readings taken when the first
        and the most recent message arrived; both are ``None`` when nothing was
        received before the timeout.
    """
    chunk_count = 0
    total_bytes = 0
    first_chunk_at = None
    last_chunk_at = None

    try:
        while True:
            message = await asyncio.wait_for(websocket.recv(), timeout=idle_timeout)
            last_chunk_at = time.perf_counter()
            if first_chunk_at is None:
                first_chunk_at = last_chunk_at
            chunk_count += 1
            # NOTE(review): every message is counted as audio payload; if the
            # server also sends text/status frames they inflate this total --
            # confirm against the server protocol.
            total_bytes += len(message)
    except asyncio.TimeoutError:
        # The protocol used here has no explicit end-of-stream marker, so a
        # quiet period of ``idle_timeout`` seconds is treated as "done".
        pass

    return chunk_count, total_bytes, first_chunk_at, last_chunk_at


async def test_synthesis(uri="ws://localhost:8765/ws/stream", idle_timeout=10.0):
    """Stream test sentences to the TTS WebSocket and print client-side metrics.

    Each sentence is sent one character at a time as ``{"token": <char>}``
    messages followed by ``{"flush": true}``; the replies are then counted
    until the connection stays silent for ``idle_timeout`` seconds.

    Timing is measured from just before the first token is sent up to the
    arrival of the *last* chunk, so the idle wait used to detect the end of
    the stream does not distort "Total time" or the realtime factor.

    Args:
        uri: WebSocket endpoint to benchmark.
        idle_timeout: Seconds of silence that mark the end of a response.
    """
    # Test sentences of varying lengths
    test_sentences = [
        "Hello, this is a test.",
        "The quick brown fox jumps over the lazy dog.",
        "Artificial intelligence is revolutionizing the way we interact with technology.",
    ]

    async with websockets.connect(uri) as websocket:
        print("Connected to WebSocket\n")

        for idx, sentence in enumerate(test_sentences, 1):
            print(f"{'='*70}")
            print(f"Test {idx}: {sentence}")
            print('='*70)

            # perf_counter is monotonic, so intervals are immune to clock jumps.
            client_start = time.perf_counter()

            # Send the sentence token by token
            for char in sentence:
                await websocket.send(json.dumps({"token": char}))

            # Flush to trigger synthesis
            await websocket.send(json.dumps({"flush": True}))

            chunk_count, total_bytes, first_chunk_at, last_chunk_at = (
                await _drain_until_idle(websocket, idle_timeout)
            )

            if last_chunk_at is None:
                # Nothing came back: the only elapsed time is the idle wait.
                client_total = time.perf_counter() - client_start
                first_chunk_text = "n/a"
            else:
                client_total = last_chunk_at - client_start
                first_chunk_text = f"{first_chunk_at - client_start:.2f}s"

            audio_duration = _pcm_duration_seconds(total_bytes)
            realtime_factor = audio_duration / client_total if client_total > 0 else 0.0

            print("\n📊 CLIENT-SIDE METRICS:")
            print(f" ├─ Chunks received: {chunk_count}")
            print(f" ├─ Total bytes: {total_bytes:,} ({total_bytes/1024:.1f} KB)")
            print(f" ├─ Audio duration: {audio_duration:.2f}s")
            print(f" ├─ Total time: {client_total:.2f}s")
            print(f" ├─ Time to first chunk: {first_chunk_text}")
            print(f" └─ Client realtime factor: {realtime_factor:.2f}x")
            print("\n⏱️ SERVER-SIDE METRICS: (check logs above)\n")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the benchmark coroutine to completion on a
    # fresh event loop.
    asyncio.run(test_synthesis())
|