add: absorb soprano_to_rvc as regular subdirectory

Voice conversion pipeline (Soprano TTS → RVC) with Docker support. Previously tracked as a bare gitlink; the nested .git/ directories were removed and the contents absorbed into the main repo for unified tracking. Includes: Soprano TTS, RVC WebUI integration, Docker configs, WebSocket API, and benchmark scripts. Updated .gitignore to exclude large model weights (*.pth, *.pt, *.onnx, *.index). Adds 287 files; 3.1GB of ML weights are excluded via .gitignore.
2026-03-04 00:24:53 +02:00
parent 34b184a05a
commit 8ca716029e
287 changed files with 47102 additions and 0 deletions
--- a/soprano_to_rvc/test_ws_perf.py
+++ b/soprano_to_rvc/test_ws_perf.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""Quick WebSocket performance test for measuring realtime factor"""
+
+import asyncio
+import websockets
+import json
+import time
+
+async def test_synthesis():
+    uri = "ws://localhost:8765/ws/stream"
+    
+    # Test sentences of varying lengths
+    test_sentences = [
+        "Hello, this is a test.",
+        "The quick brown fox jumps over the lazy dog.",
+        "Artificial intelligence is revolutionizing the way we interact with technology."
+    ]
+    
+    async with websockets.connect(uri) as websocket:
+        print("Connected to WebSocket\n")
+        
+        for idx, sentence in enumerate(test_sentences, 1):
+            print(f"{'='*70}")
+            print(f"Test {idx}: {sentence}")
+            print('='*70)
+            
+            # Track client-side timing
+            client_start = time.time()
+            
+            # Send the sentence token by token
+            for char in sentence:
+                await websocket.send(json.dumps({"token": char}))
+            
+            # Flush to trigger synthesis
+            await websocket.send(json.dumps({"flush": True}))
+            
+            # Receive all audio chunks
+            chunk_count = 0
+            total_bytes = 0
+            first_chunk_time = None
+            
+            try:
+                while True:
+                    audio_data = await asyncio.wait_for(websocket.recv(), timeout=10.0)
+                    chunk_count += 1
+                    total_bytes += len(audio_data)
+                    
+                    if first_chunk_time is None:
+                        first_chunk_time = time.time() - client_start
+                        
+            except asyncio.TimeoutError:
+                client_end = time.time()
+                client_total = client_end - client_start
+                
+                # Calculate audio duration from received bytes
+                # Format: PCM float32, 48kHz, mono
+                # 4 bytes per sample, 48000 samples per second
+                samples = total_bytes / 4
+                audio_duration = samples / 48000
+                
+                print(f"\n📊 CLIENT-SIDE METRICS:")
+                print(f"  ├─ Chunks received: {chunk_count}")
+                print(f"  ├─ Total bytes: {total_bytes:,} ({total_bytes/1024:.1f} KB)")
+                print(f"  ├─ Audio duration: {audio_duration:.2f}s")
+                print(f"  ├─ Total time: {client_total:.2f}s")
+                print(f"  ├─ Time to first chunk: {first_chunk_time:.2f}s")
+                print(f"  └─ Client realtime factor: {audio_duration/client_total:.2f}x")
+                print("\n⏱️  SERVER-SIDE METRICS: (check logs above)\n")
+
+# Run the benchmark only when executed as a script, not when imported.
+if __name__ == "__main__":
+    asyncio.run(test_synthesis())