add: absorb soprano_to_rvc as regular subdirectory

Voice conversion pipeline (Soprano TTS → RVC) with Docker support.
Previously tracked as bare gitlink; removed .git/ directories and
absorbed into main repo for unified tracking.

Includes: Soprano TTS, RVC WebUI integration, Docker configs,
WebSocket API, and benchmark scripts.
Updated .gitignore to exclude large model weights (*.pth, *.pt, *.onnx, *.index).
287 files added; the ~3.1 GB of ML model weights are excluded from tracking via .gitignore.
This commit is contained in:
2026-03-04 00:24:53 +02:00
parent 34b184a05a
commit 8ca716029e
287 changed files with 47102 additions and 0 deletions

View File

@@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""
Soprano Server - Runs on GTX 1660 (CUDA)
Generates TTS audio and sends chunks to RVC via ZMQ
"""
import os
import sys
import time
import json
import logging
import zmq
import numpy as np
# Strip cwd-style entries (and this script's own directory) from sys.path
# so the local folder cannot shadow the installed `soprano` package.
current_dir = os.path.dirname(os.path.abspath(__file__))
for _shadow_entry in ('', '.', current_dir):
    if _shadow_entry in sys.path:
        sys.path.remove(_shadow_entry)
from soprano import SopranoTTS

# Module-wide logging: timestamped records tagged with the logger name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class SopranoServer:
    """ZMQ REP server wrapping a SopranoTTS model.

    Listens on a REP socket for JSON job requests of the form
    ``{'job_id': str, 'text': str}`` and replies with the synthesized
    audio as a JSON payload. A ``{'shutdown': true}`` message stops the
    server loop.
    """

    # Soprano's fixed output sample rate in Hz (used for both the
    # duration computation and the value reported to the client).
    SAMPLE_RATE = 32000

    def __init__(self, zmq_port=5555):
        """Bind the ZMQ REP socket and load the TTS model.

        Args:
            zmq_port: TCP port to bind the REP socket on (default 5555).
        """
        logger.info("Initializing Soprano Server (GTX 1660 CUDA)...")
        # Setup ZMQ: REP socket — strict recv/send alternation per request.
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.REP)  # Reply socket (receives requests, sends responses)
        self.socket.bind(f"tcp://*:{zmq_port}")
        logger.info(f"✓ ZMQ listening on port {zmq_port}")
        # Load Soprano model
        logger.info("Loading Soprano TTS model...")
        self.soprano = SopranoTTS(
            backend='lmdeploy',
            device='cuda',
            cache_size_mb=500,
            decoder_batch_size=2
        )
        logger.info(f"✓ Soprano loaded on {self.soprano.device} using {self.soprano.backend}")

    def process_job(self, job_data):
        """Synthesize audio for a single TTS job.

        Args:
            job_data: dict with 'job_id' and 'text' keys.

        Returns:
            On success, a dict with 'job_id', 'audio' (list of samples),
            'sample_rate', 'elapsed' and 'audio_duration'. On failure, a
            dict with 'job_id' and 'error'.
        """
        job_id = job_data['job_id']
        text = job_data['text']
        logger.info(f"[Job {job_id[:8]}] Processing: \"{text[:50]}...\"")
        start_time = time.time()
        try:
            # Synthesize the full utterance once. (A previous version also
            # fully consumed self.soprano.infer_stream(text) and discarded
            # every chunk before calling infer(), generating the same audio
            # twice per job.)
            full_audio = self.soprano.infer(text)
            elapsed = time.time() - start_time
            audio_duration = len(full_audio) / self.SAMPLE_RATE
            # Guard against a zero wall-clock delta on pathologically fast runs.
            realtime_factor = audio_duration / elapsed if elapsed > 0 else float('inf')
            logger.info(f"[Job {job_id[:8]}] Complete: {audio_duration:.2f}s audio in {elapsed:.2f}s ({realtime_factor:.2f}x realtime)")
            # Return audio to RVC server
            response = {
                'job_id': job_id,
                'audio': full_audio.tolist(),
                'sample_rate': self.SAMPLE_RATE,
                'elapsed': elapsed,
                'audio_duration': audio_duration
            }
            return response
        except Exception as e:
            logger.error(f"[Job {job_id[:8]}] Error: {e}", exc_info=True)
            return {'job_id': job_id, 'error': str(e)}

    def run(self):
        """Main loop: serve requests until shutdown or KeyboardInterrupt."""
        logger.info("Soprano Server ready - waiting for requests...")
        while True:
            try:
                # Wait for request from RVC server
                message = self.socket.recv_json()
                if message.get('shutdown'):
                    logger.info("Shutdown signal received")
                    self.socket.send_json({'status': 'shutting down'})
                    break
                # Process the TTS job
                response = self.process_job(message)
                # Send response back
                self.socket.send_json(response)
            except KeyboardInterrupt:
                logger.info("Shutting down...")
                break
            except Exception as e:
                logger.error(f"Error in main loop: {e}", exc_info=True)
                # Best-effort error reply; if recv itself failed the REP
                # socket is not in a sendable state, so swallow that too.
                try:
                    self.socket.send_json({'error': str(e)})
                except Exception:
                    pass
if __name__ == '__main__':
    import argparse

    # CLI entry point: parse the listening port, then block in the serve loop.
    arg_parser = argparse.ArgumentParser(description='Soprano TTS Server (CUDA)')
    arg_parser.add_argument('--port', type=int, default=5555, help='ZMQ port')
    cli_args = arg_parser.parse_args()
    SopranoServer(zmq_port=cli_args.port).run()