add: absorb soprano_to_rvc as regular subdirectory

Voice conversion pipeline (Soprano TTS → RVC) with Docker support.
Previously tracked as bare gitlink; removed .git/ directories and
absorbed into main repo for unified tracking.

Includes: Soprano TTS, RVC WebUI integration, Docker configs,
WebSocket API, and benchmark scripts.
Updated .gitignore to exclude large model weights (*.pth, *.pt, *.onnx, *.index).
287 files added; the ~3.1 GB of ML model weights are excluded from tracking via .gitignore.
This commit is contained in:
2026-03-04 00:24:53 +02:00
parent 34b184a05a
commit 8ca716029e
287 changed files with 47102 additions and 0 deletions

View File

@@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""
Soprano Server - Runs on GTX 1660 (CUDA)
Generates TTS audio and sends chunks to RVC via ZMQ
"""
import os
import sys
import time
import json
import logging
import zmq
import numpy as np
# Strip cwd-style entries (and this script's own directory) from sys.path
# so the local folder cannot shadow the installed `soprano` package.
current_dir = os.path.dirname(os.path.abspath(__file__))
for _shadow_entry in ('', '.', current_dir):
    if _shadow_entry in sys.path:
        sys.path.remove(_shadow_entry)
from soprano import SopranoTTS

# Module-wide logging: timestamped records tagged with the logger name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class SopranoServer:
    """ZMQ REP server wrapping a SopranoTTS model.

    Listens on a REP socket for JSON job requests of the form
    ``{'job_id': str, 'text': str}`` and replies with the synthesized
    audio as a JSON payload. A ``{'shutdown': true}`` message stops the
    server loop.
    """

    # Soprano's fixed output sample rate in Hz (used for both the
    # duration computation and the value reported to the client).
    SAMPLE_RATE = 32000

    def __init__(self, zmq_port=5555):
        """Bind the ZMQ REP socket and load the TTS model.

        Args:
            zmq_port: TCP port to bind the REP socket on (default 5555).
        """
        logger.info("Initializing Soprano Server (GTX 1660 CUDA)...")
        # Setup ZMQ: REP socket — strict recv/send alternation per request.
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.REP)  # Reply socket (receives requests, sends responses)
        self.socket.bind(f"tcp://*:{zmq_port}")
        logger.info(f"✓ ZMQ listening on port {zmq_port}")
        # Load Soprano model
        logger.info("Loading Soprano TTS model...")
        self.soprano = SopranoTTS(
            backend='lmdeploy',
            device='cuda',
            cache_size_mb=500,
            decoder_batch_size=2
        )
        logger.info(f"✓ Soprano loaded on {self.soprano.device} using {self.soprano.backend}")

    def process_job(self, job_data):
        """Synthesize audio for a single TTS job.

        Args:
            job_data: dict with 'job_id' and 'text' keys.

        Returns:
            On success, a dict with 'job_id', 'audio' (list of samples),
            'sample_rate', 'elapsed' and 'audio_duration'. On failure, a
            dict with 'job_id' and 'error'.
        """
        job_id = job_data['job_id']
        text = job_data['text']
        logger.info(f"[Job {job_id[:8]}] Processing: \"{text[:50]}...\"")
        start_time = time.time()
        try:
            # Synthesize the full utterance once. (A previous version also
            # fully consumed self.soprano.infer_stream(text) and discarded
            # every chunk before calling infer(), generating the same audio
            # twice per job.)
            full_audio = self.soprano.infer(text)
            elapsed = time.time() - start_time
            audio_duration = len(full_audio) / self.SAMPLE_RATE
            # Guard against a zero wall-clock delta on pathologically fast runs.
            realtime_factor = audio_duration / elapsed if elapsed > 0 else float('inf')
            logger.info(f"[Job {job_id[:8]}] Complete: {audio_duration:.2f}s audio in {elapsed:.2f}s ({realtime_factor:.2f}x realtime)")
            # Return audio to RVC server
            response = {
                'job_id': job_id,
                'audio': full_audio.tolist(),
                'sample_rate': self.SAMPLE_RATE,
                'elapsed': elapsed,
                'audio_duration': audio_duration
            }
            return response
        except Exception as e:
            logger.error(f"[Job {job_id[:8]}] Error: {e}", exc_info=True)
            return {'job_id': job_id, 'error': str(e)}

    def run(self):
        """Main loop: serve requests until shutdown or KeyboardInterrupt."""
        logger.info("Soprano Server ready - waiting for requests...")
        while True:
            try:
                # Wait for request from RVC server
                message = self.socket.recv_json()
                if message.get('shutdown'):
                    logger.info("Shutdown signal received")
                    self.socket.send_json({'status': 'shutting down'})
                    break
                # Process the TTS job
                response = self.process_job(message)
                # Send response back
                self.socket.send_json(response)
            except KeyboardInterrupt:
                logger.info("Shutting down...")
                break
            except Exception as e:
                logger.error(f"Error in main loop: {e}", exc_info=True)
                # Best-effort error reply; if recv itself failed the REP
                # socket is not in a sendable state, so swallow that too.
                try:
                    self.socket.send_json({'error': str(e)})
                except Exception:
                    pass
if __name__ == '__main__':
    import argparse

    # CLI entry point: parse the listening port, then block in the serve loop.
    arg_parser = argparse.ArgumentParser(description='Soprano TTS Server (CUDA)')
    arg_parser.add_argument('--port', type=int, default=5555, help='ZMQ port')
    cli_args = arg_parser.parse_args()
    SopranoServer(zmq_port=cli_args.port).run()