Files
miku-discord/soprano_to_rvc/soprano_server.py

134 lines
4.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Soprano Server - Runs on GTX 1660 (CUDA)
Generates TTS audio and sends chunks to RVC via ZMQ
"""
import os
import sys
import time
import json
import logging
import zmq
import numpy as np
# Strip entries that would make Python resolve this script's own directory
# (or the CWD) ahead of the installed `soprano` package, shadowing it.
for _shadowing_entry in ('', '.', os.path.dirname(os.path.abspath(__file__))):
    if _shadowing_entry in sys.path:
        sys.path.remove(_shadowing_entry)

from soprano import SopranoTTS

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
class SopranoServer:
    """ZMQ REP server wrapping Soprano TTS.

    Listens for JSON job requests ({'job_id': ..., 'text': ...}) on a REP
    socket, synthesizes audio with Soprano on CUDA, and replies with the
    full audio as a JSON-serializable list of samples.
    """

    # Output sample rate of the Soprano model, in Hz (used to compute the
    # audio duration from the sample count).
    SAMPLE_RATE = 32000

    def __init__(self, zmq_port=5555):
        """Bind the REP socket on *zmq_port* and load the Soprano model.

        Args:
            zmq_port: TCP port to listen on for job requests (default 5555).
        """
        logger.info("Initializing Soprano Server (GTX 1660 CUDA)...")
        # Setup ZMQ
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.REP)  # Reply socket (receives requests, sends responses)
        self.socket.bind(f"tcp://*:{zmq_port}")
        logger.info(f"✓ ZMQ listening on port {zmq_port}")
        # Load Soprano model
        logger.info("Loading Soprano TTS model...")
        self.soprano = SopranoTTS(
            backend='lmdeploy',
            device='cuda',
            cache_size_mb=500,
            decoder_batch_size=2
        )
        logger.info(f"✓ Soprano loaded on {self.soprano.device} using {self.soprano.backend}")

    def process_job(self, job_data):
        """Process a single TTS job and build the ZMQ reply payload.

        Args:
            job_data: dict with at least 'job_id' (str) and 'text' (str).

        Returns:
            dict with 'job_id', 'audio' (list of samples), 'sample_rate',
            'elapsed' and 'audio_duration' on success, or
            {'job_id', 'error'} on failure.
        """
        job_id = job_data['job_id']
        text = job_data['text']
        logger.info(f"[Job {job_id[:8]}] Processing: \"{text[:50]}...\"")
        start_time = time.time()
        try:
            # BUGFIX: the previous implementation fully drained
            # self.soprano.infer_stream(text) — synthesizing the audio once —
            # then threw that audio away and called self.soprano.infer(text),
            # synthesizing everything a second time. Synthesize exactly once.
            # (If chunk-by-chunk streaming to RVC is wanted later, switch back
            # to infer_stream and send each chunk as it is produced.)
            full_audio = self.soprano.infer(text)
            elapsed = time.time() - start_time
            audio_duration = len(full_audio) / self.SAMPLE_RATE
            # Guard against a zero elapsed time (e.g. cached/instant result).
            realtime_factor = audio_duration / elapsed if elapsed > 0 else float('inf')
            logger.info(f"[Job {job_id[:8]}] Complete: {audio_duration:.2f}s audio in {elapsed:.2f}s ({realtime_factor:.2f}x realtime)")
            # Return audio to RVC server
            response = {
                'job_id': job_id,
                'audio': full_audio.tolist(),
                'sample_rate': self.SAMPLE_RATE,
                'elapsed': elapsed,
                'audio_duration': audio_duration
            }
            return response
        except Exception as e:
            logger.error(f"[Job {job_id[:8]}] Error: {e}", exc_info=True)
            return {'job_id': job_id, 'error': str(e)}

    def run(self):
        """Main loop: serve requests until shutdown or Ctrl-C.

        A request containing {'shutdown': true} stops the loop after an
        acknowledgement reply.
        """
        logger.info("Soprano Server ready - waiting for requests...")
        while True:
            try:
                # Wait for request from RVC server
                message = self.socket.recv_json()
                if message.get('shutdown'):
                    logger.info("Shutdown signal received")
                    self.socket.send_json({'status': 'shutting down'})
                    break
                # Process the TTS job
                response = self.process_job(message)
                # Send response back
                self.socket.send_json(response)
            except KeyboardInterrupt:
                logger.info("Shutting down...")
                break
            except Exception as e:
                logger.error(f"Error in main loop: {e}", exc_info=True)
                # Best-effort error reply; the REP socket requires a send
                # after every recv, but the send itself may also fail.
                try:
                    self.socket.send_json({'error': str(e)})
                except Exception:  # was a bare `except:` — don't swallow SystemExit/KeyboardInterrupt
                    pass
if __name__ == '__main__':
    import argparse

    # CLI entry point: parse the listening port, then block in the serve loop.
    arg_parser = argparse.ArgumentParser(description='Soprano TTS Server (CUDA)')
    arg_parser.add_argument('--port', type=int, default=5555, help='ZMQ port')
    cli_args = arg_parser.parse_args()

    SopranoServer(zmq_port=cli_args.port).run()