#!/usr/bin/env python3
"""
Soprano Server - Runs on GTX 1660 (CUDA)
Generates TTS audio and returns it to the RVC server via ZMQ
"""

import os
import sys
import time
import json
import logging
import zmq
import numpy as np

# Remove current directory from path to avoid shadowing the soprano package
if '' in sys.path:
    sys.path.remove('')
if '.' in sys.path:
    sys.path.remove('.')
current_dir = os.path.dirname(os.path.abspath(__file__))
if current_dir in sys.path:
    sys.path.remove(current_dir)

from soprano import SopranoTTS

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Sample rate (Hz) reported to clients and used to convert a sample count
# into seconds. NOTE(review): assumed to match the model's output rate.
SAMPLE_RATE = 32000


class SopranoServer:
    """ZMQ REP server that turns text requests into synthesized audio.

    Each request is a JSON object with ``job_id`` and ``text``; each reply is
    a JSON object holding either the audio samples or an ``error`` string.
    A request with a truthy ``shutdown`` key stops the server.
    """

    def __init__(self, zmq_port=5555):
        """Bind the reply socket on *zmq_port* and load the Soprano model.

        If binding or model loading fails, the ZMQ resources created so far
        are released before the exception propagates.
        """
        logger.info("Initializing Soprano Server (GTX 1660 CUDA)...")

        # Setup ZMQ
        self.context = zmq.Context()
        # Reply socket (receives requests, sends responses)
        self.socket = self.context.socket(zmq.REP)
        try:
            self.socket.bind(f"tcp://*:{zmq_port}")
            logger.info(f"✓ ZMQ listening on port {zmq_port}")

            # Load Soprano model
            logger.info("Loading Soprano TTS model...")
            self.soprano = SopranoTTS(
                backend='lmdeploy',
                device='cuda',
                cache_size_mb=500,
                decoder_batch_size=2
            )
            logger.info(
                f"✓ Soprano loaded on {self.soprano.device} "
                f"using {self.soprano.backend}"
            )
        except BaseException:
            # Do not leave a bound port / live context behind on failure.
            self.close()
            raise

    def close(self, linger_ms=1000):
        """Release the ZMQ socket and context; safe to call more than once.

        Args:
            linger_ms: How long (milliseconds) a queued reply may keep
                flushing after the socket is closed. A bounded value lets a
                final reply go out without blocking termination forever.
        """
        sock = getattr(self, 'socket', None)
        if sock is not None and not sock.closed:
            sock.close(linger=linger_ms)
        ctx = getattr(self, 'context', None)
        if ctx is not None and not ctx.closed:
            ctx.term()

    def process_job(self, job_data: dict) -> dict:
        """Synthesize audio for one TTS job and build the reply payload.

        Args:
            job_data: Request mapping with ``job_id`` (sliced for log
                prefixes) and ``text`` (the text to synthesize).

        Returns:
            On success, a dict with ``job_id``, ``audio`` (samples as a
            list), ``sample_rate``, ``elapsed`` and ``audio_duration``.
            If synthesis fails, a dict with ``job_id`` and ``error``.

        Raises:
            KeyError: If ``job_id`` or ``text`` is missing from *job_data*.
        """
        job_id = job_data['job_id']
        text = job_data['text']

        logger.info(f"[Job {job_id[:8]}] Processing: \"{text[:50]}...\"")

        start_time = time.time()

        try:
            # Synthesize once. The reply carries the whole utterance, so a
            # separate streaming pass would only duplicate the GPU work and
            # inflate the reported latency.
            full_audio = self.soprano.infer(text)

            elapsed = time.time() - start_time
            audio_duration = len(full_audio) / SAMPLE_RATE
            # Guard against a zero reading from a coarse clock.
            realtime_factor = (
                audio_duration / elapsed if elapsed > 0 else float('inf')
            )

            logger.info(
                f"[Job {job_id[:8]}] Complete: {audio_duration:.2f}s audio "
                f"in {elapsed:.2f}s ({realtime_factor:.2f}x realtime)"
            )

            # Return audio to RVC server
            return {
                'job_id': job_id,
                'audio': full_audio.tolist(),
                'sample_rate': SAMPLE_RATE,
                'elapsed': elapsed,
                'audio_duration': audio_duration
            }

        except Exception as e:
            logger.error(f"[Job {job_id[:8]}] Error: {e}", exc_info=True)
            return {'job_id': job_id, 'error': str(e)}

    def run(self):
        """Serve requests until a shutdown message or KeyboardInterrupt.

        Every received request gets exactly one reply attempt: the job
        result, a shutdown acknowledgement, or an ``error`` payload when
        handling fails.
        """
        logger.info("Soprano Server ready - waiting for requests...")

        while True:
            try:
                # Wait for request from RVC server
                message = self.socket.recv_json()

                if message.get('shutdown'):
                    logger.info("Shutdown signal received")
                    self.socket.send_json({'status': 'shutting down'})
                    break

                # Process the TTS job
                response = self.process_job(message)

                # Send response back
                self.socket.send_json(response)

            except KeyboardInterrupt:
                logger.info("Shutting down...")
                break
            except Exception as e:
                logger.error(f"Error in main loop: {e}", exc_info=True)
                # Best effort: a REP socket must answer a received request
                # before it can receive again. This send fails if the error
                # happened outside the "request received" state, which is
                # fine to ignore, but interrupts must still propagate.
                try:
                    self.socket.send_json({'error': str(e)})
                except Exception as send_err:
                    logger.warning(f"Could not send error reply: {send_err}")


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Soprano TTS Server (CUDA)')
    parser.add_argument('--port', type=int, default=5555, help='ZMQ port')
    args = parser.parse_args()

    server = SopranoServer(zmq_port=args.port)
    try:
        server.run()
    finally:
        # Always release the port and ZMQ context, however run() exits.
        server.close()