Working with GUI, auto loopback creation, soprano streaming

2026-01-12 22:55:21 +02:00
commit 942ca36252
12 changed files with 1984 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 .venv
 *.pth
--- a/1
+++ b/1
--- a/cleanup_virtual_sinks.sh
+++ b/cleanup_virtual_sinks.sh
@@ -0,0 +1,29 @@
 #!/bin/bash
 # Cleanup script to remove any leftover virtual sinks
 # Run this if you encounter issues with the virtual sink
 echo "🧹 Cleaning up virtual audio sinks..."
 echo ""
 # Without pactl nothing below can work; say so instead of claiming that
 # there is nothing to clean up.
 if ! command -v pactl > /dev/null 2>&1; then
    echo "✗ pactl (PulseAudio) not found. Please install PulseAudio." >&2
    exit 1
 fi
 # Find soprano/rvc virtual sink modules. The listing is captured ONCE so the
 # modules shown to the user are exactly the modules that get unloaded.
 MATCHES=$(pactl list modules short | grep -E "soprano|rvc")
 if [ -z "$MATCHES" ]; then
    echo "✓ No virtual sinks found. Nothing to clean up."
 else
    echo "Found virtual sink modules to remove:"
    printf '%s\n' "$MATCHES"
    echo ""
    FAILED=0
    # The first whitespace-separated column of each line is the module ID.
    while read -r MODULE _; do
        echo "Removing module $MODULE..."
        if ! pactl unload-module "$MODULE"; then
            echo "✗ Failed to remove module $MODULE" >&2
            FAILED=1
        fi
    done <<< "$MATCHES"
    echo ""
    if [ "$FAILED" -eq 0 ]; then
        echo "✓ Cleanup complete!"
    else
        echo "⚠ Cleanup finished with errors (see above)." >&2
    fi
 fi
 echo ""
 echo "Current audio sinks:"
 pactl list sinks short
--- a/constraints.txt
+++ b/constraints.txt
@@ -0,0 +1,24 @@
 # ==========================================================
 # HARD CONSTRAINTS — DO NOT UPGRADE CASUALLY
 # ==========================================================
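 # Python interpreter: 3.10.19 (pinned in python-version.txt; `python_version == "3.10.19"` is an environment marker, not a valid pip constraint line)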
 # Torch / ROCm ABI lock
 torch == 2.5.1+rocm6.2
 torchaudio == 2.5.1+rocm6.2
 torchvision == 0.20.1+rocm6.2
 pytorch-triton-rocm == 3.1.0
 # NumPy / Numba compatibility
 numpy < 1.24
 numba == 0.56.4
 llvmlite == 0.39.0
 # RVC core
 fairseq == 0.12.2
 faiss-cpu == 1.7.3
 pyworld < 0.4
 # Gradio pin (RVC WebUI tested)
 gradio == 3.48.0
--- a/gui_v1.py.backup
+++ b/gui_v1.py.backup
--- a/launch_soprano_rvc.sh
+++ b/launch_soprano_rvc.sh
@@ -0,0 +1,260 @@
 #!/bin/bash
 # Soprano TTS to RVC Pipeline Launcher
 # This script helps you set up and run the soprano->RVC pipeline
 # Abort as soon as any command fails.
 set -o errexit
 # Absolute directory that holds this script; every other path hangs off it.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 VENV_PATH="${SCRIPT_DIR}/.venv"
 SOPRANO_SCRIPT="${SCRIPT_DIR}/soprano_to_virtual_sink.py"
 RVC_DIR="${SCRIPT_DIR}/Retrieval-based-Voice-Conversion-WebUI"
 RVC_GUI="${RVC_DIR}/gui_v1.py"
 # ANSI colour sequences, stored unexpanded (the print_* helpers expand them).
 NC='\033[0m' # No Color (reset)
 BLUE='\033[0;34m'
 YELLOW='\033[1;33m'
 GREEN='\033[0;32m'
 RED='\033[0;31m'
 # Print colored output
 # Print an informational message ($1) to stdout, prefixed with a blue "ℹ".
 # Only the colour codes are escape-expanded (%b); the message itself is
 # printed verbatim (%s) so backslashes in it (e.g. in a path) survive.
 print_info() {
    printf '%b%s\n' "${BLUE}ℹ ${NC}" "$1"
 }
 # Print a success message ($1) to stdout, prefixed with a green "✓".
 # Only the colour codes are escape-expanded (%b); the message itself is
 # printed verbatim (%s) so backslashes in it survive.
 print_success() {
    printf '%b %s\n' "${GREEN}✓${NC}" "$1"
 }
 # Print a warning message ($1) to stdout, prefixed with a yellow "⚠".
 # Only the colour codes are escape-expanded (%b); the message itself is
 # printed verbatim (%s) so backslashes in it survive.
 print_warning() {
    printf '%b %s\n' "${YELLOW}⚠${NC}" "$1"
 }
 # Print an error message ($1), prefixed with a red "✗".
 # Errors go to stderr so they stay visible when stdout is redirected or
 # captured. Only the colour codes are escape-expanded (%b); the message is
 # printed verbatim (%s) so backslashes in it survive.
 print_error() {
    printf '%b %s\n' "${RED}✗${NC}" "$1" >&2
 }
 # Print a section banner: blank line, blue rule, the title ($1) indented by
 # two spaces, blue rule, blank line. Only the colour codes are
 # escape-expanded (%b); the title is printed verbatim (%s).
 print_header() {
    local rule="═══════════════════════════════════════════════════════════════════"
    printf '\n%b\n' "${BLUE}${rule}${NC}"
    printf '%b%s%b\n' "${BLUE}  " "$1" "${NC}"
    printf '%b\n\n' "${BLUE}${rule}${NC}"
 }
 # Check prerequisites
 # Verify that everything the pipeline needs is present: the virtualenv
 # directory, the RVC GUI script, the Soprano script and the pactl binary.
 # Each passing check is reported via print_success; the first failing check
 # is reported via print_error and terminates the script with status 1.
 check_prerequisites() {
    print_header "Checking Prerequisites"
    # Virtual environment (a directory)
    [ -d "$VENV_PATH" ] || {
        print_error "Virtual environment not found at: $VENV_PATH"
        exit 1
    }
    print_success "Virtual environment found"
    # RVC GUI entry point (a regular file)
    [ -f "$RVC_GUI" ] || {
        print_error "RVC GUI not found at: $RVC_GUI"
        exit 1
    }
    print_success "RVC GUI found"
    # Soprano streaming script (a regular file)
    [ -f "$SOPRANO_SCRIPT" ] || {
        print_error "Soprano script not found at: $SOPRANO_SCRIPT"
        exit 1
    }
    print_success "Soprano script found"
    # pactl is the PulseAudio control tool
    command -v pactl > /dev/null 2>&1 || {
        print_error "pactl (PulseAudio) not found. Please install PulseAudio."
        exit 1
    }
    print_success "PulseAudio found"
 }
 # Display usage instructions
 # Print the help text: a banner, a description of the four pipeline stages,
 # the invocation syntax and the list of accepted options.
 show_usage() {
    print_header "Soprano TTS to RVC Pipeline"
    local -a help_lines=(
        "This script helps you run a text-to-speech pipeline where:"
        "  1. You type text into the Soprano TTS script"
        "  2. Soprano generates speech and outputs to a virtual sink"
        "  3. RVC reads from that virtual sink and applies voice conversion"
        "  4. RVC outputs the converted voice to your speakers/headphones"
        ""
        "Usage:"
        "  $0 [option]"
        ""
        "Options:"
        "  soprano      - Start only the Soprano TTS virtual sink script"
        "  rvc          - Start only the RVC realtime GUI"
        "  both         - Start both in separate terminal windows (default)"
        "  help         - Show this help message"
        ""
    )
    printf '%s\n' "${help_lines[@]}"
 }
 # Start soprano script
 # Activate the virtualenv in the current shell and run the Soprano
 # streaming script in the foreground. The function's status is python's.
 start_soprano() {
    local activate_script="$VENV_PATH/bin/activate"
    print_header "Starting Soprano TTS Virtual Sink"
    print_info "Activating virtual environment..."
    # shellcheck disable=SC1090  # path is only known at run time
    . "$activate_script"
    print_info "Starting soprano_to_virtual_sink.py..."
    print_info "This will create a virtual sink: soprano_to_rvc"
    printf '\n'
    python "$SOPRANO_SCRIPT"
 }
 # Start RVC GUI
 # Activate the virtualenv, switch into the RVC checkout and run its
 # realtime GUI in the foreground. A reminder about the input device is
 # shown for two seconds before the GUI starts.
 start_rvc() {
    local activate_script="$VENV_PATH/bin/activate"
    print_header "Starting RVC Realtime GUI"
    print_info "Activating virtual environment..."
    # shellcheck disable=SC1090  # path is only known at run time
    . "$activate_script"
    print_info "Changing to RVC directory..."
    cd "$RVC_DIR"
    print_info "Starting RVC GUI..."
    printf '\n'
    print_warning "IMPORTANT: In the RVC GUI, select 'soprano_to_rvc.monitor' as your INPUT device!"
    printf '\n'
    # Give the user a moment to read the warning above.
    sleep 2
    python "$RVC_GUI"
 }
 # Start both in separate terminals
 # Open two new terminal windows, one running "<this script> soprano" and one
 # running "<this script> rvc", then print a quick setup guide.
 # Exits with status 1 (after printing manual instructions) when none of the
 # supported terminal emulators is installed.
 start_both() {
    print_header "Starting Both Components"
    print_info "This will open two terminal windows:"
    print_info "  1. Soprano TTS Virtual Sink (for text input)"
    print_info "  2. RVC Realtime GUI (for voice conversion)"
    echo ""
    # Kept local so the user's own $TERMINAL environment variable is untouched.
    local terminal
    if ! terminal="$(_detect_terminal)"; then
        print_error "No suitable terminal emulator found"
        print_info "Please start the components manually:"
        print_info "  Terminal 1: $0 soprano"
        print_info "  Terminal 2: $0 rvc"
        exit 1
    fi
    print_success "Using terminal: $terminal"
    # Start soprano in new terminal
    print_info "Starting Soprano TTS in new terminal..."
    _launch_in_terminal "$terminal" soprano
    # Pause between the two launches (same 2 s delay as before).
    sleep 2
    # Start RVC in new terminal
    print_info "Starting RVC GUI in new terminal..."
    _launch_in_terminal "$terminal" rvc
    echo ""
    print_success "Both components started in separate terminals"
    echo ""
    print_header "Quick Setup Guide"
    echo "1. In the RVC GUI window:"
    echo "   - Select your RVC model (.pth file)"
    echo "   - Select the corresponding index file"
    echo "   - Choose 'soprano_to_rvc.monitor' as INPUT device"
    echo "   - Choose your speakers/headphones as OUTPUT device"
    echo "   - Click 'Start Voice Conversion'"
    echo ""
    echo "2. In the Soprano TTS window:"
    echo "   - Type any text you want to convert"
    echo "   - Press Enter to generate and stream"
    echo ""
    echo "3. Listen to the RVC-converted output!"
    echo ""
    print_info "Press Ctrl+C in each terminal to stop"
    echo ""
 }
 # Print the name of the first installed terminal emulator, in order of
 # preference. Returns 1 (printing nothing) when none is available.
 _detect_terminal() {
    local candidate
    for candidate in gnome-terminal konsole xfce4-terminal alacritty kitty xterm; do
        if command -v "$candidate" > /dev/null 2>&1; then
            printf '%s\n' "$candidate"
            return 0
        fi
    done
    return 1
 }
 # Launch "<this script> $2" in a new window of terminal emulator $1, in the
 # background. The window stays open with an interactive shell afterwards.
 #   $1 - terminal emulator name as printed by _detect_terminal
 #   $2 - component to start: "soprano" or "rvc"
 _launch_in_terminal() {
    local terminal="$1" component="$2"
    # Absolute path of this script. A relative $0 would stop resolving once
    # the new shell has changed into $SCRIPT_DIR.
    local launcher
    launcher="$SCRIPT_DIR/$(basename -- "${BASH_SOURCE[0]}")"
    # Paths are handed over as positional parameters rather than spliced into
    # the command string, so spaces and quotes in them need no escaping.
    local -a inner=(
        bash -c 'cd "$1" && bash "$2" "$3"; exec bash'
        launcher "$SCRIPT_DIR" "$launcher" "$component"
    )
    case "$terminal" in
        gnome-terminal)
            gnome-terminal -- "${inner[@]}" &
            ;;
        konsole)
            konsole -e "${inner[@]}" &
            ;;
        xfce4-terminal)
            # -x runs the remainder of the command line (unlike -e, which
            # takes a single command string).
            xfce4-terminal -x "${inner[@]}" &
            ;;
        alacritty)
            alacritty -e "${inner[@]}" &
            ;;
        kitty)
            kitty "${inner[@]}" &
            ;;
        xterm)
            xterm -e "${inner[@]}" &
            ;;
    esac
 }
 # Main script
 # Entry point: dispatch on the first argument (default "both").
 # soprano/rvc/both run the prerequisite check and then the matching
 # start_* function; help/--help/-h print the usage text; anything else is
 # reported as an error, followed by the usage text and exit status 1.
 main() {
    local mode="${1:-both}"
    case "$mode" in
        help|--help|-h)
            show_usage
            ;;
        soprano|rvc|both)
            check_prerequisites
            "start_${mode}"
            ;;
        *)
            print_error "Unknown option: $1"
            show_usage
            exit 1
            ;;
    esac
 }
 main "$@"
--- a/python-version.txt
+++ b/python-version.txt
@@ -0,0 +1 @@
 3.10.19
--- a/requirements.lock.txt
+++ b/requirements.lock.txt
@@ -0,0 +1,159 @@
 absl-py==2.3.1
 accelerate==1.12.0
 aiofiles==23.2.1
 aiohappyeyeballs==2.6.1
 aiohttp==3.13.3
 aiosignal==1.4.0
 altair==5.5.0
 annotated-doc==0.0.4
 annotated-types==0.7.0
 antlr4-python3-runtime==4.8
 anyio==4.12.1
 async-timeout==5.0.1
 attrs==25.4.0
 audioread==3.1.0
 av==16.1.0
 bitarray==3.8.0
 brotli==1.2.0
 certifi==2026.1.4
 cffi==2.0.0
 charset-normalizer==3.4.4
 click==8.3.1
 colorama==0.4.6
 coloredlogs==15.0.1
 contourpy==1.3.2
 cycler==0.12.1
 Cython==3.2.4
 decorator==5.2.1
 einops==0.8.1
 exceptiongroup==1.3.1
 fairseq==0.12.2
 faiss-cpu==1.7.3
 fastapi==0.88.0
 ffmpeg-python==0.2.0
 ffmpy==0.3.1
 filelock==3.20.0
 flatbuffers==25.12.19
 fonttools==4.61.1
 frozenlist==1.8.0
 fsspec==2025.12.0
 future==1.0.0
 gradio==3.48.0
 gradio_client==0.6.1
 groovy==0.1.2
 grpcio==1.76.0
 h11==0.16.0
 hf-xet==1.2.0
 httpcore==1.0.9
 httpx==0.28.1
 huggingface-hub==0.36.0
 humanfriendly==10.0
 hydra-core==1.0.7
 hyper-connections==0.4.0
 idna==3.11
 importlib_resources==6.5.2
 inflect==7.5.0
 Jinja2==3.1.3
 joblib==1.5.3
 json5==0.13.0
 jsonschema==4.26.0
 jsonschema-specifications==2025.9.1
 kiwisolver==1.4.9
 lazy_loader==0.4
 librosa==0.10.2
 linkify-it-py==2.0.3
 llvmlite==0.39.0
 local-attention==1.11.2
 lxml==6.0.2
 Markdown==3.10
 markdown-it-py==2.2.0
 MarkupSafe==2.1.5
 matplotlib==3.10.8
 matplotlib-inline==0.2.1
 mdit-py-plugins==0.3.3
 mdurl==0.1.2
 more-itertools==10.8.0
 mpmath==1.3.0
 msgpack==1.1.2
 multidict==6.7.0
 narwhals==2.15.0
 networkx==3.4.2
 numba==0.56.4
 numpy==1.23.5
 omegaconf==2.0.6
 onnxruntime==1.23.2
 onnxruntime-gpu==1.23.2
 orjson==3.11.5
 packaging==25.0
 pandas==2.3.3
 pillow==10.4.0
 platformdirs==4.5.1
 pooch==1.8.2
 portalocker==3.2.0
 praat-parselmouth==0.4.7
 propcache==0.4.1
 protobuf==6.33.3
 psutil==7.2.1
 pyasn1==0.6.1
 pyasn1_modules==0.4.2
 pycparser==2.23
 pydantic==1.10.26
 pydantic_core==2.41.5
 pydub==0.25.1
 Pygments==2.19.2
 pyparsing==3.3.1
 python-dateutil==2.9.0.post0
 python-dotenv==1.2.1
 python-multipart==0.0.21
 pytorch-triton-rocm==3.1.0
 pytz==2025.2
 pyworld==0.3.2
 PyYAML==6.0.3
 referencing==0.37.0
 regex==2025.11.3
 requests==2.32.5
 resampy==0.4.3
 rich==14.2.0
 rpds-py==0.30.0
 sacrebleu==2.5.1
 safehttpx==0.1.7
 safetensors==0.7.0
 scikit-learn==1.7.2
 scipy==1.15.3
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.17.0
 -e git+https://github.com/ekwek1/soprano.git@5c759351f9e115aa364d5f4453ddaa7ee0d6f15e#egg=soprano_tts
 sounddevice==0.5.3
 soundfile==0.13.1
 soxr==1.0.0
 starlette==0.22.0
 sympy==1.13.1
 tabulate==0.9.0
 tensorboard==2.20.0
 tensorboard-data-server==0.7.2
 tensorboardX==2.6.4
 threadpoolctl==3.6.0
 tokenizers==0.22.2
 tomlkit==0.13.3
 torch==2.5.1+rocm6.2
 torchaudio==2.5.1+rocm6.2
 torchcrepe==0.0.23
 torchfcpe==0.0.4
 torchvision==0.20.1+rocm6.2
 tornado==6.5.4
 tqdm==4.67.1
 traitlets==5.14.3
 transformers==4.57.3
 typeguard==4.4.4
 typer==0.21.1
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 tzdata==2025.3
 uc-micro-py==1.0.3
 Unidecode==1.4.0
 urllib3==2.6.3
 uvicorn==0.40.0
 websockets==11.0.3
 Werkzeug==3.1.5
 yarl==1.22.0
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,86 @@
 # ==========================================================
 # Unified Soprano + RVC environment
 # Python == 3.10.19
 # ROCm == 6.2
 # ==========================================================
 # ----------------------
 # Core ML / GPU stack
 # ----------------------
 torch==2.5.1+rocm6.2
 torchaudio==2.5.1+rocm6.2
 torchvision==0.20.1+rocm6.2
 pytorch-triton-rocm==3.1.0
 # ----------------------
 # Numerical stack (RVC-safe)
 # ----------------------
 numpy==1.23.5
 scipy==1.15.3
 scikit-learn==1.7.2
 # ----------------------
 # Audio processing
 # ----------------------
 sounddevice==0.5.3
 soundfile==0.13.1
 pydub==0.25.1
 librosa==0.10.2
 soxr==1.0.0
 resampy==0.4.3
 praat-parselmouth==0.4.7
 pyworld==0.3.2
 av==16.1.0
 # ----------------------
 # RVC core
 # ----------------------
 fairseq==0.12.2
 faiss-cpu==1.7.3
 numba==0.56.4
 llvmlite==0.39.0
 torchcrepe==0.0.23
 torchfcpe==0.0.4
 einops==0.8.1
 local-attention==1.11.2
 omegaconf==2.0.6
 hydra-core==1.0.7
 # ----------------------
 # Soprano TTS
 # ----------------------
 transformers==4.57.3
 accelerate==1.12.0
 tokenizers==0.22.2
 safetensors==0.7.0
 huggingface-hub==0.36.0
 inflect==7.5.0
 Unidecode==1.4.0
 # ----------------------
 # Web / UI
 # ----------------------
 fastapi==0.88.0
 starlette==0.22.0
 uvicorn==0.40.0
 gradio==3.48.0
 gradio_client==0.6.1
 python-multipart==0.0.21
 orjson==3.11.5
 # ----------------------
 # Utilities
 # ----------------------
 tqdm==4.67.1
 rich==14.2.0
 psutil==7.2.1
 requests==2.32.5
 regex==2025.11.3
 filelock==3.20.0
 packaging==25.0
 PyYAML==6.0.3
 # ----------------------
 # Editable installs (from Git, pinned to a commit)
 # ----------------------
 -e git+https://github.com/ekwek1/soprano.git@5c759351f9e115aa364d5f4453ddaa7ee0d6f15e#egg=soprano_tts
--- a/setup_alsa_bridge.sh
+++ b/setup_alsa_bridge.sh
@@ -0,0 +1,52 @@
 #!/bin/bash
 # Setup script to make soprano_to_rvc available as an ALSA device for RVC
 ASOUND_RC="$HOME/.asoundrc"
 SINK_NAME="soprano_to_rvc"
 # Print a horizontal rule made of '=' characters.
 #   $1 - width in characters (default 70)
 print_rule() {
    local width="${1:-70}" line
    printf -v line '%*s' "$width" ''
    printf '%s\n' "${line// /=}"
 }
 # Write the ALSA stanza exposing "${SINK_NAME}.monitor" as the PCM/CTL
 # device "soprano_rvc" to stdout. It starts with a blank line so it is
 # separated from whatever already precedes it in the target file.
 alsa_bridge_config() {
    printf '%s\n' \
        '' \
        '# Soprano to RVC bridge' \
        'pcm.soprano_rvc {' \
        '    type pulse' \
        "    device ${SINK_NAME}.monitor" \
        '    hint {' \
        '        show on' \
        '        description "Soprano TTS to RVC Bridge"' \
        '    }' \
        '}' \
        'ctl.soprano_rvc {' \
        '    type pulse' \
        "    device ${SINK_NAME}.monitor" \
        '}'
 }
 # Append the bridge stanza to $ASOUND_RC unless it is already there.
 # An existing file is backed up first, and only when it is about to be
 # modified, so repeated runs do not pile up identical backups.
 # Returns 0 on success or when nothing had to be done, 1 on I/O failure.
 install_alsa_bridge() {
    # -F: the dot in the stanza name is a literal, not a regex wildcard.
    if grep -qF "pcm.soprano_rvc" "$ASOUND_RC" 2>/dev/null; then
        echo "✓ Configuration already exists in .asoundrc"
        return 0
    fi
    if [ -f "$ASOUND_RC" ]; then
        if ! cp -- "$ASOUND_RC" "${ASOUND_RC}.backup.$(date +%s)"; then
            echo "✗ Could not back up $ASOUND_RC; leaving it untouched" >&2
            return 1
        fi
        echo "✓ Backed up existing .asoundrc"
    fi
    echo "Adding ALSA configuration..."
    if ! alsa_bridge_config >> "$ASOUND_RC"; then
        echo "✗ Could not write to $ASOUND_RC" >&2
        return 1
    fi
    echo "✓ Added ALSA configuration to .asoundrc"
 }
 # Install the bridge and print follow-up instructions for the RVC GUI.
 # Returns 1 (without the instructions) if the installation failed.
 main() {
    echo "Setting up ALSA configuration for ${SINK_NAME}..."
    install_alsa_bridge || return 1
    echo ""
    print_rule 70
    echo "Setup complete!"
    echo ""
    echo "The virtual device 'soprano_rvc' is now available as an ALSA device."
    echo ""
    echo "In RVC GUI:"
    echo "  1. Set device type to 'ALSA'"
    echo "  2. Select 'soprano_rvc' or 'Soprano TTS to RVC Bridge' as input"
    echo "  3. Make sure the soprano_to_virtual_sink.py script is running"
    echo ""
 }
 # Last command on purpose: its status becomes the script's exit status.
 main "$@"
--- a/1
+++ b/1
--- a/soprano_to_virtual_sink.py
+++ b/soprano_to_virtual_sink.py
@@ -0,0 +1,299 @@
 #!/usr/bin/env python3
 """
 Soprano TTS to Virtual Sink
 This script takes text input and streams Soprano TTS output to a virtual PulseAudio sink
 that can be used as input for RVC realtime voice conversion.
 """
 import sys
 import os
 import subprocess
 import signal
 import sounddevice as sd
 import numpy as np
 import torch
 from scipy import signal as scipy_signal
 # Add soprano to path
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'soprano'))
 from soprano import SopranoTTS
 # Configuration
 # Name given to the PulseAudio null sink (passed as sink_name=... to pactl);
 # its monitor source is addressed as "<name>.monitor".
 VIRTUAL_SINK_NAME = "soprano_to_rvc"
 # Rate the sink is created with and the output stream is opened at.
 SAMPLE_RATE = 48000  # Use 48kHz for better compatibility with audio systems
 # Rate assumed for incoming TTS chunks when resampling to SAMPLE_RATE.
 SOPRANO_RATE = 32000  # Soprano outputs at 32kHz
 CHANNELS = 2  # Use stereo to match RVC expectations
 # Global flag for graceful shutdown: cleared by signal_handler() and polled
 # by the streaming loop and the main input loop.
 running = True
 def signal_handler(sig, frame):
    """Signal callback: announce the shutdown and clear the ``running`` flag.

    Both arguments (signal number and current stack frame) are accepted to
    satisfy the ``signal.signal`` handler signature and are otherwise unused.
    """
    print("\n" * 2 + "Shutting down gracefully...")
    # Clear the module-level flag polled by the processing loops.
    globals()["running"] = False
 def create_virtual_sink():
    """Make sure the PulseAudio null sink ``VIRTUAL_SINK_NAME`` exists.

    Returns:
        True if the sink already existed or was created, False if ``pactl``
        is not installed or loading ``module-null-sink`` failed.
    """
    # Check if sink already exists
    try:
        listing = subprocess.run(
            ["pactl", "list", "sinks", "short"],
            capture_output=True,
            text=True,
            check=True
        )
    except FileNotFoundError:
        # pactl itself is missing: nothing further can succeed.
        print("✗ 'pactl' not found. Please install PulseAudio.")
        return False
    except subprocess.CalledProcessError:
        # Listing is best-effort; fall through and try to create the sink.
        listing = None
    if listing is not None:
        # Short format is "<index> <name> <driver> ...": compare the name
        # column exactly so a sink such as "<name>_old" is not mistaken for it.
        existing = {
            fields[1]
            for fields in (line.split() for line in listing.stdout.splitlines())
            if len(fields) > 1
        }
        if VIRTUAL_SINK_NAME in existing:
            print(f"✓ Virtual sink '{VIRTUAL_SINK_NAME}' already exists")
            print(f"  Monitor source: {VIRTUAL_SINK_NAME}.monitor")
            return True
    print(f"Creating virtual sink: {VIRTUAL_SINK_NAME}")
    try:
        # Create a null sink (virtual audio device) at 48kHz for compatibility
        subprocess.run([
            "pactl", "load-module", "module-null-sink",
            f"sink_name={VIRTUAL_SINK_NAME}",
            f"sink_properties=device.description={VIRTUAL_SINK_NAME}",
            f"rate={SAMPLE_RATE}",
            f"channels={CHANNELS}"  # Stereo to match RVC expectations
        ], check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # stderr is bytes here (no text=True) and may be empty.
        detail = e.stderr.decode(errors="replace").strip() if e.stderr else str(e)
        print(f"✗ Failed to create virtual sink: {detail}")
        return False
    except OSError as e:
        print(f"✗ Failed to create virtual sink: {e}")
        return False
    print(f"✓ Virtual sink '{VIRTUAL_SINK_NAME}' created successfully")
    print(f"  Monitor source: {VIRTUAL_SINK_NAME}.monitor")
    return True
 def remove_virtual_sink():
    """Unload every ``module-null-sink`` instance that provides our sink.

    A module is selected only when it is a ``module-null-sink`` whose
    arguments contain ``sink_name=<VIRTUAL_SINK_NAME>`` exactly, so other
    modules that merely mention the name (for example a loopback reading the
    sink's monitor) are never unloaded by mistake. Errors are reported and
    swallowed: this runs during shutdown and must not raise.
    """
    print(f"\nRemoving virtual sink: {VIRTUAL_SINK_NAME}")
    wanted = f"sink_name={VIRTUAL_SINK_NAME}"
    try:
        # Find the module ID(s)
        result = subprocess.run(
            ["pactl", "list", "modules", "short"],
            capture_output=True,
            text=True,
            check=True
        )
        removed = 0
        for line in result.stdout.splitlines():
            # Short format: "<id> <module-name> <arguments...>"
            fields = line.split()
            if len(fields) < 3 or fields[1] != "module-null-sink":
                continue
            # Tolerate quoted values such as sink_name="soprano_to_rvc".
            arguments = [arg.replace('"', '').replace("'", "") for arg in fields[2:]]
            if wanted not in arguments:
                continue
            subprocess.run(["pactl", "unload-module", fields[0]], check=True)
            removed += 1
        if removed:
            print(f"✓ Virtual sink removed")
        else:
            print(f"⚠️  No loaded module provides '{VIRTUAL_SINK_NAME}'; nothing to remove")
    except Exception as e:
        print(f"✗ Error removing virtual sink: {e}")
 def get_virtual_sink_device_id():
    """Return the sounddevice index of the virtual sink, or None if absent.

    The sounddevice backend is torn down and re-initialised first so that a
    freshly created sink shows up in the device list. The first device whose
    name contains ``VIRTUAL_SINK_NAME`` wins.
    """
    # Force refresh device list
    sd._terminate()
    sd._initialize()
    return next(
        (
            index
            for index, info in enumerate(sd.query_devices())
            if VIRTUAL_SINK_NAME in info['name']
        ),
        None,
    )
 def stream_to_virtual_sink(tts_model, text, chunk_size=1):
    """Synthesise ``text`` and play it into the virtual sink chunk by chunk.

    Args:
        tts_model: object whose ``infer_stream(text, chunk_size=...)`` yields
            audio chunks (torch tensors or array-likes) at ``SOPRANO_RATE``.
        text: the text to speak.
        chunk_size: forwarded unchanged to ``infer_stream``.

    Returns:
        True when the stream ran to completion (or was stopped through the
        ``running`` flag), False when the sink could not be found/created or
        an exception occurred while streaming.
    """
    device_id = get_virtual_sink_device_id()
    if device_id is None:
        print(f"✗ Could not find virtual sink device: {VIRTUAL_SINK_NAME}")
        print(f"⚠️  Attempting to recreate virtual sink...")
        if not create_virtual_sink():
            return False
        # Wait a moment for the device to appear
        import time
        time.sleep(1.0)
        device_id = get_virtual_sink_device_id()
        if device_id is None:
            print(f"✗ Still could not find virtual sink after recreation")
            print(f"\n📋 Available devices:")
            # Only devices that look related to this pipeline are listed.
            for i, dev in enumerate(sd.query_devices()):
                name = dev['name']
                if 'soprano' in name.lower() or 'rvc' in name.lower():
                    print(f"   {i}: {name}")
            return False
    device_info = sd.query_devices(device_id)
    print(f"✓ Using output device: {device_info['name']}")
    # The stream is always opened at SAMPLE_RATE (the rate the sink is created
    # with), whatever default rate the device reports.
    print(f"  Sample rate: {SAMPLE_RATE} Hz")
    print(f"\n🎤 Generating and streaming speech...")
    print(f"Text: \"{text}\"\n")
    try:
        # Generate streaming audio from soprano
        stream = tts_model.infer_stream(text, chunk_size=chunk_size)
        # Open output stream to virtual sink
        with sd.OutputStream(
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype='float32',
            device=device_id,
            blocksize=0
        ) as out_stream:
            first_chunk = True
            for chunk in stream:
                if not running:
                    break
                if first_chunk:
                    print("✓ First audio chunk generated and streaming started")
                    first_chunk = False
                frames = _chunk_to_output_frames(chunk)
                if frames is None:
                    # Nothing playable in this chunk; keep the stream alive.
                    continue
                # Write to virtual sink
                out_stream.write(frames)
        print("✓ Speech generation and streaming completed")
        return True
    except Exception as e:
        print(f"✗ Error during streaming: {e}")
        import traceback
        traceback.print_exc()
        return False
 def _chunk_to_output_frames(chunk):
    """Turn one TTS chunk into a float32 ``(N, CHANNELS)`` array at SAMPLE_RATE.

    The chunk is flattened to mono, non-finite samples are replaced,
    the signal is resampled from ``SOPRANO_RATE`` to ``SAMPLE_RATE``, clipped
    to [-1, 1] and duplicated onto every output channel. Returns None for a
    chunk that holds no samples (resampling an empty array would raise).
    """
    # Convert torch tensor to numpy if needed
    if isinstance(chunk, torch.Tensor):
        chunk = chunk.detach().cpu().numpy()
    # Whatever the incoming shape, treat the samples as one mono sequence.
    mono = np.asarray(chunk).reshape(-1)
    if mono.size == 0:
        return None
    # Check for invalid values before resampling
    if not np.all(np.isfinite(mono)):
        print(f"⚠️  Warning: Invalid values in soprano output, cleaning...")
        mono = np.nan_to_num(mono, nan=0.0, posinf=1.0, neginf=-1.0)
    # Resample from 32kHz (Soprano) to 48kHz (output) if needed
    if SOPRANO_RATE != SAMPLE_RATE:
        target_len = int(len(mono) * SAMPLE_RATE / SOPRANO_RATE)
        if target_len == 0:
            return None
        resampled = scipy_signal.resample(mono, target_len)
    else:
        resampled = mono
    # Ensure no NaN or inf values after resampling (clip to valid range)
    if not np.all(np.isfinite(resampled)):
        print(f"⚠️  Warning: Invalid values after resampling, cleaning...")
    resampled = np.nan_to_num(resampled, nan=0.0, posinf=1.0, neginf=-1.0)
    resampled = np.clip(resampled, -1.0, 1.0)
    # Reshape to (N, CHANNELS): duplicate the mono signal on every channel
    return np.column_stack([resampled] * CHANNELS).astype(np.float32)
 def main():
    """Interactive entry point: create the sink, load the model, speak text.

    Ctrl+C handling: the flag-based ``signal_handler`` is installed only while
    a text is being streamed, so playback stops cleanly after the current
    chunk. While the model loads or the prompt waits in ``input()``, Python's
    own SIGINT behaviour (``KeyboardInterrupt``) stays active; with a
    flag-only handler the interrupted read would simply be retried and Ctrl+C
    would appear to do nothing until Enter is pressed.

    Returns:
        0 on a normal exit, 1 if the sink or the TTS model could not be set up.
    """
    print("=" * 70)
    print("Soprano TTS to Virtual Sink for RVC")
    print("=" * 70)
    print()
    # Create virtual sink
    if not create_virtual_sink():
        print("\n⚠️  If sink already exists, removing and recreating...")
        remove_virtual_sink()
        if not create_virtual_sink():
            print("✗ Failed to create virtual sink. Exiting.")
            return 1
    print()
    print("=" * 70)
    print("Virtual sink setup complete!")
    print("=" * 70)
    print()
    print("📝 Next steps:")
    print(f"   1. Open RVC realtime GUI (gui_v1.py)")
    print(f"   2. Select '{VIRTUAL_SINK_NAME}.monitor' as the INPUT device")
    print(f"   3. Select your desired output device")
    print(f"   4. Load your RVC model and start conversion")
    print(f"   5. Return here and type text to convert")
    print()
    print("=" * 70)
    print()
    # From here on the sink exists, so every exit path goes through `finally`.
    try:
        # Initialize Soprano TTS
        print("🔄 Loading Soprano TTS model...")
        try:
            tts = SopranoTTS(
                backend='auto',
                device='auto',
                cache_size_mb=100,
                decoder_batch_size=1
            )
        except Exception as e:
            print(f"✗ Failed to load Soprano TTS: {e}")
            return 1
        print("✓ Soprano TTS model loaded successfully")
        print()
        print("=" * 70)
        print("Ready! Type text to generate speech (Ctrl+C to exit)")
        print("=" * 70)
        print()
        # Main loop - get text input and generate speech
        while running:
            try:
                text = input("\n🎙️  Enter text: ").strip()
            except EOFError:
                break
            if not text:
                print("⚠️  Please enter some text")
                continue
            if text.lower() in ('quit', 'exit', 'q'):
                break
            # Stream the text to the virtual sink. During playback Ctrl+C only
            # clears `running`, which ends the stream and then this loop.
            previous_handler = signal.signal(signal.SIGINT, signal_handler)
            try:
                stream_to_virtual_sink(tts, text, chunk_size=1)
            finally:
                # getsignal/signal report None for handlers not installed
                # from Python; fall back to the default in that case.
                signal.signal(
                    signal.SIGINT,
                    previous_handler if previous_handler is not None else signal.SIG_DFL,
                )
            print()
    except KeyboardInterrupt:
        print("\n\n⚠️  Interrupted by user")
    finally:
        # Clean up
        remove_virtual_sink()
        print("\n✓ Cleanup complete. Goodbye!")
    return 0
 if __name__ == "__main__":
    sys.exit(main())