Working with GUI, auto loopback creation, soprano streaming

2026-01-12 22:55:21 +02:00
commit 942ca36252
12 changed files with 1984 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.venv
+*.pth
--- a/1
+++ b/1
--- a/cleanup_virtual_sinks.sh
+++ b/cleanup_virtual_sinks.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Cleanup script to remove any leftover virtual sinks.
+# Run this if you encounter issues with the virtual sink.
+#
+# Unloads every PulseAudio module whose `pactl list modules short` line
+# mentions "soprano" or "rvc", then prints the sinks that remain.
+# Exit status: 0 on success, 1 if pactl is missing or a module failed to unload.
+
+if ! command -v pactl > /dev/null 2>&1; then
+    echo "✗ pactl (PulseAudio) not found. Please install PulseAudio." >&2
+    exit 1
+fi
+
+echo "🧹 Cleaning up virtual audio sinks..."
+echo ""
+
+# Capture the listing once so the modules we unload are exactly the ones shown
+MATCHES=$(pactl list modules short | grep -E "soprano|rvc")
+
+FAILED=0
+if [ -z "$MATCHES" ]; then
+    echo "✓ No virtual sinks found. Nothing to clean up."
+else
+    echo "Found virtual sink modules to remove:"
+    printf '%s\n' "$MATCHES"
+    echo ""
+
+    # The first whitespace-separated field of each line is the module ID
+    while read -r MODULE _; do
+        echo "Removing module $MODULE..."
+        if ! pactl unload-module "$MODULE"; then
+            echo "✗ Failed to unload module $MODULE" >&2
+            FAILED=1
+        fi
+    done <<< "$MATCHES"
+
+    echo ""
+    if [ "$FAILED" -eq 0 ]; then
+        echo "✓ Cleanup complete!"
+    else
+        echo "⚠ Cleanup finished with errors (see messages above)." >&2
+    fi
+fi
+
+echo ""
+echo "Current audio sinks:"
+pactl list sinks short
+
+exit "$FAILED"
--- a/constraints.txt
+++ b/constraints.txt
@@ -0,0 +1,24 @@
+# ==========================================================
+# HARD CONSTRAINTS — DO NOT UPGRADE CASUALLY
+# ==========================================================
+
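+# Python interpreter: 3.10.19 (see python-version.txt). Kept as a comment because
+# `python_version == "3.10.19"` is not a valid pip requirement/constraint line.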
+
+# Torch / ROCm ABI lock
+torch == 2.5.1+rocm6.2
+torchaudio == 2.5.1+rocm6.2
+torchvision == 0.20.1+rocm6.2
+pytorch-triton-rocm == 3.1.0
+
+# NumPy / Numba compatibility
+numpy < 1.24
+numba == 0.56.4
+llvmlite == 0.39.0
+
+# RVC core
+fairseq == 0.12.2
+faiss-cpu == 1.7.3
+pyworld < 0.4
+
+# Gradio pin (RVC WebUI tested)
+gradio == 3.48.0
--- a/gui_v1.py.backup
+++ b/gui_v1.py.backup
--- a/launch_soprano_rvc.sh
+++ b/launch_soprano_rvc.sh
@@ -0,0 +1,260 @@
+#!/bin/bash
+# Soprano TTS to RVC Pipeline Launcher
+# This script helps you set up and run the soprano->RVC pipeline
+
+# Abort on the first command that fails
+set -o errexit
+
+# Absolute directory containing this script; every other path hangs off it
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV_PATH="${SCRIPT_DIR}/.venv"
+SOPRANO_SCRIPT="${SCRIPT_DIR}/soprano_to_virtual_sink.py"
+RVC_DIR="${SCRIPT_DIR}/Retrieval-based-Voice-Conversion-WebUI"
+RVC_GUI="${RVC_DIR}/gui_v1.py"
+
+# ANSI color escape sequences (stored unexpanded; interpreted by `echo -e`)
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Colored output helpers
+# print_info: write an informational message ($1) with a blue "ℹ" prefix.
+print_info() {
+    printf '%b\n' "${BLUE}ℹ ${NC}$1"
+}
+
+# print_success: write a success message ($1) with a green check mark prefix.
+print_success() {
+    printf '%b\n' "${GREEN}✓${NC} $1"
+}
+
+# print_warning: write a warning message ($1) with a yellow "⚠" prefix.
+print_warning() {
+    printf '%b\n' "${YELLOW}⚠${NC} $1"
+}
+
+# print_error: write an error message ($1) with a red "✗" prefix.
+# Goes to stderr so diagnostics stay separate from normal output.
+print_error() {
+    echo -e "${RED}✗${NC} $1" >&2
+}
+
+# print_header: write a section title ($1) framed by blue rules, with a blank
+# line before and after the frame.
+print_header() {
+    local rule="${BLUE}═══════════════════════════════════════════════════════════════════${NC}"
+
+    echo ""
+    echo -e "$rule"
+    echo -e "${BLUE}  $1${NC}"
+    echo -e "$rule"
+    echo ""
+}
+
+# Verify that everything the pipeline needs is present.
+# Reads: VENV_PATH, RVC_GUI, SOPRANO_SCRIPT
+# Exits with status 1 at the first missing prerequisite.
+check_prerequisites() {
+    print_header "Checking Prerequisites"
+
+    # Python virtual environment directory
+    [ -d "$VENV_PATH" ] || {
+        print_error "Virtual environment not found at: $VENV_PATH"
+        exit 1
+    }
+    print_success "Virtual environment found"
+
+    # RVC realtime GUI entry point
+    [ -f "$RVC_GUI" ] || {
+        print_error "RVC GUI not found at: $RVC_GUI"
+        exit 1
+    }
+    print_success "RVC GUI found"
+
+    # Soprano TTS streaming script
+    [ -f "$SOPRANO_SCRIPT" ] || {
+        print_error "Soprano script not found at: $SOPRANO_SCRIPT"
+        exit 1
+    }
+    print_success "Soprano script found"
+
+    # pactl is the PulseAudio control tool used to manage the virtual sink
+    command -v pactl &> /dev/null || {
+        print_error "pactl (PulseAudio) not found. Please install PulseAudio."
+        exit 1
+    }
+    print_success "PulseAudio found"
+}
+
+# Print the pipeline overview and the list of command-line options.
+show_usage() {
+    print_header "Soprano TTS to RVC Pipeline"
+
+    # Unquoted delimiter so that $0 expands to the script's invocation name
+    cat << EOF
+This script helps you run a text-to-speech pipeline where:
+  1. You type text into the Soprano TTS script
+  2. Soprano generates speech and outputs to a virtual sink
+  3. RVC reads from that virtual sink and applies voice conversion
+  4. RVC outputs the converted voice to your speakers/headphones
+
+Usage:
+  $0 [option]
+
+Options:
+  soprano      - Start only the Soprano TTS virtual sink script
+  rvc          - Start only the RVC realtime GUI
+  both         - Start both in separate terminal windows (default)
+  help         - Show this help message
+
+EOF
+}
+
+# Run the Soprano TTS script in the foreground inside the virtual environment.
+# Reads: VENV_PATH, SOPRANO_SCRIPT
+start_soprano() {
+    print_header "Starting Soprano TTS Virtual Sink"
+
+    print_info "Activating virtual environment..."
+    . "${VENV_PATH}/bin/activate"
+
+    print_info "Starting soprano_to_virtual_sink.py..."
+    print_info "This will create a virtual sink: soprano_to_rvc"
+    echo
+
+    python "${SOPRANO_SCRIPT}"
+}
+
+# Run the RVC realtime GUI in the foreground inside the virtual environment.
+# The GUI is started from within RVC_DIR.
+# Reads: VENV_PATH, RVC_DIR, RVC_GUI
+start_rvc() {
+    print_header "Starting RVC Realtime GUI"
+
+    print_info "Activating virtual environment..."
+    . "${VENV_PATH}/bin/activate"
+
+    print_info "Changing to RVC directory..."
+    cd "${RVC_DIR}"
+
+    print_info "Starting RVC GUI..."
+    echo
+    print_warning "IMPORTANT: In the RVC GUI, select 'soprano_to_rvc.monitor' as your INPUT device!"
+    echo
+    # Short pause before the GUI starts, so the reminder above can be read
+    sleep 2
+
+    python "${RVC_GUI}"
+}
+
+# Open one pipeline component in a new window of the given terminal emulator.
+# The window stays open (drops to an interactive bash) after the component exits.
+# Arguments: $1 - terminal emulator command name
+#            $2 - component to run ("soprano" or "rvc")
+# Reads:     SCRIPT_DIR
+_launch_in_terminal() {
+    local terminal="$1"
+    local component="$2"
+
+    # Absolute path to this script: "$0" may be relative to the caller's
+    # working directory and would stop resolving after the `cd` below.
+    local self
+    self="$SCRIPT_DIR/$(basename "${BASH_SOURCE[0]}")"
+
+    # %q shell-quotes each value so spaces or quotes in paths survive `bash -c`
+    local cmd
+    printf -v cmd 'cd %q && bash %q %q; exec bash' "$SCRIPT_DIR" "$self" "$component"
+
+    case "$terminal" in
+        gnome-terminal)
+            gnome-terminal -- bash -c "$cmd" &
+            ;;
+        konsole)
+            konsole -e bash -c "$cmd" &
+            ;;
+        xfce4-terminal)
+            # -x passes the rest of the command line as argv (no re-parsing)
+            xfce4-terminal -x bash -c "$cmd" &
+            ;;
+        alacritty)
+            alacritty -e bash -c "$cmd" &
+            ;;
+        kitty)
+            kitty bash -c "$cmd" &
+            ;;
+        xterm)
+            xterm -e bash -c "$cmd" &
+            ;;
+    esac
+}
+
+# Start both components, each in its own terminal window, then print a short
+# setup guide. Exits with status 1 if no supported terminal emulator is found.
+start_both() {
+    print_header "Starting Both Components"
+
+    print_info "This will open two terminal windows:"
+    print_info "  1. Soprano TTS Virtual Sink (for text input)"
+    print_info "  2. RVC Realtime GUI (for voice conversion)"
+    echo ""
+
+    # Detect terminal emulator: first installed candidate wins
+    local terminal=""
+    local candidate
+    for candidate in gnome-terminal konsole xfce4-terminal alacritty kitty xterm; do
+        if command -v "$candidate" &> /dev/null; then
+            terminal="$candidate"
+            break
+        fi
+    done
+
+    if [ -z "$terminal" ]; then
+        print_error "No suitable terminal emulator found"
+        print_info "Please start the components manually:"
+        print_info "  Terminal 1: $0 soprano"
+        print_info "  Terminal 2: $0 rvc"
+        exit 1
+    fi
+
+    print_success "Using terminal: $terminal"
+
+    # Start soprano in new terminal
+    print_info "Starting Soprano TTS in new terminal..."
+    _launch_in_terminal "$terminal" soprano
+
+    # Pause between the two launches (Soprano is started first)
+    sleep 2
+
+    # Start RVC in new terminal
+    print_info "Starting RVC GUI in new terminal..."
+    _launch_in_terminal "$terminal" rvc
+
+    echo ""
+    print_success "Both components started in separate terminals"
+    echo ""
+    print_header "Quick Setup Guide"
+    echo "1. In the RVC GUI window:"
+    echo "   - Select your RVC model (.pth file)"
+    echo "   - Select the corresponding index file"
+    echo "   - Choose 'soprano_to_rvc.monitor' as INPUT device"
+    echo "   - Choose your speakers/headphones as OUTPUT device"
+    echo "   - Click 'Start Voice Conversion'"
+    echo ""
+    echo "2. In the Soprano TTS window:"
+    echo "   - Type any text you want to convert"
+    echo "   - Press Enter to generate and stream"
+    echo ""
+    echo "3. Listen to the RVC-converted output!"
+    echo ""
+    print_info "Press Ctrl+C in each terminal to stop"
+    echo ""
+}
+
+# Entry point: dispatch on the first argument (defaults to "both").
+# Arguments: $1 - one of soprano | rvc | both | help | --help | -h
+main() {
+    local action="${1:-both}"
+
+    case "$action" in
+        soprano|rvc|both)
+            # All three launch modes share the same prerequisite check
+            check_prerequisites
+            "start_${action}"
+            ;;
+        help|--help|-h)
+            show_usage
+            ;;
+        *)
+            print_error "Unknown option: $1"
+            show_usage
+            exit 1
+            ;;
+    esac
+}
+
+main "$@"
--- a/python-version.txt
+++ b/python-version.txt
@@ -0,0 +1 @@
+3.10.19
--- a/requirements.lock.txt
+++ b/requirements.lock.txt
@@ -0,0 +1,159 @@
+absl-py==2.3.1
+accelerate==1.12.0
+aiofiles==23.2.1
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.3
+aiosignal==1.4.0
+altair==5.5.0
+annotated-doc==0.0.4
+annotated-types==0.7.0
+antlr4-python3-runtime==4.8
+anyio==4.12.1
+async-timeout==5.0.1
+attrs==25.4.0
+audioread==3.1.0
+av==16.1.0
+bitarray==3.8.0
+brotli==1.2.0
+certifi==2026.1.4
+cffi==2.0.0
+charset-normalizer==3.4.4
+click==8.3.1
+colorama==0.4.6
+coloredlogs==15.0.1
+contourpy==1.3.2
+cycler==0.12.1
+Cython==3.2.4
+decorator==5.2.1
+einops==0.8.1
+exceptiongroup==1.3.1
+fairseq==0.12.2
+faiss-cpu==1.7.3
+fastapi==0.88.0
+ffmpeg-python==0.2.0
+ffmpy==0.3.1
+filelock==3.20.0
+flatbuffers==25.12.19
+fonttools==4.61.1
+frozenlist==1.8.0
+fsspec==2025.12.0
+future==1.0.0
+gradio==3.48.0
+gradio_client==0.6.1
+groovy==0.1.2
+grpcio==1.76.0
+h11==0.16.0
+hf-xet==1.2.0
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.36.0
+humanfriendly==10.0
+hydra-core==1.0.7
+hyper-connections==0.4.0
+idna==3.11
+importlib_resources==6.5.2
+inflect==7.5.0
+Jinja2==3.1.3
+joblib==1.5.3
+json5==0.13.0
+jsonschema==4.26.0
+jsonschema-specifications==2025.9.1
+kiwisolver==1.4.9
+lazy_loader==0.4
+librosa==0.10.2
+linkify-it-py==2.0.3
+llvmlite==0.39.0
+local-attention==1.11.2
+lxml==6.0.2
+Markdown==3.10
+markdown-it-py==2.2.0
+MarkupSafe==2.1.5
+matplotlib==3.10.8
+matplotlib-inline==0.2.1
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+more-itertools==10.8.0
+mpmath==1.3.0
+msgpack==1.1.2
+multidict==6.7.0
+narwhals==2.15.0
+networkx==3.4.2
+numba==0.56.4
+numpy==1.23.5
+omegaconf==2.0.6
+onnxruntime==1.23.2
+onnxruntime-gpu==1.23.2
+orjson==3.11.5
+packaging==25.0
+pandas==2.3.3
+pillow==10.4.0
+platformdirs==4.5.1
+pooch==1.8.2
+portalocker==3.2.0
+praat-parselmouth==0.4.7
+propcache==0.4.1
+protobuf==6.33.3
+psutil==7.2.1
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==1.10.26
+pydantic_core==2.41.5
+pydub==0.25.1
+Pygments==2.19.2
+pyparsing==3.3.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.2.1
+python-multipart==0.0.21
+pytorch-triton-rocm==3.1.0
+pytz==2025.2
+pyworld==0.3.2
+PyYAML==6.0.3
+referencing==0.37.0
+regex==2025.11.3
+requests==2.32.5
+resampy==0.4.3
+rich==14.2.0
+rpds-py==0.30.0
+sacrebleu==2.5.1
+safehttpx==0.1.7
+safetensors==0.7.0
+scikit-learn==1.7.2
+scipy==1.15.3
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+-e git+https://github.com/ekwek1/soprano.git@5c759351f9e115aa364d5f4453ddaa7ee0d6f15e#egg=soprano_tts
+sounddevice==0.5.3
+soundfile==0.13.1
+soxr==1.0.0
+starlette==0.22.0
+sympy==1.13.1
+tabulate==0.9.0
+tensorboard==2.20.0
+tensorboard-data-server==0.7.2
+tensorboardX==2.6.4
+threadpoolctl==3.6.0
+tokenizers==0.22.2
+tomlkit==0.13.3
+torch==2.5.1+rocm6.2
+torchaudio==2.5.1+rocm6.2
+torchcrepe==0.0.23
+torchfcpe==0.0.4
+torchvision==0.20.1+rocm6.2
+tornado==6.5.4
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.57.3
+typeguard==4.4.4
+typer==0.21.1
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+tzdata==2025.3
+uc-micro-py==1.0.3
+Unidecode==1.4.0
+urllib3==2.6.3
+uvicorn==0.40.0
+websockets==11.0.3
+Werkzeug==3.1.5
+yarl==1.22.0
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,86 @@
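+# ==========================================================
+# Unified Soprano + RVC environment
+# Python == 3.10.19
+# ROCm == 6.2
+#
+# The "+rocm6.2" wheels are not on PyPI; install with:
+#   pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/rocm6.2
+# ==========================================================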
+
+# ----------------------
+# Core ML / GPU stack
+# ----------------------
+torch==2.5.1+rocm6.2
+torchaudio==2.5.1+rocm6.2
+torchvision==0.20.1+rocm6.2
+pytorch-triton-rocm==3.1.0
+
+# ----------------------
+# Numerical stack (RVC-safe)
+# ----------------------
+numpy==1.23.5
+scipy==1.15.3
+scikit-learn==1.7.2
+
+# ----------------------
+# Audio processing
+# ----------------------
+sounddevice==0.5.3
+soundfile==0.13.1
+pydub==0.25.1
+librosa==0.10.2
+soxr==1.0.0
+resampy==0.4.3
+praat-parselmouth==0.4.7
+pyworld==0.3.2
+av==16.1.0
+
+# ----------------------
+# RVC core
+# ----------------------
+fairseq==0.12.2
+faiss-cpu==1.7.3
+numba==0.56.4
+llvmlite==0.39.0
+torchcrepe==0.0.23
+torchfcpe==0.0.4
+einops==0.8.1
+local-attention==1.11.2
+omegaconf==2.0.6
+hydra-core==1.0.7
+
+# ----------------------
+# Soprano TTS
+# ----------------------
+transformers==4.57.3
+accelerate==1.12.0
+tokenizers==0.22.2
+safetensors==0.7.0
+huggingface-hub==0.36.0
+inflect==7.5.0
+Unidecode==1.4.0
+
+# ----------------------
+# Web / UI
+# ----------------------
+fastapi==0.88.0
+starlette==0.22.0
+uvicorn==0.40.0
+gradio==3.48.0
+gradio_client==0.6.1
+python-multipart==0.0.21
+orjson==3.11.5
+
+# ----------------------
+# Utilities
+# ----------------------
+tqdm==4.67.1
+rich==14.2.0
+psutil==7.2.1
+requests==2.32.5
+regex==2025.11.3
+filelock==3.20.0
+packaging==25.0
+PyYAML==6.0.3
+
+# ----------------------
+# Editable installs (from Git, pinned to a commit)
+# ----------------------
+-e git+https://github.com/ekwek1/soprano.git@5c759351f9e115aa364d5f4453ddaa7ee0d6f15e#egg=soprano_tts
--- a/setup_alsa_bridge.sh
+++ b/setup_alsa_bridge.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Setup script to make soprano_to_rvc available as an ALSA device for RVC.
+#
+# Appends a PulseAudio-backed PCM/CTL definition named "soprano_rvc" to
+# ~/.asoundrc (unless it is already there), pointing at the monitor source of
+# the sink named by SINK_NAME. The existing file is backed up before it is
+# modified.
+
+ASOUND_RC="$HOME/.asoundrc"
+SINK_NAME="soprano_to_rvc"
+
+echo "Setting up ALSA configuration for ${SINK_NAME}..."
+
+# Check if our configuration already exists (-F: treat the dot literally)
+if grep -qF "pcm.soprano_rvc" "$ASOUND_RC" 2>/dev/null; then
+    echo "✓ Configuration already exists in .asoundrc"
+else
+    # Back up only when the file is about to change, so that re-running the
+    # script does not accumulate identical backups.
+    if [ -f "$ASOUND_RC" ]; then
+        if ! cp "$ASOUND_RC" "${ASOUND_RC}.backup.$(date +%s)"; then
+            echo "✗ Could not back up $ASOUND_RC; leaving it untouched" >&2
+            exit 1
+        fi
+        echo "✓ Backed up existing .asoundrc"
+    fi
+
+    echo "Adding ALSA configuration..."
+
+    # Unquoted delimiter so ${SINK_NAME} expands; the block contains nothing
+    # else that the shell would interpret.
+    if ! cat >> "$ASOUND_RC" << EOF
+
+# Soprano to RVC bridge
+pcm.soprano_rvc {
+    type pulse
+    device ${SINK_NAME}.monitor
+    hint {
+        show on
+        description "Soprano TTS to RVC Bridge"
+    }
+}
+
+ctl.soprano_rvc {
+    type pulse
+    device ${SINK_NAME}.monitor
+}
+EOF
+    then
+        echo "✗ Failed to write to $ASOUND_RC" >&2
+        exit 1
+    fi
+
+    echo "✓ Added ALSA configuration to .asoundrc"
+fi
+
+echo ""
+# 70-character rule ('echo "=" * 70' would glob-expand * to the file listing)
+printf '=%.0s' {1..70}
+echo ""
+echo "Setup complete!"
+echo ""
+echo "The virtual device 'soprano_rvc' is now available as an ALSA device."
+echo ""
+echo "In RVC GUI:"
+echo "  1. Set device type to 'ALSA'"
+echo "  2. Select 'soprano_rvc' or 'Soprano TTS to RVC Bridge' as input"
+echo "  3. Make sure the soprano_to_virtual_sink.py script is running"
+echo ""
--- a/1
+++ b/1
--- a/soprano_to_virtual_sink.py
+++ b/soprano_to_virtual_sink.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python3
+"""
+Soprano TTS to Virtual Sink
+This script takes text input and streams Soprano TTS output to a virtual PulseAudio sink
+that can be used as input for RVC realtime voice conversion.
+"""
+
+import sys
+import os
+import subprocess
+import signal
+import sounddevice as sd
+import numpy as np
+import torch
+from scipy import signal as scipy_signal
+
+# Add soprano to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'soprano'))
+from soprano import SopranoTTS
+
+# Configuration
+# Name of the PulseAudio null sink; its monitor source is "<name>.monitor"
+VIRTUAL_SINK_NAME = "soprano_to_rvc"
+SAMPLE_RATE = 48000  # Use 48kHz for better compatibility with audio systems
+SOPRANO_RATE = 32000  # Soprano outputs at 32kHz
+CHANNELS = 2  # Use stereo to match RVC expectations
+
+# Global flag for graceful shutdown: cleared by signal_handler, polled by the
+# streaming loop and the main input loop
+running = True
+
+
+def signal_handler(sig, frame):
+    """SIGINT handler: announce the shutdown and clear the global run flag.
+
+    Both arguments are supplied by the signal machinery and are not used.
+    """
+    global running
+    print("\n\nShutting down gracefully...")
+    # Loops that poll `running` stop at their next check
+    running = False
+
+
+def create_virtual_sink():
+    """Create the PulseAudio null sink used for audio output, if missing.
+
+    Returns:
+        True if a sink named VIRTUAL_SINK_NAME already exists or was created,
+        False if pactl is not installed or the sink could not be created.
+    """
+    # Check if sink already exists
+    try:
+        result = subprocess.run(
+            ["pactl", "list", "sinks", "short"],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+    except FileNotFoundError:
+        # Without pactl neither the check nor the creation can work
+        print("✗ pactl not found. Please install PulseAudio.")
+        return False
+    except subprocess.CalledProcessError:
+        # Listing failed; fall through and attempt to create the sink anyway
+        result = None
+
+    if result is not None:
+        # The second column of each line is the sink name. Compare it exactly
+        # so that a sink whose name merely contains ours does not count.
+        existing = set()
+        for line in result.stdout.splitlines():
+            fields = line.split()
+            if len(fields) > 1:
+                existing.add(fields[1])
+        if VIRTUAL_SINK_NAME in existing:
+            print(f"✓ Virtual sink '{VIRTUAL_SINK_NAME}' already exists")
+            print(f"  Monitor source: {VIRTUAL_SINK_NAME}.monitor")
+            return True
+
+    print(f"Creating virtual sink: {VIRTUAL_SINK_NAME}")
+    try:
+        # Create a null sink (virtual audio device) at SAMPLE_RATE / CHANNELS,
+        # the same format the output stream is opened with
+        subprocess.run([
+            "pactl", "load-module", "module-null-sink",
+            f"sink_name={VIRTUAL_SINK_NAME}",
+            f"sink_properties=device.description={VIRTUAL_SINK_NAME}",
+            f"rate={SAMPLE_RATE}",
+            f"channels={CHANNELS}"
+        ], check=True, capture_output=True, text=True)
+        print(f"✓ Virtual sink '{VIRTUAL_SINK_NAME}' created successfully")
+        print(f"  Monitor source: {VIRTUAL_SINK_NAME}.monitor")
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"✗ Failed to create virtual sink: {(e.stderr or '').strip()}")
+        return False
+
+
+def remove_virtual_sink():
+    """Unload the first PulseAudio module whose listing line mentions the sink.
+
+    Any error (pactl failure included) is reported on stdout, not raised.
+    """
+    print(f"\nRemoving virtual sink: {VIRTUAL_SINK_NAME}")
+    try:
+        listing = subprocess.run(
+            ["pactl", "list", "modules", "short"],
+            capture_output=True,
+            text=True,
+            check=True
+        ).stdout
+
+        # Only the first matching line is acted upon
+        matching_rows = (row for row in listing.split('\n') if VIRTUAL_SINK_NAME in row)
+        first_match = next(matching_rows, None)
+        if first_match is not None:
+            # The module ID is the first whitespace-separated field
+            module_id = first_match.split()[0]
+            subprocess.run(["pactl", "unload-module", module_id], check=True)
+            print("✓ Virtual sink removed")
+    except Exception as e:
+        print(f"✗ Error removing virtual sink: {e}")
+
+
+def get_virtual_sink_device_id():
+    """Return the sounddevice index of the virtual sink, or None if absent.
+
+    The first device whose name contains VIRTUAL_SINK_NAME is returned.
+    """
+    # Re-initialise sounddevice so the device list is re-read
+    # (relies on sounddevice's private _terminate/_initialize helpers)
+    sd._terminate()
+    sd._initialize()
+
+    return next(
+        (
+            index
+            for index, entry in enumerate(sd.query_devices())
+            if VIRTUAL_SINK_NAME in entry['name']
+        ),
+        None,
+    )
+
+
+def _prepare_chunk(chunk):
+    """Convert one Soprano chunk into frames ready for the output stream.
+
+    The chunk is flattened to mono, cleaned of NaN/inf, resampled from
+    SOPRANO_RATE to SAMPLE_RATE, clipped to [-1, 1] and duplicated across
+    CHANNELS channels.
+
+    Returns:
+        A float32 array of shape (N, CHANNELS), or None when the chunk holds
+        no samples (nothing to play).
+    """
+    # Convert torch tensor to numpy if needed
+    if isinstance(chunk, torch.Tensor):
+        chunk = chunk.detach().cpu().numpy()
+
+    # Whatever the incoming shape, treat the data as one mono sample sequence
+    mono = np.asarray(chunk).reshape(-1)
+    if mono.size == 0:
+        return None
+
+    # Check for invalid values before resampling
+    if not np.all(np.isfinite(mono)):
+        print(f"⚠️  Warning: Invalid values in soprano output, cleaning...")
+        mono = np.nan_to_num(mono, nan=0.0, posinf=1.0, neginf=-1.0)
+
+    # Resample from Soprano's rate to the output rate if they differ
+    if SOPRANO_RATE != SAMPLE_RATE:
+        num_samples = int(len(mono) * SAMPLE_RATE / SOPRANO_RATE)
+        if num_samples == 0:
+            return None
+        mono = scipy_signal.resample(mono, num_samples)
+
+    # Ensure no NaN or inf values after resampling (clip to valid range)
+    if not np.all(np.isfinite(mono)):
+        print(f"⚠️  Warning: Invalid values after resampling, cleaning...")
+        mono = np.nan_to_num(mono, nan=0.0, posinf=1.0, neginf=-1.0)
+    mono = np.clip(mono, -1.0, 1.0)
+
+    # Duplicate the mono signal into every output channel: shape (N, CHANNELS)
+    return np.repeat(mono[:, np.newaxis], CHANNELS, axis=1).astype(np.float32)
+
+
+def stream_to_virtual_sink(tts_model, text, chunk_size=1):
+    """Stream soprano TTS output for `text` to the virtual sink.
+
+    Args:
+        tts_model: object providing infer_stream(text, chunk_size=...) that
+            yields audio chunks (torch tensors or numpy arrays).
+        text: the text to synthesise.
+        chunk_size: forwarded unchanged to infer_stream.
+
+    Returns:
+        True if streaming finished (or was stopped via the `running` flag),
+        False if the sink device could not be found or an error occurred.
+    """
+    device_id = get_virtual_sink_device_id()
+
+    if device_id is None:
+        print(f"✗ Could not find virtual sink device: {VIRTUAL_SINK_NAME}")
+        print(f"⚠️  Attempting to recreate virtual sink...")
+        if not create_virtual_sink():
+            return False
+
+        # Wait a moment for the device to appear
+        import time
+        time.sleep(1.0)
+        device_id = get_virtual_sink_device_id()
+        if device_id is None:
+            print(f"✗ Still could not find virtual sink after recreation")
+            print(f"\n📋 Available devices:")
+            for i, dev in enumerate(sd.query_devices()):
+                if 'soprano' in dev['name'].lower() or 'rvc' in dev['name'].lower():
+                    print(f"   {i}: {dev['name']}")
+            return False
+
+    device_info = sd.query_devices(device_id)
+    print(f"✓ Using output device: {device_info['name']}")
+
+    # The stream is always opened at SAMPLE_RATE, whatever default rate the
+    # device reports, so that is the rate shown here.
+    print(f"  Sample rate: {SAMPLE_RATE} Hz")
+    print(f"\n🎤 Generating and streaming speech...")
+    print(f"Text: \"{text}\"\n")
+
+    try:
+        # Generate streaming audio from soprano
+        stream = tts_model.infer_stream(text, chunk_size=chunk_size)
+
+        # Open output stream to virtual sink
+        with sd.OutputStream(
+            samplerate=SAMPLE_RATE,
+            channels=CHANNELS,
+            dtype='float32',
+            device=device_id,
+            blocksize=0
+        ) as out_stream:
+            first_chunk = True
+            for chunk in stream:
+                # Stop early once shutdown has been requested
+                if not running:
+                    break
+
+                if first_chunk:
+                    print("✓ First audio chunk generated and streaming started")
+                    first_chunk = False
+
+                frames = _prepare_chunk(chunk)
+                if frames is None:
+                    # Empty chunk: nothing to write
+                    continue
+
+                # Write to virtual sink
+                out_stream.write(frames)
+
+        print("✓ Speech generation and streaming completed")
+        return True
+
+    except Exception as e:
+        print(f"✗ Error during streaming: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def main():
+    """Set up the virtual sink, load Soprano TTS and run the text prompt loop.
+
+    Returns:
+        0 on normal exit, 1 if the sink could not be created or the model
+        failed to load.
+    """
+    global running
+
+    # Set up signal handler for graceful shutdown
+    signal.signal(signal.SIGINT, signal_handler)
+
+    print("=" * 70)
+    print("Soprano TTS to Virtual Sink for RVC")
+    print("=" * 70)
+    print()
+
+    # Create virtual sink
+    if not create_virtual_sink():
+        print("\n⚠️  If sink already exists, removing and recreating...")
+        remove_virtual_sink()
+        if not create_virtual_sink():
+            print("✗ Failed to create virtual sink. Exiting.")
+            return 1
+
+    print()
+    print("=" * 70)
+    print("Virtual sink setup complete!")
+    print("=" * 70)
+    print()
+    print("📝 Next steps:")
+    print(f"   1. Open RVC realtime GUI (gui_v1.py)")
+    print(f"   2. Select '{VIRTUAL_SINK_NAME}.monitor' as the INPUT device")
+    print(f"   3. Select your desired output device")
+    print(f"   4. Load your RVC model and start conversion")
+    print(f"   5. Return here and type text to convert")
+    print()
+    print("=" * 70)
+    print()
+
+    # Initialize Soprano TTS
+    print("🔄 Loading Soprano TTS model...")
+    try:
+        tts = SopranoTTS(
+            backend='auto',
+            device='auto',
+            cache_size_mb=100,
+            decoder_batch_size=1
+        )
+        print("✓ Soprano TTS model loaded successfully")
+    except Exception as e:
+        print(f"✗ Failed to load Soprano TTS: {e}")
+        remove_virtual_sink()
+        return 1
+
+    print()
+    print("=" * 70)
+    print("Ready! Type text to generate speech (Ctrl+C to exit)")
+    print("=" * 70)
+    print()
+
+    # Main loop - get text input and generate speech
+    try:
+        while running:
+            # A handler that only sets a flag cannot interrupt a blocking
+            # input(): Ctrl+C would do nothing until Enter is pressed. Use the
+            # default handler (raises KeyboardInterrupt) while at the prompt,
+            # then restore the flag-based handler for streaming.
+            signal.signal(signal.SIGINT, signal.default_int_handler)
+            try:
+                text = input("\n🎙️  Enter text: ").strip()
+            except EOFError:
+                break
+            finally:
+                signal.signal(signal.SIGINT, signal_handler)
+
+            if not text:
+                print("⚠️  Please enter some text")
+                continue
+
+            if text.lower() in ['quit', 'exit', 'q']:
+                break
+
+            # Stream the text to the virtual sink
+            stream_to_virtual_sink(tts, text, chunk_size=1)
+            print()
+
+    except KeyboardInterrupt:
+        print("\n\n⚠️  Interrupted by user")
+
+    finally:
+        # Clean up
+        remove_virtual_sink()
+        print("\n✓ Cleanup complete. Goodbye!")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())