Working with GUI, auto loopback creation, soprano streaming
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
.venv
|
||||
*.pth
|
||||
1
Retrieval-based-Voice-Conversion-WebUI
Submodule
1
Retrieval-based-Voice-Conversion-WebUI
Submodule
Submodule Retrieval-based-Voice-Conversion-WebUI added at 7ef1986778
29
cleanup_virtual_sinks.sh
Executable file
29
cleanup_virtual_sinks.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/bin/bash
# Cleanup script to remove any leftover virtual sinks.
# Run this if you encounter issues with the virtual sink.
#
# Unloads every PulseAudio module whose `pactl list modules short` entry
# mentions "soprano" or "rvc", then prints the sinks that remain.

echo "🧹 Cleaning up virtual audio sinks..."
echo ""

# Without pactl the module list is empty, which would otherwise be reported
# as "nothing to clean up" even though nothing was inspected.
if ! command -v pactl > /dev/null 2>&1; then
  echo "✗ pactl not found; cannot inspect PulseAudio modules." >&2
  exit 1
fi

# Take ONE snapshot of the matching modules so the list shown to the user and
# the list that is actually unloaded cannot drift apart.
MATCHES=$(pactl list modules short | grep -E "soprano|rvc")
MODULES=$(printf '%s\n' "$MATCHES" | awk 'NF {print $1}')

if [ -z "$MODULES" ]; then
  echo "✓ No virtual sinks found. Nothing to clean up."
else
  echo "Found virtual sink modules to remove:"
  printf '%s\n' "$MATCHES"
  echo ""

  FAILED=0
  while IFS= read -r MODULE; do
    echo "Removing module $MODULE..."
    if ! pactl unload-module "$MODULE"; then
      echo "✗ Failed to unload module $MODULE" >&2
      FAILED=1
    fi
  done <<< "$MODULES"

  echo ""
  if [ "$FAILED" -eq 0 ]; then
    echo "✓ Cleanup complete!"
  else
    echo "⚠ Cleanup finished, but some modules could not be unloaded." >&2
  fi
fi

echo ""
echo "Current audio sinks:"
pactl list sinks short
|
||||
24
constraints.txt
Normal file
24
constraints.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
# ==========================================================
|
||||
# HARD CONSTRAINTS — DO NOT UPGRADE CASUALLY
|
||||
# ==========================================================
|
||||
|
||||
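# Python interpreter: 3.10.19 (pinned in python-version.txt). pip requirement/constraint files cannot pin the interpreter, so this line is informational only.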
|
||||
|
||||
# Torch / ROCm ABI lock
|
||||
torch == 2.5.1+rocm6.2
|
||||
torchaudio == 2.5.1+rocm6.2
|
||||
torchvision == 0.20.1+rocm6.2
|
||||
pytorch-triton-rocm == 3.1.0
|
||||
|
||||
# NumPy / Numba compatibility
|
||||
numpy < 1.24
|
||||
numba == 0.56.4
|
||||
llvmlite == 0.39.0
|
||||
|
||||
# RVC core
|
||||
fairseq == 0.12.2
|
||||
faiss-cpu == 1.7.3
|
||||
pyworld < 0.4
|
||||
|
||||
# Gradio pin (RVC WebUI tested)
|
||||
gradio == 3.48.0
|
||||
1070
gui_v1.py.backup
Normal file
1070
gui_v1.py.backup
Normal file
File diff suppressed because it is too large
Load Diff
260
launch_soprano_rvc.sh
Executable file
260
launch_soprano_rvc.sh
Executable file
@@ -0,0 +1,260 @@
|
||||
#!/bin/bash
# Soprano TTS to RVC Pipeline Launcher
# This script helps you set up and run the soprano->RVC pipeline

# Abort on the first command that fails.
set -e

# Absolute directory that contains this script; every path below hangs off it.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"

VENV_PATH="${SCRIPT_DIR}/.venv"
RVC_DIR="${SCRIPT_DIR}/Retrieval-based-Voice-Conversion-WebUI"
RVC_GUI="${RVC_DIR}/gui_v1.py"
SOPRANO_SCRIPT="${SCRIPT_DIR}/soprano_to_virtual_sink.py"

# ANSI colour sequences, stored with literal backslashes; the print_* helpers
# expand the escapes when they write the message.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color (reset)
|
||||
|
||||
# Print colored output
|
||||
# Write an informational message to stdout, prefixed with a blue "ℹ".
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
print_info() {
  printf '%b\n' "${BLUE}ℹ ${NC}$1"
}
|
||||
|
||||
# Write a success message to stdout, prefixed with a green "✓".
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
print_success() {
  printf '%b\n' "${GREEN}✓${NC} $1"
}
|
||||
|
||||
# Write a warning message to stdout, prefixed with a yellow "⚠".
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
print_warning() {
  printf '%b\n' "${YELLOW}⚠${NC} $1"
}
|
||||
|
||||
# Write an error message, prefixed with a red "✗", to stderr so that it is
# not mixed into captured or piped stdout.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes in it are expanded)
print_error() {
  echo -e "${RED}✗${NC} $1" >&2
}
|
||||
|
||||
# Write a section banner to stdout: a blank line, a blue rule, the title,
# a second blue rule, and a closing blank line.
# Globals:   BLUE, NC (read)
# Arguments: $1 - banner title (backslash escapes in it are expanded)
print_header() {
  local rule="═══════════════════════════════════════════════════════════════════"

  printf '\n'
  printf '%b\n' \
    "${BLUE}${rule}${NC}" \
    "${BLUE} $1${NC}" \
    "${BLUE}${rule}${NC}"
  printf '\n'
}
|
||||
|
||||
# Check prerequisites
|
||||
# Verify that everything the launcher needs is present, reporting each check.
# Exits the script with status 1 at the first missing prerequisite.
# Globals: VENV_PATH, RVC_GUI, SOPRANO_SCRIPT (read)
# Outputs: one success line per passed check, or an error line on failure
check_prerequisites() {
  print_header "Checking Prerequisites"

  # Python virtual environment directory
  [[ -d "$VENV_PATH" ]] || {
    print_error "Virtual environment not found at: $VENV_PATH"
    exit 1
  }
  print_success "Virtual environment found"

  # RVC realtime GUI entry point
  [[ -f "$RVC_GUI" ]] || {
    print_error "RVC GUI not found at: $RVC_GUI"
    exit 1
  }
  print_success "RVC GUI found"

  # Soprano streaming script
  [[ -f "$SOPRANO_SCRIPT" ]] || {
    print_error "Soprano script not found at: $SOPRANO_SCRIPT"
    exit 1
  }
  print_success "Soprano script found"

  # pactl is the PulseAudio control client
  command -v pactl > /dev/null 2>&1 || {
    print_error "pactl (PulseAudio) not found. Please install PulseAudio."
    exit 1
  }
  print_success "PulseAudio found"
}
|
||||
|
||||
# Display usage instructions
|
||||
# Print the pipeline overview and the command-line help to stdout.
# The usage line shows the script as it was invoked ($0).
show_usage() {
  print_header "Soprano TTS to RVC Pipeline"

  # Unquoted delimiter on purpose: $0 must be expanded in the usage line.
  cat <<EOF
This script helps you run a text-to-speech pipeline where:
 1. You type text into the Soprano TTS script
 2. Soprano generates speech and outputs to a virtual sink
 3. RVC reads from that virtual sink and applies voice conversion
 4. RVC outputs the converted voice to your speakers/headphones

Usage:
 $0 [option]

Options:
 soprano - Start only the Soprano TTS virtual sink script
 rvc - Start only the RVC realtime GUI
 both - Start both in separate terminal windows (default)
 help - Show this help message

EOF
}
|
||||
|
||||
# Start soprano script
|
||||
# Activate the virtual environment and run the Soprano streaming script in
# the foreground of the current terminal.
# Globals: VENV_PATH, SOPRANO_SCRIPT (read)
start_soprano() {
  print_header "Starting Soprano TTS Virtual Sink"

  print_info "Activating virtual environment..."
  # shellcheck disable=SC1091 — the activate script only exists at runtime
  . "${VENV_PATH}/bin/activate"

  print_info "Starting soprano_to_virtual_sink.py..."
  print_info "This will create a virtual sink: soprano_to_rvc"
  printf '\n'

  python "${SOPRANO_SCRIPT}"
}
|
||||
|
||||
# Start RVC GUI
|
||||
# Activate the virtual environment, change into the RVC checkout and run the
# realtime GUI in the foreground of the current terminal.
# Globals: VENV_PATH, RVC_DIR, RVC_GUI (read)
start_rvc() {
  print_header "Starting RVC Realtime GUI"

  print_info "Activating virtual environment..."
  # shellcheck disable=SC1091 — the activate script only exists at runtime
  . "${VENV_PATH}/bin/activate"

  print_info "Changing to RVC directory..."
  cd "${RVC_DIR}"

  print_info "Starting RVC GUI..."
  printf '\n'
  print_warning "IMPORTANT: In the RVC GUI, select 'soprano_to_rvc.monitor' as your INPUT device!"
  printf '\n'
  # Short pause before the GUI is launched, so the notice above is on screen first.
  sleep 2

  python "${RVC_GUI}"
}
|
||||
|
||||
# Start both in separate terminals
|
||||
# Open a new terminal window that runs one pipeline component and then
# drops into an interactive shell so the window stays open.
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - terminal emulator name (one of the names in the case below)
#            $2 - component to start: "soprano" or "rvc"
_launch_component_in_terminal() {
  local terminal="$1"
  local component="$2"
  local script_path
  local inner_cmd

  # The spawned shell first cd's into SCRIPT_DIR, so a relative "$0" (for
  # example "scripts/launch.sh" typed from another directory) would no longer
  # resolve there. Address the script through its absolute location instead.
  script_path="${SCRIPT_DIR}/$(basename -- "${BASH_SOURCE[0]}")"
  inner_cmd="cd '${SCRIPT_DIR}' && bash '${script_path}' ${component}; exec bash"

  # Each emulator has its own way of being handed a command line.
  case "$terminal" in
    gnome-terminal)
      gnome-terminal -- bash -c "$inner_cmd" &
      ;;
    konsole)
      konsole -e bash -c "$inner_cmd" &
      ;;
    xfce4-terminal)
      # xfce4-terminal takes the whole command as a single string.
      xfce4-terminal -e "bash -c \"${inner_cmd}\"" &
      ;;
    alacritty)
      alacritty -e bash -c "$inner_cmd" &
      ;;
    kitty)
      kitty bash -c "$inner_cmd" &
      ;;
    xterm)
      xterm -e bash -c "$inner_cmd" &
      ;;
  esac
}

# Start the Soprano script and the RVC GUI, each in its own terminal window,
# then print a short setup guide. Exits with status 1 when none of the
# supported terminal emulators is installed.
# Globals: SCRIPT_DIR (read)
start_both() {
  print_header "Starting Both Components"

  print_info "This will open two terminal windows:"
  print_info " 1. Soprano TTS Virtual Sink (for text input)"
  print_info " 2. RVC Realtime GUI (for voice conversion)"
  echo ""

  # Detect terminal emulator: first installed candidate wins.
  local terminal=""
  local candidate
  for candidate in gnome-terminal konsole xfce4-terminal alacritty kitty xterm; do
    if command -v "$candidate" &> /dev/null; then
      terminal="$candidate"
      break
    fi
  done

  if [[ -z "$terminal" ]]; then
    print_error "No suitable terminal emulator found"
    print_info "Please start the components manually:"
    print_info " Terminal 1: $0 soprano"
    print_info " Terminal 2: $0 rvc"
    exit 1
  fi

  print_success "Using terminal: $terminal"

  # Start soprano in new terminal
  print_info "Starting Soprano TTS in new terminal..."
  _launch_component_in_terminal "$terminal" soprano

  # Pause between the two launches.
  sleep 2

  # Start RVC in new terminal
  print_info "Starting RVC GUI in new terminal..."
  _launch_component_in_terminal "$terminal" rvc

  echo ""
  print_success "Both components started in separate terminals"
  echo ""
  print_header "Quick Setup Guide"
  echo "1. In the RVC GUI window:"
  echo " - Select your RVC model (.pth file)"
  echo " - Select the corresponding index file"
  echo " - Choose 'soprano_to_rvc.monitor' as INPUT device"
  echo " - Choose your speakers/headphones as OUTPUT device"
  echo " - Click 'Start Voice Conversion'"
  echo ""
  echo "2. In the Soprano TTS window:"
  echo " - Type any text you want to convert"
  echo " - Press Enter to generate and stream"
  echo ""
  echo "3. Listen to the RVC-converted output!"
  echo ""
  print_info "Press Ctrl+C in each terminal to stop"
  echo ""
}
|
||||
|
||||
# Main script
|
||||
# Entry point: dispatch on the first argument.
# Arguments: $1 - "soprano", "rvc", "both" (default when absent or empty),
#                 or "help" / "--help" / "-h"
# Exits with status 1 (after printing the usage text) for any other value.
main() {
  local action="${1:-both}"

  if [[ "$action" == "soprano" ]]; then
    check_prerequisites
    start_soprano
  elif [[ "$action" == "rvc" ]]; then
    check_prerequisites
    start_rvc
  elif [[ "$action" == "both" ]]; then
    check_prerequisites
    start_both
  elif [[ "$action" == "help" || "$action" == "--help" || "$action" == "-h" ]]; then
    show_usage
  else
    print_error "Unknown option: $1"
    show_usage
    exit 1
  fi
}
|
||||
|
||||
main "$@"
|
||||
1
python-version.txt
Normal file
1
python-version.txt
Normal file
@@ -0,0 +1 @@
|
||||
3.10.19
|
||||
159
requirements.lock.txt
Normal file
159
requirements.lock.txt
Normal file
@@ -0,0 +1,159 @@
|
||||
absl-py==2.3.1
|
||||
accelerate==1.12.0
|
||||
aiofiles==23.2.1
|
||||
aiohappyeyeballs==2.6.1
|
||||
aiohttp==3.13.3
|
||||
aiosignal==1.4.0
|
||||
altair==5.5.0
|
||||
annotated-doc==0.0.4
|
||||
annotated-types==0.7.0
|
||||
antlr4-python3-runtime==4.8
|
||||
anyio==4.12.1
|
||||
async-timeout==5.0.1
|
||||
attrs==25.4.0
|
||||
audioread==3.1.0
|
||||
av==16.1.0
|
||||
bitarray==3.8.0
|
||||
brotli==1.2.0
|
||||
certifi==2026.1.4
|
||||
cffi==2.0.0
|
||||
charset-normalizer==3.4.4
|
||||
click==8.3.1
|
||||
colorama==0.4.6
|
||||
coloredlogs==15.0.1
|
||||
contourpy==1.3.2
|
||||
cycler==0.12.1
|
||||
Cython==3.2.4
|
||||
decorator==5.2.1
|
||||
einops==0.8.1
|
||||
exceptiongroup==1.3.1
|
||||
fairseq==0.12.2
|
||||
faiss-cpu==1.7.3
|
||||
fastapi==0.88.0
|
||||
ffmpeg-python==0.2.0
|
||||
ffmpy==0.3.1
|
||||
filelock==3.20.0
|
||||
flatbuffers==25.12.19
|
||||
fonttools==4.61.1
|
||||
frozenlist==1.8.0
|
||||
fsspec==2025.12.0
|
||||
future==1.0.0
|
||||
gradio==3.48.0
|
||||
gradio_client==0.6.1
|
||||
groovy==0.1.2
|
||||
grpcio==1.76.0
|
||||
h11==0.16.0
|
||||
hf-xet==1.2.0
|
||||
httpcore==1.0.9
|
||||
httpx==0.28.1
|
||||
huggingface-hub==0.36.0
|
||||
humanfriendly==10.0
|
||||
hydra-core==1.0.7
|
||||
hyper-connections==0.4.0
|
||||
idna==3.11
|
||||
importlib_resources==6.5.2
|
||||
inflect==7.5.0
|
||||
Jinja2==3.1.3
|
||||
joblib==1.5.3
|
||||
json5==0.13.0
|
||||
jsonschema==4.26.0
|
||||
jsonschema-specifications==2025.9.1
|
||||
kiwisolver==1.4.9
|
||||
lazy_loader==0.4
|
||||
librosa==0.10.2
|
||||
linkify-it-py==2.0.3
|
||||
llvmlite==0.39.0
|
||||
local-attention==1.11.2
|
||||
lxml==6.0.2
|
||||
Markdown==3.10
|
||||
markdown-it-py==2.2.0
|
||||
MarkupSafe==2.1.5
|
||||
matplotlib==3.10.8
|
||||
matplotlib-inline==0.2.1
|
||||
mdit-py-plugins==0.3.3
|
||||
mdurl==0.1.2
|
||||
more-itertools==10.8.0
|
||||
mpmath==1.3.0
|
||||
msgpack==1.1.2
|
||||
multidict==6.7.0
|
||||
narwhals==2.15.0
|
||||
networkx==3.4.2
|
||||
numba==0.56.4
|
||||
numpy==1.23.5
|
||||
omegaconf==2.0.6
|
||||
onnxruntime==1.23.2
|
||||
onnxruntime-gpu==1.23.2
|
||||
orjson==3.11.5
|
||||
packaging==25.0
|
||||
pandas==2.3.3
|
||||
pillow==10.4.0
|
||||
platformdirs==4.5.1
|
||||
pooch==1.8.2
|
||||
portalocker==3.2.0
|
||||
praat-parselmouth==0.4.7
|
||||
propcache==0.4.1
|
||||
protobuf==6.33.3
|
||||
psutil==7.2.1
|
||||
pyasn1==0.6.1
|
||||
pyasn1_modules==0.4.2
|
||||
pycparser==2.23
|
||||
pydantic==1.10.26
|
||||
pydantic_core==2.41.5
|
||||
pydub==0.25.1
|
||||
Pygments==2.19.2
|
||||
pyparsing==3.3.1
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.2.1
|
||||
python-multipart==0.0.21
|
||||
pytorch-triton-rocm==3.1.0
|
||||
pytz==2025.2
|
||||
pyworld==0.3.2
|
||||
PyYAML==6.0.3
|
||||
referencing==0.37.0
|
||||
regex==2025.11.3
|
||||
requests==2.32.5
|
||||
resampy==0.4.3
|
||||
rich==14.2.0
|
||||
rpds-py==0.30.0
|
||||
sacrebleu==2.5.1
|
||||
safehttpx==0.1.7
|
||||
safetensors==0.7.0
|
||||
scikit-learn==1.7.2
|
||||
scipy==1.15.3
|
||||
semantic-version==2.10.0
|
||||
shellingham==1.5.4
|
||||
six==1.17.0
|
||||
-e git+https://github.com/ekwek1/soprano.git@5c759351f9e115aa364d5f4453ddaa7ee0d6f15e#egg=soprano_tts
|
||||
sounddevice==0.5.3
|
||||
soundfile==0.13.1
|
||||
soxr==1.0.0
|
||||
starlette==0.22.0
|
||||
sympy==1.13.1
|
||||
tabulate==0.9.0
|
||||
tensorboard==2.20.0
|
||||
tensorboard-data-server==0.7.2
|
||||
tensorboardX==2.6.4
|
||||
threadpoolctl==3.6.0
|
||||
tokenizers==0.22.2
|
||||
tomlkit==0.13.3
|
||||
torch==2.5.1+rocm6.2
|
||||
torchaudio==2.5.1+rocm6.2
|
||||
torchcrepe==0.0.23
|
||||
torchfcpe==0.0.4
|
||||
torchvision==0.20.1+rocm6.2
|
||||
tornado==6.5.4
|
||||
tqdm==4.67.1
|
||||
traitlets==5.14.3
|
||||
transformers==4.57.3
|
||||
typeguard==4.4.4
|
||||
typer==0.21.1
|
||||
typing-inspection==0.4.2
|
||||
typing_extensions==4.15.0
|
||||
tzdata==2025.3
|
||||
uc-micro-py==1.0.3
|
||||
Unidecode==1.4.0
|
||||
urllib3==2.6.3
|
||||
uvicorn==0.40.0
|
||||
websockets==11.0.3
|
||||
Werkzeug==3.1.5
|
||||
yarl==1.22.0
|
||||
86
requirements.txt
Normal file
86
requirements.txt
Normal file
@@ -0,0 +1,86 @@
|
||||
# ==========================================================
|
||||
# Unified Soprano + RVC environment
|
||||
# Python == 3.10.19
|
||||
# ROCm == 6.2
|
||||
# ==========================================================
|
||||
|
||||
# ----------------------
|
||||
# Core ML / GPU stack
|
||||
# ----------------------
|
||||
torch==2.5.1+rocm6.2
|
||||
torchaudio==2.5.1+rocm6.2
|
||||
torchvision==0.20.1+rocm6.2
|
||||
pytorch-triton-rocm==3.1.0
|
||||
|
||||
# ----------------------
|
||||
# Numerical stack (RVC-safe)
|
||||
# ----------------------
|
||||
numpy==1.23.5
|
||||
scipy==1.15.3
|
||||
scikit-learn==1.7.2
|
||||
|
||||
# ----------------------
|
||||
# Audio processing
|
||||
# ----------------------
|
||||
sounddevice==0.5.3
|
||||
soundfile==0.13.1
|
||||
pydub==0.25.1
|
||||
librosa==0.10.2
|
||||
soxr==1.0.0
|
||||
resampy==0.4.3
|
||||
praat-parselmouth==0.4.7
|
||||
pyworld==0.3.2
|
||||
av==16.1.0
|
||||
|
||||
# ----------------------
|
||||
# RVC core
|
||||
# ----------------------
|
||||
fairseq==0.12.2
|
||||
faiss-cpu==1.7.3
|
||||
numba==0.56.4
|
||||
llvmlite==0.39.0
|
||||
torchcrepe==0.0.23
|
||||
torchfcpe==0.0.4
|
||||
einops==0.8.1
|
||||
local-attention==1.11.2
|
||||
omegaconf==2.0.6
|
||||
hydra-core==1.0.7
|
||||
|
||||
# ----------------------
|
||||
# Soprano TTS
|
||||
# ----------------------
|
||||
transformers==4.57.3
|
||||
accelerate==1.12.0
|
||||
tokenizers==0.22.2
|
||||
safetensors==0.7.0
|
||||
huggingface-hub==0.36.0
|
||||
inflect==7.5.0
|
||||
Unidecode==1.4.0
|
||||
|
||||
# ----------------------
|
||||
# Web / UI
|
||||
# ----------------------
|
||||
fastapi==0.88.0
|
||||
starlette==0.22.0
|
||||
uvicorn==0.40.0
|
||||
gradio==3.48.0
|
||||
gradio_client==0.6.1
|
||||
python-multipart==0.0.21
|
||||
orjson==3.11.5
|
||||
|
||||
# ----------------------
|
||||
# Utilities
|
||||
# ----------------------
|
||||
tqdm==4.67.1
|
||||
rich==14.2.0
|
||||
psutil==7.2.1
|
||||
requests==2.32.5
|
||||
regex==2025.11.3
|
||||
filelock==3.20.0
|
||||
packaging==25.0
|
||||
PyYAML==6.0.3
|
||||
|
||||
# ----------------------
|
||||
# Editable installs (local)
|
||||
# ----------------------
|
||||
-e git+https://github.com/ekwek1/soprano.git@5c759351f9e115aa364d5f4453ddaa7ee0d6f15e#egg=soprano_tts
|
||||
52
setup_alsa_bridge.sh
Executable file
52
setup_alsa_bridge.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash
# Setup script to make soprano_to_rvc available as an ALSA device for RVC.
#
# Appends a `pcm.soprano_rvc` / `ctl.soprano_rvc` definition (type pulse,
# pointing at the sink's monitor source) to ~/.asoundrc unless one is already
# there, then prints how to select the device in the RVC GUI.

ASOUND_RC="$HOME/.asoundrc"
SINK_NAME="soprano_to_rvc"

echo "Setting up ALSA configuration for ${SINK_NAME}..."

# Check if our configuration already exists
if grep -qF "pcm.soprano_rvc" "$ASOUND_RC" 2>/dev/null; then
  echo "✓ Configuration already exists in .asoundrc"
else
  # Back up only when the file is about to be modified, so repeated runs do
  # not pile up identical backups. Never touch the file if the backup fails.
  if [ -f "$ASOUND_RC" ]; then
    if ! cp "$ASOUND_RC" "${ASOUND_RC}.backup.$(date +%s)"; then
      echo "✗ Could not back up existing .asoundrc; leaving it untouched." >&2
      exit 1
    fi
    echo "✓ Backed up existing .asoundrc"
  fi

  echo "Adding ALSA configuration..."

  # Unquoted delimiter on purpose: ${SINK_NAME} must be expanded. The leading
  # blank line separates the new section from whatever is already in the file.
  if ! cat >> "$ASOUND_RC" << EOF

# Soprano to RVC bridge
pcm.soprano_rvc {
    type pulse
    device ${SINK_NAME}.monitor
    hint {
        show on
        description "Soprano TTS to RVC Bridge"
    }
}

ctl.soprano_rvc {
    type pulse
    device ${SINK_NAME}.monitor
}
EOF
  then
    echo "✗ Could not write to $ASOUND_RC" >&2
    exit 1
  fi

  echo "✓ Added ALSA configuration to .asoundrc"
fi

echo ""
# A 70-character rule. (`echo "=" * 70` is Python syntax; in the shell the
# bare `*` expands to the file names in the current directory.)
printf '=%.0s' {1..70}
printf '\n'
echo "Setup complete!"
echo ""
echo "The virtual device 'soprano_rvc' is now available as an ALSA device."
echo ""
echo "In RVC GUI:"
echo " 1. Set device type to 'ALSA'"
echo " 2. Select 'soprano_rvc' or 'Soprano TTS to RVC Bridge' as input"
echo " 3. Make sure the soprano_to_virtual_sink.py script is running"
echo ""
|
||||
1
soprano
Submodule
1
soprano
Submodule
Submodule soprano added at 5c759351f9
299
soprano_to_virtual_sink.py
Executable file
299
soprano_to_virtual_sink.py
Executable file
@@ -0,0 +1,299 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Soprano TTS to Virtual Sink
|
||||
This script takes text input and streams Soprano TTS output to a virtual PulseAudio sink
|
||||
that can be used as input for RVC realtime voice conversion.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
import signal
|
||||
import sounddevice as sd
|
||||
import numpy as np
|
||||
import torch
|
||||
from scipy import signal as scipy_signal
|
||||
|
||||
# Add soprano to path
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'soprano'))
|
||||
from soprano import SopranoTTS
|
||||
|
||||
# Configuration
|
||||
VIRTUAL_SINK_NAME = "soprano_to_rvc"
|
||||
SAMPLE_RATE = 48000 # Use 48kHz for better compatibility with audio systems
|
||||
SOPRANO_RATE = 32000 # Soprano outputs at 32kHz
|
||||
CHANNELS = 2 # Use stereo to match RVC expectations
|
||||
|
||||
# Global flag for graceful shutdown
|
||||
running = True
|
||||
|
||||
|
||||
def signal_handler(sig, frame):
    """Request a graceful shutdown by clearing the module-level ``running`` flag.

    Args:
        sig: Signal number that was delivered (unused).
        frame: Stack frame that was interrupted (unused).
    """
    global running
    # The loops that poll `running` stop at their next check.
    running = False
    print("\n\nShutting down gracefully...")
|
||||
|
||||
|
||||
def create_virtual_sink():
    """Create the PulseAudio null sink used as the audio output, if needed.

    Looks the sink up in ``pactl list sinks short`` first and only loads
    ``module-null-sink`` when no sink with exactly that name exists.

    Returns:
        bool: True when the sink exists or was created, False when ``pactl``
        is not installed or the module could not be loaded.
    """
    # Check if sink already exists
    listing = None
    try:
        listing = subprocess.run(
            ["pactl", "list", "sinks", "short"],
            capture_output=True,
            text=True,
            check=True
        ).stdout
    except FileNotFoundError:
        print("✗ 'pactl' not found. Is PulseAudio installed?")
        return False
    except subprocess.CalledProcessError:
        # Listing failed; fall through and try to create the sink anyway.
        pass

    if listing is not None:
        # Short listing: the sink name is the second whitespace-separated
        # field. Compare whole names so that e.g. "<name>_old" is not
        # mistaken for our sink.
        existing = set()
        for line in listing.splitlines():
            fields = line.split()
            if len(fields) > 1:
                existing.add(fields[1])
        if VIRTUAL_SINK_NAME in existing:
            print(f"✓ Virtual sink '{VIRTUAL_SINK_NAME}' already exists")
            print(f" Monitor source: {VIRTUAL_SINK_NAME}.monitor")
            return True

    print(f"Creating virtual sink: {VIRTUAL_SINK_NAME}")
    try:
        # Create a null sink (virtual audio device) using the module-level
        # sample rate and channel count.
        subprocess.run([
            "pactl", "load-module", "module-null-sink",
            f"sink_name={VIRTUAL_SINK_NAME}",
            f"sink_properties=device.description={VIRTUAL_SINK_NAME}",
            f"rate={SAMPLE_RATE}",
            f"channels={CHANNELS}"
        ], check=True, capture_output=True)
        print(f"✓ Virtual sink '{VIRTUAL_SINK_NAME}' created successfully")
        print(f" Monitor source: {VIRTUAL_SINK_NAME}.monitor")
        return True
    except FileNotFoundError:
        print("✗ 'pactl' not found. Is PulseAudio installed?")
        return False
    except subprocess.CalledProcessError as e:
        # stderr is bytes here because the call above does not use text=True.
        print(f"✗ Failed to create virtual sink: {e.stderr.decode(errors='replace')}")
        return False
|
||||
|
||||
|
||||
def remove_virtual_sink():
    """Unload every ``module-null-sink`` instance that backs our virtual sink.

    Only modules named ``module-null-sink`` whose arguments contain
    ``sink_name=<VIRTUAL_SINK_NAME>`` are unloaded. Other modules that merely
    mention the sink name (for example a loopback reading its monitor) are
    left alone, and duplicate sink modules are all removed rather than just
    the first one. Errors are reported on stdout and never raised.
    """
    print(f"\nRemoving virtual sink: {VIRTUAL_SINK_NAME}")
    sink_argument = f"sink_name={VIRTUAL_SINK_NAME}"
    try:
        # Short listing: "<id> <module name> <arguments...>" per line.
        result = subprocess.run(
            ["pactl", "list", "modules", "short"],
            capture_output=True,
            text=True,
            check=True
        )
        removed = False
        for line in result.stdout.splitlines():
            fields = line.split()
            if len(fields) < 3 or fields[1] != "module-null-sink":
                continue
            if sink_argument not in fields[2:]:
                continue
            subprocess.run(["pactl", "unload-module", fields[0]], check=True)
            removed = True
        if removed:
            print("✓ Virtual sink removed")
    except Exception as e:
        print(f"✗ Error removing virtual sink: {e}")
|
||||
|
||||
|
||||
def get_virtual_sink_device_id():
    """Look up the sounddevice index of the virtual sink.

    Returns:
        The index of the first device whose name contains
        ``VIRTUAL_SINK_NAME``, or ``None`` when there is no such device.
    """
    # Re-initialise sounddevice so that query_devices() returns a fresh
    # device list.
    sd._terminate()
    sd._initialize()

    return next(
        (
            index
            for index, info in enumerate(sd.query_devices())
            if VIRTUAL_SINK_NAME in info['name']
        ),
        None,
    )
|
||||
|
||||
|
||||
def _chunk_to_output_frames(chunk):
    """Turn one Soprano audio chunk into float32 frames for the output stream.

    The chunk is flattened to mono, cleaned of NaN/inf values, resampled from
    SOPRANO_RATE to SAMPLE_RATE when the two differ, clipped to [-1, 1] and
    duplicated onto CHANNELS columns.

    Returns:
        A float32 array of shape (frames, CHANNELS), or None for an empty chunk.
    """
    # Convert torch tensor to numpy if needed
    if isinstance(chunk, torch.Tensor):
        chunk = chunk.detach().cpu().numpy()

    # Whatever the incoming layout, treat the samples as one mono stream.
    mono = np.asarray(chunk).reshape(-1)
    if mono.size == 0:
        return None

    # Check for invalid values before resampling
    if not np.all(np.isfinite(mono)):
        print("⚠️ Warning: Invalid values in soprano output, cleaning...")
        mono = np.nan_to_num(mono, nan=0.0, posinf=1.0, neginf=-1.0)

    # Resample from the Soprano rate to the output rate if needed.
    # NOTE(review): each chunk is resampled on its own, so chunk boundaries
    # are not smoothed — confirm this is acceptable for the audio quality.
    if SOPRANO_RATE != SAMPLE_RATE:
        num_samples = int(len(mono) * SAMPLE_RATE / SOPRANO_RATE)
        mono = scipy_signal.resample(mono, num_samples)

    # Ensure no NaN or inf values after resampling, then clip to valid range
    if not np.all(np.isfinite(mono)):
        print("⚠️ Warning: Invalid values after resampling, cleaning...")
        mono = np.nan_to_num(mono, nan=0.0, posinf=1.0, neginf=-1.0)
    mono = np.clip(mono, -1.0, 1.0)

    # Duplicate the mono signal onto every output channel: shape (N, CHANNELS)
    return np.repeat(mono[:, np.newaxis], CHANNELS, axis=1).astype(np.float32)


def stream_to_virtual_sink(tts_model, text, chunk_size=1):
    """Stream soprano TTS output for ``text`` to the virtual sink.

    Args:
        tts_model: Object providing ``infer_stream(text, chunk_size=...)``,
            which yields audio chunks (torch tensors or numpy arrays).
        text: Text to synthesise.
        chunk_size: Passed through to ``infer_stream``.

    Returns:
        bool: True when streaming finished (or was stopped via the ``running``
        flag), False when the sink could not be found/created or an error
        occurred while streaming.
    """
    device_id = get_virtual_sink_device_id()

    if device_id is None:
        print(f"✗ Could not find virtual sink device: {VIRTUAL_SINK_NAME}")
        print("⚠️ Attempting to recreate virtual sink...")
        if not create_virtual_sink():
            return False

        # Wait a moment for the device to appear
        import time
        time.sleep(1.0)
        device_id = get_virtual_sink_device_id()
        if device_id is None:
            print("✗ Still could not find virtual sink after recreation")
            print("\n📋 Available devices:")
            for i, dev in enumerate(sd.query_devices()):
                name = dev['name'].lower()
                if 'soprano' in name or 'rvc' in name:
                    print(f" {i}: {dev['name']}")
            return False

    device_info = sd.query_devices(device_id)
    print(f"✓ Using output device: {device_info['name']}")

    # The stream is always opened at SAMPLE_RATE, so that is the rate reported.
    print(f" Sample rate: {SAMPLE_RATE} Hz")
    print("\n🎤 Generating and streaming speech...")
    print(f"Text: \"{text}\"\n")

    try:
        # Generate streaming audio from soprano
        stream = tts_model.infer_stream(text, chunk_size=chunk_size)

        # Open output stream to virtual sink
        with sd.OutputStream(
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype='float32',
            device=device_id,
            blocksize=0
        ) as out_stream:
            first_chunk = True
            for chunk in stream:
                # Stop as soon as a shutdown has been requested.
                if not running:
                    break

                if first_chunk:
                    print("✓ First audio chunk generated and streaming started")
                    first_chunk = False

                frames = _chunk_to_output_frames(chunk)
                if frames is None:
                    # Nothing to play for an empty chunk.
                    continue

                # Write to virtual sink
                out_stream.write(frames)

        print("✓ Speech generation and streaming completed")
        return True

    except Exception as e:
        print(f"✗ Error during streaming: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||||
|
||||
|
||||
def main():
    """Run the interactive Soprano TTS -> virtual sink session.

    Creates the virtual sink, loads the Soprano model, then reads lines of
    text and streams each one to the sink until the user quits. The sink is
    removed again on the way out.

    Returns:
        int: 0 on a normal exit, 1 when the sink could not be created or the
        model could not be loaded.
    """
    global running

    # Set up signal handler for graceful shutdown
    signal.signal(signal.SIGINT, signal_handler)

    print("=" * 70)
    print("Soprano TTS to Virtual Sink for RVC")
    print("=" * 70)
    print()

    # Create virtual sink; on failure, remove a possibly stale one and retry once
    if not create_virtual_sink():
        print("\n⚠️ If sink already exists, removing and recreating...")
        remove_virtual_sink()
        if not create_virtual_sink():
            print("✗ Failed to create virtual sink. Exiting.")
            return 1

    print()
    print("=" * 70)
    print("Virtual sink setup complete!")
    print("=" * 70)
    print()
    print("📝 Next steps:")
    print(" 1. Open RVC realtime GUI (gui_v1.py)")
    print(f" 2. Select '{VIRTUAL_SINK_NAME}.monitor' as the INPUT device")
    print(" 3. Select your desired output device")
    print(" 4. Load your RVC model and start conversion")
    print(" 5. Return here and type text to convert")
    print()
    print("=" * 70)
    print()

    # Initialize Soprano TTS
    print("🔄 Loading Soprano TTS model...")
    try:
        tts = SopranoTTS(
            backend='auto',
            device='auto',
            cache_size_mb=100,
            decoder_batch_size=1
        )
        print("✓ Soprano TTS model loaded successfully")
    except Exception as e:
        print(f"✗ Failed to load Soprano TTS: {e}")
        remove_virtual_sink()
        return 1

    print()
    print("=" * 70)
    print("Ready! Type text to generate speech (Ctrl+C to exit)")
    print("=" * 70)
    print()

    # Main loop - get text input and generate speech
    try:
        while running:
            try:
                text = input("\n🎙️ Enter text: ").strip()

                # The SIGINT handler only clears `running`; input() then
                # carries on waiting. If a shutdown was requested while we
                # sat at the prompt, do not synthesise the line that finally
                # arrived.
                if not running:
                    break

                if not text:
                    print("⚠️ Please enter some text")
                    continue

                if text.lower() in ['quit', 'exit', 'q']:
                    break

                # Stream the text to the virtual sink
                stream_to_virtual_sink(tts, text, chunk_size=1)
                print()

            except EOFError:
                break

    except KeyboardInterrupt:
        print("\n\n⚠️ Interrupted by user")

    finally:
        # Clean up
        remove_virtual_sink()
        print("\n✓ Cleanup complete. Goodbye!")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user