126 lines
4.4 KiB
Python
126 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
||
"""
Test client for the VAD-enabled server.

Simulates Discord bot audio streaming with speech detection.
"""
|
||
import asyncio
|
||
import websockets
|
||
import json
|
||
import numpy as np
|
||
import soundfile as sf
|
||
import sys
|
||
|
||
|
||
def _print_server_message(result):
    """Print one decoded server message according to its ``type`` field.

    Handles the ``vad_status``, ``transcript``, ``info`` and ``error`` types;
    any other type is ignored.
    """
    msg_type = result.get('type')

    if msg_type == 'vad_status':
        if result.get('is_speech'):
            print("\n🎤 VAD: Speech detected\n")
        else:
            print("\n🛑 VAD: Speech ended\n")

    elif msg_type == 'transcript':
        text = result.get('text', '')
        duration = result.get('duration', 0)

        if result.get('is_final', False):
            print(f"\n{'='*70}")
            print(f"✅ FINAL TRANSCRIPTION ({duration:.2f}s):")
            print(f" \"{text}\"")
            print(f"{'='*70}\n")
        else:
            print(f"📝 PARTIAL ({duration:.2f}s): {text}")

    elif msg_type == 'info':
        print(f"ℹ️ {result.get('message')}")

    elif msg_type == 'error':
        print(f"❌ Error: {result.get('message')}")


async def test_vad_server(audio_file="test.wav"):
    """Stream an audio file to the VAD server and print its responses.

    Connects to ``ws://localhost:8766``, prints the welcome message, sends the
    file as 16-bit PCM in 100 ms chunks (one chunk every 50 ms), waits 3 s,
    sends a ``force_transcribe`` command, waits 2 s more, then stops the
    background listener.

    Args:
        audio_file: Path of the audio file to load with ``soundfile``.

    Returns:
        0 if the run completed, 1 if any exception was raised (the error is
        printed rather than propagated).
    """
    uri = "ws://localhost:8766"

    print(f"Connecting to {uri}...")

    try:
        async with websockets.connect(uri) as websocket:
            print("✓ Connected!\n")

            # Receive welcome message
            message = await websocket.recv()
            data = json.loads(message)
            print(f"Server says: {data.get('message')}")
            print(f"VAD enabled: {data.get('vad_enabled')}\n")

            # Load audio file
            print(f"Loading audio: {audio_file}")
            audio, sr = sf.read(audio_file, dtype='float32')

            if audio.ndim > 1:
                audio = audio[:, 0]  # Mono: keep only the first channel

            print(f"Duration: {len(audio)/sr:.2f}s")
            print(f"Sample rate: {sr} Hz\n")

            # Convert to int16. Clip first: float samples outside [-1, 1]
            # would otherwise wrap around when cast to a 16-bit integer.
            audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

            async def receive_messages():
                """Receive and display server messages until cancelled."""
                try:
                    while True:
                        response = await websocket.recv()
                        try:
                            result = json.loads(response)
                        except (TypeError, ValueError) as exc:
                            # One bad frame should not end the listener.
                            print(f"⚠️ Ignoring undecodable message: {exc}")
                            continue
                        _print_server_message(result)
                except websockets.ConnectionClosed:
                    # Normal end of the session; nothing left to receive.
                    pass
                except Exception as exc:
                    # Report instead of hiding unexpected listener failures.
                    print(f"❌ Listener stopped: {exc}")

            # Listen for responses in the background while audio is sent.
            listen_task = asyncio.create_task(receive_messages())

            try:
                # Send audio in small chunks (simulate streaming).
                # max(1, ...) keeps the range() step valid for tiny rates.
                chunk_size = max(1, int(sr * 0.1))  # 100ms chunks
                print("Streaming audio...\n")

                for i in range(0, len(audio_int16), chunk_size):
                    chunk = audio_int16[i:i+chunk_size]
                    await websocket.send(chunk.tobytes())
                    await asyncio.sleep(0.05)  # Pace the stream

                print("\nAll audio sent. Waiting for final transcription...")

                # Wait for processing
                await asyncio.sleep(3.0)

                # Force transcribe any remaining buffer
                print("Sending force_transcribe command...\n")
                await websocket.send(json.dumps({"type": "force_transcribe"}))

                # Wait a bit more
                await asyncio.sleep(2.0)
            finally:
                # Always stop the listener, even if streaming failed.
                listen_task.cancel()
                try:
                    await listen_task
                except asyncio.CancelledError:
                    pass

            print("\n✓ Test completed!")

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"❌ Error: {e}")
        return 1

    return 0
|
||
|
||
|
||
if __name__ == "__main__":
    # The first command-line argument, when given, names the audio file;
    # otherwise fall back to the default test recording.
    cli_args = sys.argv[1:]
    audio_file = cli_args[0] if cli_args else "test.wav"

    # Run the client and use its return value as the process exit status.
    exit_code = asyncio.run(test_vad_server(audio_file))
    sys.exit(exit_code)
|