Decided on the Parakeet ONNX Runtime for STT. It works well, and real-time voice chat is now possible; the UX is still lacking.
This commit is contained in:
@@ -27,7 +27,7 @@ class STTClient:
|
||||
def __init__(
|
||||
self,
|
||||
user_id: str,
|
||||
stt_url: str = "ws://miku-stt:8000/ws/stt",
|
||||
stt_url: str = "ws://miku-stt:8766/ws/stt",
|
||||
on_vad_event: Optional[Callable] = None,
|
||||
on_partial_transcript: Optional[Callable] = None,
|
||||
on_final_transcript: Optional[Callable] = None,
|
||||
@@ -140,6 +140,44 @@ class STTClient:
|
||||
logger.error(f"Failed to send audio to STT: {e}")
|
||||
self.connected = False
|
||||
|
||||
async def send_final(self):
    """
    Ask the STT server for the final transcription.

    Meant to be called once the user has stopped speaking. The request is
    the JSON message ``{"type": "final"}`` sent via ``websocket.send_str``.

    If the client is not connected (or has no websocket), a warning is
    logged and nothing is sent. If sending raises, the error is logged and
    ``self.connected`` is set to ``False``.
    """
    ready = self.connected and self.websocket
    if not ready:
        logger.warning(f"Cannot send final command, not connected for user {self.user_id}")
        return

    try:
        payload = {"type": "final"}
        await self.websocket.send_str(json.dumps(payload))
        logger.debug(f"Sent final command to STT")
    except Exception as e:
        # Any failure while sending is treated as a lost connection.
        logger.error(f"Failed to send final command to STT: {e}")
        self.connected = False
|
||||
|
||||
async def send_reset(self):
    """
    Ask the STT server to reset its audio buffer.

    Use this to discard any audio the server has buffered so far. The
    request is the JSON message ``{"type": "reset"}`` sent via
    ``websocket.send_str``.

    If the client is not connected (or has no websocket), a warning is
    logged and nothing is sent. If sending raises, the error is logged and
    ``self.connected`` is set to ``False``.
    """
    ready = self.connected and self.websocket
    if not ready:
        logger.warning(f"Cannot send reset command, not connected for user {self.user_id}")
        return

    try:
        payload = {"type": "reset"}
        await self.websocket.send_str(json.dumps(payload))
        logger.debug(f"Sent reset command to STT")
    except Exception as e:
        # Any failure while sending is treated as a lost connection.
        logger.error(f"Failed to send reset command to STT: {e}")
        self.connected = False
|
||||
|
||||
async def _receive_events(self):
|
||||
"""Background task to receive events from STT server."""
|
||||
try:
|
||||
@@ -177,14 +215,29 @@ class STTClient:
|
||||
"""
|
||||
event_type = event.get('type')
|
||||
|
||||
if event_type == 'vad':
|
||||
# VAD event: speech detection
|
||||
if event_type == 'transcript':
|
||||
# New ONNX server protocol: single transcript type with is_final flag
|
||||
text = event.get('text', '')
|
||||
is_final = event.get('is_final', False)
|
||||
timestamp = event.get('timestamp', 0)
|
||||
|
||||
if is_final:
|
||||
logger.info(f"Final transcript [{self.user_id}]: {text}")
|
||||
if self.on_final_transcript:
|
||||
await self.on_final_transcript(text, timestamp)
|
||||
else:
|
||||
logger.info(f"Partial transcript [{self.user_id}]: {text}")
|
||||
if self.on_partial_transcript:
|
||||
await self.on_partial_transcript(text, timestamp)
|
||||
|
||||
elif event_type == 'vad':
|
||||
# VAD event: speech detection (legacy support)
|
||||
logger.debug(f"VAD event: {event}")
|
||||
if self.on_vad_event:
|
||||
await self.on_vad_event(event)
|
||||
|
||||
elif event_type == 'partial':
|
||||
# Partial transcript
|
||||
# Legacy protocol support: partial transcript
|
||||
text = event.get('text', '')
|
||||
timestamp = event.get('timestamp', 0)
|
||||
logger.info(f"Partial transcript [{self.user_id}]: {text}")
|
||||
@@ -192,7 +245,7 @@ class STTClient:
|
||||
await self.on_partial_transcript(text, timestamp)
|
||||
|
||||
elif event_type == 'final':
|
||||
# Final transcript
|
||||
# Legacy protocol support: final transcript
|
||||
text = event.get('text', '')
|
||||
timestamp = event.get('timestamp', 0)
|
||||
logger.info(f"Final transcript [{self.user_id}]: {text}")
|
||||
@@ -200,12 +253,20 @@ class STTClient:
|
||||
await self.on_final_transcript(text, timestamp)
|
||||
|
||||
elif event_type == 'interruption':
|
||||
# Interruption detected
|
||||
# Interruption detected (legacy support)
|
||||
probability = event.get('probability', 0)
|
||||
logger.info(f"Interruption detected from user {self.user_id} (prob={probability:.3f})")
|
||||
if self.on_interruption:
|
||||
await self.on_interruption(probability)
|
||||
|
||||
elif event_type == 'info':
|
||||
# Info message
|
||||
logger.info(f"STT info: {event.get('message', '')}")
|
||||
|
||||
elif event_type == 'error':
|
||||
# Error message
|
||||
logger.error(f"STT error: {event.get('message', '')}")
|
||||
|
||||
else:
|
||||
logger.warning(f"Unknown STT event type: {event_type}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user