Add gitignore
This commit is contained in:
109
assistant/stt.py
Normal file
109
assistant/stt.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Transcription vocale temps réel via Voxtral Mini Transcribe Realtime.
|
||||
|
||||
Flux : microphone (PCM 16kHz) → WebSocket Voxtral → texte transcrit
|
||||
"""
|
||||
import asyncio
|
||||
import sys
|
||||
from typing import AsyncIterator
|
||||
|
||||
import numpy as np
|
||||
import sounddevice as sd
|
||||
from mistralai.client import Mistral
|
||||
from mistralai.client.models import (
|
||||
AudioFormat,
|
||||
TranscriptionStreamDone,
|
||||
TranscriptionStreamTextDelta,
|
||||
)
|
||||
|
||||
from . import config
|
||||
|
||||
# Identifier of the realtime transcription model requested from the API.
STT_MODEL = "voxtral-mini-transcribe-realtime-2602"

# Microphone capture parameters: 16 kHz, mono.
SAMPLE_RATE = 16000
CHANNELS = 1

# Frames per captured chunk: 100 ms of audio. Derived from SAMPLE_RATE so the
# chunk duration stays at 100 ms if the sample rate is ever changed
# (1600 frames at 16 kHz).
CHUNK_FRAMES = SAMPLE_RATE // 10
|
||||
|
||||
|
||||
async def _mic_stream(stop_event: asyncio.Event) -> AsyncIterator[bytes]:
    """Capture the microphone and yield PCM chunks until ``stop_event`` is set.

    Audio is read through a ``sounddevice.InputStream`` opened with
    ``SAMPLE_RATE``, ``CHANNELS`` and ``CHUNK_FRAMES``; each captured block is
    converted from float32 to signed 16-bit little-endian PCM bytes.

    Args:
        stop_event: Set by the caller to end the capture. It is polled at
            least every 0.2 s, so the generator stops shortly after it is set.

    Yields:
        Raw ``pcm_s16le`` bytes of the first input channel, one chunk per
        captured block. Chunks still queued when the capture stops are
        yielded before the generator finishes.
    """
    # Must be the loop that runs this coroutine: the audio callback hands its
    # data back to it with call_soon_threadsafe().
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[bytes] = asyncio.Queue()

    def callback(indata: np.ndarray, frames: int, time_info, status) -> None:
        # Invoked by sounddevice for every captured block; it is not run on
        # the event loop, hence the thread-safe hand-off below.
        if status:
            print(f"[Mic] {status}", file=sys.stderr)
        # Clip before scaling: a float sample outside [-1, 1] would otherwise
        # overflow the int16 range and wrap around instead of saturating.
        samples = np.clip(indata[:, 0], -1.0, 1.0)
        # "<i2" forces little-endian int16 regardless of the host byte order.
        pcm = (samples * 32767).astype("<i2").tobytes()
        loop.call_soon_threadsafe(queue.put_nowait, pcm)

    stream = sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=CHANNELS,
        dtype="float32",
        blocksize=CHUNK_FRAMES,
        callback=callback,
    )

    with stream:
        while not stop_event.is_set():
            try:
                # Short timeout so stop_event is re-checked even when no
                # audio arrives.
                chunk = await asyncio.wait_for(queue.get(), timeout=0.2)
            except asyncio.TimeoutError:
                continue
            yield chunk

    # Let the loop run any put_nowait() calls the callback scheduled just
    # before the stream was closed, so the drain below can see them.
    await asyncio.sleep(0)

    # Flush whatever is still queued.
    while not queue.empty():
        chunk = queue.get_nowait()
        if chunk:
            yield chunk
|
||||
|
||||
|
||||
async def transcribe_from_mic() -> str:
    """Transcribe the microphone until the user presses Enter.

    Streams microphone audio from ``_mic_stream`` to the realtime
    transcription endpoint, printing partial text as it arrives.

    Returns:
        The final transcript with surrounding whitespace stripped. If the
        stream ends without a final "done" event, the concatenation of the
        partial texts received so far is returned instead.
    """
    client = Mistral(api_key=config.MISTRAL_API_KEY)
    stop_event = asyncio.Event()
    loop = asyncio.get_running_loop()

    print("🎤 Parlez... (Entrée pour arrêter)")

    # Wait for Enter in a worker thread so the event loop is not blocked.
    async def wait_for_enter() -> None:
        try:
            await loop.run_in_executor(None, input)
        except EOFError:
            # stdin is closed: nobody can press Enter, treat it as "stop".
            pass
        finally:
            # Always release the microphone loop, whatever ended the wait.
            stop_event.set()

    enter_task = asyncio.create_task(wait_for_enter())

    audio_fmt = AudioFormat(
        encoding="pcm_s16le",
        sample_rate=SAMPLE_RATE,
    )

    final_text = ""
    partial_parts: list[str] = []
    got_final = False

    try:
        async for event in client.audio.realtime.transcribe_stream(
            audio_stream=_mic_stream(stop_event),
            model=STT_MODEL,
            audio_format=audio_fmt,
            target_streaming_delay_ms=300,
        ):
            if isinstance(event, TranscriptionStreamTextDelta):
                # Show the partial text in real time.
                print(event.text, end="", flush=True)
                partial_parts.append(event.text)
            elif isinstance(event, TranscriptionStreamDone):
                final_text = event.text
                got_final = True
                print()  # newline after the transcript
                break
    finally:
        stop_event.set()
        # NOTE(review): cancelling the task does not interrupt a worker thread
        # that is still blocked in input(); it presumably stays alive until
        # the next line arrives on stdin - confirm this is acceptable.
        enter_task.cancel()
        try:
            await enter_task
        except asyncio.CancelledError:
            pass

    if not got_final:
        # The stream ended without a final event: keep what was transcribed
        # instead of discarding it.
        if partial_parts:
            print()
        final_text = "".join(partial_parts)

    return final_text.strip()
|
||||
Reference in New Issue
Block a user