""" Transcription vocale temps réel via Voxtral Mini Transcribe Realtime. Flux : microphone (PCM 16kHz) → WebSocket Voxtral → texte transcrit """ import asyncio import sys from typing import AsyncIterator import numpy as np import sounddevice as sd from mistralai.client import Mistral from mistralai.client.models import ( AudioFormat, TranscriptionStreamDone, TranscriptionStreamTextDelta, ) from . import config STT_MODEL = "voxtral-mini-transcribe-realtime-2602" SAMPLE_RATE = 16000 CHANNELS = 1 CHUNK_FRAMES = 1600 # 100ms de son par chunk async def _mic_stream(stop_event: asyncio.Event) -> AsyncIterator[bytes]: """Capture le microphone et yield des chunks PCM int16 jusqu'à stop_event.""" loop = asyncio.get_event_loop() queue: asyncio.Queue[bytes | None] = asyncio.Queue() def callback(indata: np.ndarray, frames: int, time, status) -> None: if status: print(f"[Mic] {status}", file=sys.stderr) # Convertir en int16 little-endian et envoyer pcm = (indata[:, 0] * 32767).astype(np.int16).tobytes() loop.call_soon_threadsafe(queue.put_nowait, pcm) stream = sd.InputStream( samplerate=SAMPLE_RATE, channels=CHANNELS, dtype="float32", blocksize=CHUNK_FRAMES, callback=callback, ) with stream: while not stop_event.is_set(): try: chunk = await asyncio.wait_for(queue.get(), timeout=0.2) yield chunk except asyncio.TimeoutError: continue # Vider la queue restante while not queue.empty(): chunk = queue.get_nowait() if chunk: yield chunk async def transcribe_from_mic() -> str: """ Écoute le microphone jusqu'à ce que l'utilisateur appuie sur Entrée, puis retourne le texte transcrit. """ client = Mistral(api_key=config.MISTRAL_API_KEY) stop_event = asyncio.Event() loop = asyncio.get_event_loop() print("🎤 Parlez... (Entrée pour arrêter)") # Attendre Entrée dans un thread pour ne pas bloquer l'event loop async def wait_for_enter() -> None: await loop.run_in_executor(None, input) stop_event.set() enter_task = asyncio.create_task(wait_for_enter()) audio_fmt = AudioFormat( encoding="pcm_s16le", sample_rate=SAMPLE_RATE, ) final_text = "" try: async for event in client.audio.realtime.transcribe_stream( audio_stream=_mic_stream(stop_event), model=STT_MODEL, audio_format=audio_fmt, target_streaming_delay_ms=300, ): if isinstance(event, TranscriptionStreamTextDelta): # Affichage en temps réel du texte partiel print(event.text, end="", flush=True) elif isinstance(event, TranscriptionStreamDone): final_text = event.text print() # saut de ligne après la transcription break finally: stop_event.set() enter_task.cancel() try: await enter_task except asyncio.CancelledError: pass return final_text.strip()