110 lines
3.1 KiB
Python
110 lines
3.1 KiB
Python
"""
|
|
Transcription vocale temps réel via Voxtral Mini Transcribe Realtime.
|
|
|
|
Flux : microphone (PCM 16kHz) → WebSocket Voxtral → texte transcrit
|
|
"""
|
|
import asyncio
|
|
import sys
|
|
from typing import AsyncIterator
|
|
|
|
import numpy as np
|
|
import sounddevice as sd
|
|
from mistralai.client import Mistral
|
|
from mistralai.client.models import (
|
|
AudioFormat,
|
|
TranscriptionStreamDone,
|
|
TranscriptionStreamTextDelta,
|
|
)
|
|
|
|
from . import config
|
|
|
|
# Identifier of the realtime speech-to-text model requested from the API.
STT_MODEL = "voxtral-mini-transcribe-realtime-2602"

# Microphone capture format: 16 kHz, mono.
SAMPLE_RATE = 16_000
CHANNELS = 1

# Frames per captured block. Derived from SAMPLE_RATE so that one block is
# always 100 ms of audio (1600 frames at 16 kHz).
CHUNK_FRAMES = SAMPLE_RATE // 10
|
|
|
|
|
|
async def _mic_stream(stop_event: asyncio.Event) -> AsyncIterator[bytes]:
    """Capture the microphone and yield PCM chunks until ``stop_event`` is set.

    Opens a ``sounddevice.InputStream`` configured with ``SAMPLE_RATE``,
    ``CHANNELS`` and ``CHUNK_FRAMES``. Each captured block is converted from
    float32 to signed 16-bit little-endian PCM and handed to the event loop
    through a queue.

    Args:
        stop_event: Set by the caller to end the capture. It is polled every
            0.2 s while no audio is pending.

    Yields:
        Raw ``bytes`` of int16 little-endian samples taken from the first
        input channel, one item per captured block.
    """
    # Must be the loop running this coroutine: the audio callback uses it to
    # hand data back via call_soon_threadsafe.
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[bytes] = asyncio.Queue()

    def callback(indata: np.ndarray, frames: int, time, status) -> None:
        # Invoked by sounddevice for every captured block.
        if status:
            print(f"[Mic] {status}", file=sys.stderr)
        # Clip before scaling so out-of-range float samples saturate instead
        # of wrapping around in the int16 cast; "<i2" pins little-endian
        # byte order regardless of the host platform.
        mono = np.clip(indata[:, 0], -1.0, 1.0)
        pcm = (mono * 32767).astype("<i2").tobytes()
        # asyncio.Queue is not thread-safe; schedule the put on the loop.
        loop.call_soon_threadsafe(queue.put_nowait, pcm)

    stream = sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=CHANNELS,
        dtype="float32",
        blocksize=CHUNK_FRAMES,
        callback=callback,
    )

    with stream:
        while not stop_event.is_set():
            try:
                # Short timeout so stop_event is re-checked regularly even
                # when no audio arrives.
                chunk = await asyncio.wait_for(queue.get(), timeout=0.2)
            except asyncio.TimeoutError:
                continue
            yield chunk

    # Let any put_nowait calls already scheduled by the callback run before
    # draining, so the last captured blocks are not lost.
    await asyncio.sleep(0)

    # Flush whatever is still queued after the stream has been closed.
    while not queue.empty():
        yield queue.get_nowait()
|
|
|
|
|
|
async def transcribe_from_mic() -> str:
    """Record from the microphone until Enter is pressed and return the text.

    Streams microphone audio from ``_mic_stream`` to
    ``client.audio.realtime.transcribe_stream`` and prints each partial text
    delta as it arrives. Capture stops when the user presses Enter, or
    immediately if stdin is closed.

    Returns:
        The text carried by the ``TranscriptionStreamDone`` event, stripped
        of surrounding whitespace. If the stream ends without that event,
        the concatenation of the received deltas is returned instead.

    Note:
        ``input()`` runs in the default executor. Cancelling the waiting task
        does not interrupt a thread already blocked in ``input()``; that
        thread only finishes once a line is read or stdin is closed.
    """
    client = Mistral(api_key=config.MISTRAL_API_KEY)
    stop_event = asyncio.Event()
    loop = asyncio.get_running_loop()

    print("🎤 Parlez... (Entrée pour arrêter)")

    async def wait_for_enter() -> None:
        # Block on input() in a worker thread so the event loop stays free.
        try:
            await loop.run_in_executor(None, input)
        except EOFError:
            # stdin closed / non-interactive: there is no way to press Enter,
            # so stop instead of recording forever.
            pass
        finally:
            stop_event.set()

    enter_task = asyncio.create_task(wait_for_enter())

    audio_fmt = AudioFormat(
        encoding="pcm_s16le",
        sample_rate=SAMPLE_RATE,
    )

    final_text = ""
    received_done = False
    delta_parts: list[str] = []

    try:
        async for event in client.audio.realtime.transcribe_stream(
            audio_stream=_mic_stream(stop_event),
            model=STT_MODEL,
            audio_format=audio_fmt,
            target_streaming_delay_ms=300,
        ):
            if isinstance(event, TranscriptionStreamTextDelta):
                # Show partial text in real time and keep it as a fallback.
                print(event.text, end="", flush=True)
                delta_parts.append(event.text)
            elif isinstance(event, TranscriptionStreamDone):
                final_text = event.text
                received_done = True
                print()  # newline after the streamed transcription
                break
    finally:
        # Always stop the microphone and the Enter watcher, including on
        # error or cancellation.
        stop_event.set()
        enter_task.cancel()
        try:
            await enter_task
        except asyncio.CancelledError:
            pass

    if not received_done:
        # No final event was received: return what was shown to the user.
        # NOTE(review): assumes deltas are incremental fragments that
        # concatenate to the full text — confirm against the SDK.
        final_text = "".join(delta_parts)

    return final_text.strip()
|