Add gitignore

2026-04-07 22:06:36 +02:00
parent 49802d89d5
commit 5703cf5871
98 changed files with 5329 additions and 72 deletions
--- a/assistant/stt.py
+++ b/assistant/stt.py
@@ -0,0 +1,109 @@
+"""
+Transcription vocale temps réel via Voxtral Mini Transcribe Realtime.
+
+Flux : microphone (PCM 16kHz) → WebSocket Voxtral → texte transcrit
+"""
+import asyncio
+import sys
+from typing import AsyncIterator
+
+import numpy as np
+import sounddevice as sd
+from mistralai.client import Mistral
+from mistralai.client.models import (
+    AudioFormat,
+    TranscriptionStreamDone,
+    TranscriptionStreamTextDelta,
+)
+
+from . import config
+
+# Identifier of the realtime transcription model passed to transcribe_stream().
+STT_MODEL = "voxtral-mini-transcribe-realtime-2602"
+# Microphone capture rate in Hz; the same value is declared in the AudioFormat.
+SAMPLE_RATE = 16000
+# Number of input channels opened on the microphone (mono).
+CHANNELS = 1
+CHUNK_FRAMES = 1600  # 100 ms of audio per chunk at SAMPLE_RATE
+
+
+async def _mic_stream(stop_event: asyncio.Event) -> AsyncIterator[bytes]:
+    """Capture le microphone et yield des chunks PCM int16 jusqu'à stop_event."""
+    loop = asyncio.get_event_loop()
+    queue: asyncio.Queue[bytes | None] = asyncio.Queue()
+
+    def callback(indata: np.ndarray, frames: int, time, status) -> None:
+        if status:
+            print(f"[Mic] {status}", file=sys.stderr)
+        # Convertir en int16 little-endian et envoyer
+        pcm = (indata[:, 0] * 32767).astype(np.int16).tobytes()
+        loop.call_soon_threadsafe(queue.put_nowait, pcm)
+
+    stream = sd.InputStream(
+        samplerate=SAMPLE_RATE,
+        channels=CHANNELS,
+        dtype="float32",
+        blocksize=CHUNK_FRAMES,
+        callback=callback,
+    )
+
+    with stream:
+        while not stop_event.is_set():
+            try:
+                chunk = await asyncio.wait_for(queue.get(), timeout=0.2)
+                yield chunk
+            except asyncio.TimeoutError:
+                continue
+
+    # Vider la queue restante
+    while not queue.empty():
+        chunk = queue.get_nowait()
+        if chunk:
+            yield chunk
+
+
+async def transcribe_from_mic() -> str:
+    """
+    Écoute le microphone jusqu'à ce que l'utilisateur appuie sur Entrée,
+    puis retourne le texte transcrit.
+    """
+    client = Mistral(api_key=config.MISTRAL_API_KEY)
+    stop_event = asyncio.Event()
+    loop = asyncio.get_event_loop()
+
+    print("🎤 Parlez... (Entrée pour arrêter)")
+
+    # Attendre Entrée dans un thread pour ne pas bloquer l'event loop
+    async def wait_for_enter() -> None:
+        await loop.run_in_executor(None, input)
+        stop_event.set()
+
+    enter_task = asyncio.create_task(wait_for_enter())
+
+    audio_fmt = AudioFormat(
+        encoding="pcm_s16le",
+        sample_rate=SAMPLE_RATE,
+    )
+
+    final_text = ""
+
+    try:
+        async for event in client.audio.realtime.transcribe_stream(
+            audio_stream=_mic_stream(stop_event),
+            model=STT_MODEL,
+            audio_format=audio_fmt,
+            target_streaming_delay_ms=300,
+        ):
+            if isinstance(event, TranscriptionStreamTextDelta):
+                # Affichage en temps réel du texte partiel
+                print(event.text, end="", flush=True)
+            elif isinstance(event, TranscriptionStreamDone):
+                final_text = event.text
+                print()  # saut de ligne après la transcription
+                break
+    finally:
+        stop_event.set()
+        enter_task.cancel()
+        try:
+            await enter_task
+        except asyncio.CancelledError:
+            pass
+
+    return final_text.strip()