Add gitignore

This commit is contained in:
2026-04-07 22:06:36 +02:00
parent 49802d89d5
commit 5703cf5871
98 changed files with 5329 additions and 72 deletions

0
assistant/__init__.py Normal file
View File

84
assistant/audio.py Normal file
View File

@@ -0,0 +1,84 @@
import sys
import subprocess
from . import config
def play_audio(pcm_bytes: bytes) -> None:
    """
    Play audio bytes through the platform's native player.

    - Linux:   piped straight into aplay (no temporary file)
    - macOS:   written to a temp file and handed to afplay
    - Windows: temp WAV played via PowerShell (fallback)

    Raises RuntimeError on an unsupported platform.
    """
    current = sys.platform
    if current.startswith("linux"):
        _play_pcm_aplay(pcm_bytes)
        return
    if current == "darwin":
        _play_pcm_macos(pcm_bytes)
        return
    if current == "win32":
        _play_pcm_windows(pcm_bytes)
        return
    raise RuntimeError(f"Plateforme non supportée : {current}")
def _play_pcm_aplay(pcm_bytes: bytes) -> None:
    """Stream WAV bytes into aplay via stdin; aplay sniffs the format from the header."""
    # subprocess.run(input=...) is Popen + communicate under the hood — same behavior.
    result = subprocess.run(["aplay", "-q", "-"], input=pcm_bytes)
    if result.returncode != 0:
        raise RuntimeError(f"aplay a échoué (code {result.returncode})")
def _play_pcm_macos(pcm_bytes: bytes) -> None:
    """Play WAV bytes on macOS with afplay (needs a temp file: afplay cannot read stdin)."""
    import os
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        handle.write(pcm_bytes)
        wav_path = handle.name
    try:
        subprocess.run(["afplay", wav_path], check=True)
    finally:
        # Always remove the temp file, even if afplay fails.
        os.unlink(wav_path)
def _play_pcm_windows(pcm_bytes: bytes) -> None:
    """Play raw PCM on Windows: wrap in a WAV header, then PlaySync via PowerShell."""
    import os
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        handle.write(_pcm_to_wav(pcm_bytes))
        wav_path = handle.name
    try:
        command = ["powershell", "-c", f'(New-Object Media.SoundPlayer "{wav_path}").PlaySync()']
        subprocess.run(command, check=True)
    finally:
        os.unlink(wav_path)
def _pcm_to_wav(pcm_bytes: bytes) -> bytes:
"""Ajoute un header WAV minimal au PCM brut S16LE mono."""
import struct
sample_rate = config.TTS_PCM_SAMPLE_RATE
num_channels = 1
bits_per_sample = 16
byte_rate = sample_rate * num_channels * bits_per_sample // 8
block_align = num_channels * bits_per_sample // 8
data_size = len(pcm_bytes)
header = struct.pack(
"<4sI4s4sIHHIIHH4sI",
b"RIFF", 36 + data_size, b"WAVE",
b"fmt ", 16, 1, num_channels,
sample_rate, byte_rate, block_align, bits_per_sample,
b"data", data_size,
)
return header + pcm_bytes
def _command_exists(cmd: str) -> bool:
return subprocess.run(["which", cmd], capture_output=True).returncode == 0

View File

@@ -1,5 +1,8 @@
import asyncio
import queue as _queue
import re
import sys
import threading
from . import llm, tts, audio, config
@@ -34,22 +37,122 @@ def _set_voice(parts: list[str]) -> None:
print(f"Voix définie sur : {config.VOICE_ID}")
# ---------------------------------------------------------------------------
# Sentence splitting & markdown cleaning for TTS pipeline
# ---------------------------------------------------------------------------
# Split on sentence boundaries using fixed-width lookbehinds (Python constraint).
# For ".": 3-char lookbehind — require the 2 chars before the period to be
# lowercase, digit, hyphen, or closing bracket (NOT uppercase). This protects
# "M.", "No.", "A." while correctly splitting "arrivé.", "Jump-2.", "(J2).", "ok."
_SENT_RE = re.compile(
r'(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.])\s+' # word(2+) + period
r'|(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.][»"\')\]])\s+' # + closing quote
r'|(?<=[!?…])\s+' # ! ? …
r'|(?<=[!?…][»"\')\]])\s+' # ! ? … + closing quote
r'|\n{2,}' # paragraph break
)
# Markdown patterns to strip before TTS (keep inner text where applicable)
_MD_CODE_BLOCK = re.compile(r'```.*?```', re.DOTALL)
_MD_INLINE = re.compile(r'\*{1,3}(.*?)\*{1,3}|_{1,2}(.*?)_{1,2}|~~(.*?)~~|`([^`]+)`', re.DOTALL)
_MD_LINK = re.compile(r'\[([^\]]*)\]\([^\)]*\)')
_MD_HEADER = re.compile(r'^#{1,6}\s+', re.MULTILINE)
_MULTI_SPACE = re.compile(r'\s+')
def _split_sentences(text: str) -> tuple[list[str], str]:
"""Extrait les phrases complètes d'un buffer partiel.
Returns (complete_sentences, remainder).
"""
parts = _SENT_RE.split(text)
if len(parts) <= 1:
return [], text
return parts[:-1], parts[-1]
def _clean_for_tts(text: str) -> str:
"""Supprime le formatage Markdown avant la synthèse vocale."""
text = _MD_CODE_BLOCK.sub(' ', text)
text = _MD_INLINE.sub(lambda m: next(g for g in m.groups() if g is not None), text)
text = _MD_LINK.sub(r'\1', text)
text = _MD_HEADER.sub('', text)
text = text.replace('', ' vers ').replace('', ' depuis ')
text = _MULTI_SPACE.sub(' ', text).strip()
return text
# ---------------------------------------------------------------------------
# Message processing — streaming TTS pipeline
# ---------------------------------------------------------------------------
def _process_message(user_input: str) -> None:
"""Envoie un message au LLM et lit la réponse à voix haute."""
print(f"Arioch > ", end="", flush=True)
try:
reply = llm.chat(user_input)
except Exception as e:
print(f"\n[Erreur LLM] {e}")
return
"""Envoie un message au LLM et lit la réponse à voix haute.
print(reply)
Pipeline 3 étages en parallèle :
[LLM stream] → sentence_queue → [TTS thread] → audio_queue → [Audio thread]
Dès qu'une phrase complète est détectée dans le stream LLM, elle part
immédiatement en synthèse Voxtral. L'audio est joué dès qu'il est prêt,
pendant que la phrase suivante est déjà en cours de synthèse.
"""
SENTINEL = object()
sentence_q: _queue.Queue = _queue.Queue()
audio_q: _queue.Queue = _queue.Queue()
def tts_worker() -> None:
while True:
item = sentence_q.get()
if item is SENTINEL:
audio_q.put(SENTINEL)
return
text = _clean_for_tts(item)
if text:
try:
audio_bytes = tts.text_to_speech(text)
audio_q.put(audio_bytes)
except Exception as e:
print(f"\n[Erreur TTS] {e}", flush=True)
def audio_worker() -> None:
while True:
item = audio_q.get()
if item is SENTINEL:
return
try:
audio.play_audio(item)
except Exception as e:
print(f"\n[Erreur Audio] {e}", flush=True)
tts_thread = threading.Thread(target=tts_worker, daemon=True, name="tts-worker")
audio_thread = threading.Thread(target=audio_worker, daemon=True, name="audio-worker")
tts_thread.start()
audio_thread.start()
print("Arioch > ", end="", flush=True)
buffer = ""
try:
audio_bytes = tts.text_to_speech(reply)
audio.play_audio(audio_bytes)
for chunk in llm.chat_stream(user_input):
print(chunk, end="", flush=True)
buffer += chunk
sentences, buffer = _split_sentences(buffer)
for sentence in sentences:
sentence = sentence.strip()
if sentence:
sentence_q.put(sentence)
except Exception as e:
print(f"[Erreur TTS/Audio] {e}")
print(f"\n[Erreur LLM] {e}", flush=True)
# Flush any remaining text after the stream ends
if buffer.strip():
sentence_q.put(buffer.strip())
print() # newline after full response
sentence_q.put(SENTINEL)
tts_thread.join()
audio_thread.join()
def _handle_command(user_input: str) -> bool:
@@ -118,7 +221,7 @@ def _handle_mcp(parts: list[str]) -> None:
print(f"\nTotal : {total} outil(s). Tapez 'mcp tools' pour les lister.\n")
def _list_profiles(profiles: list) -> None:
if not profiles:
print("Aucun profil disponible dans profiles/")
return

20
assistant/config.py Normal file
View File

@@ -0,0 +1,20 @@
import os
from dotenv import load_dotenv
# Load variables from a local .env file into os.environ before reading them.
load_dotenv()
# Required: fails fast (KeyError) at import time when the key is absent.
MISTRAL_API_KEY: str = os.environ["MISTRAL_API_KEY"]
# Model identifiers for chat and text-to-speech.
LLM_MODEL: str = "mistral-large-latest"
TTS_MODEL: str = "voxtral-mini-tts-2603"
# Optional fixed voice id; an empty string in .env is normalized to None.
VOICE_ID: str | None = os.getenv("VOICE_ID") or None
SYSTEM_PROMPT: str = os.getenv(
    "SYSTEM_PROMPT",
    "Tu es Arioch, un assistant vocal intelligent, concis et sympathique. "
    "Réponds toujours en français sauf si l'utilisateur parle une autre langue.",
)
# Preferred language when auto-selecting a preset voice (see tts.py).
VOICE_LANGUAGE: str = os.getenv("VOICE_LANGUAGE", "fr")
# Sample rate used when wrapping raw PCM in a WAV header (see audio._pcm_to_wav).
TTS_PCM_SAMPLE_RATE: int = int(os.getenv("TTS_PCM_SAMPLE_RATE", "24000"))

View File

@@ -1,4 +1,5 @@
import json
from typing import Generator
from mistralai.client import Mistral
from . import config
@@ -13,8 +14,12 @@ def reset_history() -> None:
_history.clear()
def chat(user_message: str) -> str:
"""Envoie un message au LLM, gère les appels d'outils MCP et retourne la réponse."""
def chat_stream(user_message: str) -> Generator[str, None, None]:
"""Génère la réponse du LLM token par token via streaming.
Les appels d'outils MCP sont exécutés internement (sans streaming).
Seule la réponse textuelle finale est streamée sous forme de chunks.
"""
from . import mcp_client
_history.append({"role": "user", "content": user_message})
@@ -24,20 +29,30 @@ def chat(user_message: str) -> str:
while True:
messages = [{"role": "system", "content": config.SYSTEM_PROMPT}] + _history
kwargs: dict = {"model": config.LLM_MODEL, "messages": messages}
if tools:
kwargs["tools"] = tools
response = _client.chat.complete(**kwargs)
choice = response.choices[0]
msg = choice.message
accumulated_content = ""
tool_calls_received = None
if msg.tool_calls:
# 1. Ajouter le message assistant (avec les appels d'outils) à l'historique
for event in _client.chat.stream(**kwargs):
ch = event.data.choices[0]
delta = ch.delta
# Yield text chunks (isinstance check guards against Unset sentinel)
if isinstance(delta.content, str) and delta.content:
accumulated_content += delta.content
yield delta.content
if delta.tool_calls:
tool_calls_received = delta.tool_calls
if tool_calls_received:
# Append assistant turn with tool calls to history
_history.append({
"role": "assistant",
"content": msg.content or "",
"content": accumulated_content or "",
"tool_calls": [
{
"id": tc.id,
@@ -47,12 +62,12 @@ def chat(user_message: str) -> str:
"arguments": tc.function.arguments,
},
}
for tc in msg.tool_calls
for tc in tool_calls_received
],
})
# 2. Exécuter chaque outil et ajouter les résultats
for tc in msg.tool_calls:
# Execute each tool and append results
for tc in tool_calls_received:
tool_name = tc.function.name
try:
args = (
@@ -60,7 +75,7 @@ def chat(user_message: str) -> str:
if isinstance(tc.function.arguments, str)
else tc.function.arguments
)
print(f" 🔧 [MCP] {tool_name}({_short_args(args)})")
print(f"\n 🔧 [MCP] {tool_name}({_short_args(args)})", flush=True)
result = manager.call_tool(tool_name, args)
except Exception as e:
result = f"Erreur lors de l'appel à {tool_name} : {e}"
@@ -70,13 +85,17 @@ def chat(user_message: str) -> str:
"content": result,
"tool_call_id": tc.id,
})
# 3. Reboucler pour obtenir la réponse finale
# Loop to get the next (final) response
else:
reply = msg.content or ""
_history.append({"role": "assistant", "content": reply})
return reply
# Pure text response — already yielded chunk by chunk; save to history
_history.append({"role": "assistant", "content": accumulated_content})
break
def chat(user_message: str) -> str:
    """Send a message to the LLM and return the complete reply (non-streaming)."""
    # Drain the streaming generator and stitch the chunks together.
    chunks = list(chat_stream(user_message))
    return "".join(chunks)
def _short_args(args: dict) -> str:

View File

@@ -12,12 +12,16 @@ Configure les serveurs dans un profil YAML sous la clé `mcp_servers` :
from __future__ import annotations
import asyncio
import os
import re
import threading
from contextlib import AsyncExitStack
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
# Racine du projet (le dossier qui contient main.py)
_PROJECT_ROOT = Path(__file__).parent.parent.resolve()
@dataclass
class MCPServerConfig:
@@ -27,6 +31,18 @@ class MCPServerConfig:
env: dict[str, str] | None = None
url: str | None = None
def resolved_args(self) -> list[str]:
"""Résout les chemins relatifs dans args par rapport à la racine du projet."""
result = []
for arg in self.args:
p = Path(arg)
if not p.is_absolute() and p.suffix in (".js", ".py", ".ts"):
resolved = (_PROJECT_ROOT / p).resolve()
result.append(str(resolved))
else:
result.append(arg)
return result
def _sanitize_name(name: str) -> str:
"""Transforme un nom en identifiant valide pour l'API Mistral (^[a-zA-Z0-9_-]{1,64}$)."""
@@ -34,7 +50,12 @@ def _sanitize_name(name: str) -> str:
class MCPManager:
"""Gère les connexions aux serveurs MCP et l'exécution des outils."""
"""Gère les connexions aux serveurs MCP et l'exécution des outils.
Chaque connexion tourne dans une "keeper task" qui possède toute la durée
de vie du context manager stdio_client / ClientSession. Cela évite l'erreur
anyio "Attempted to exit cancel scope in a different task".
"""
def __init__(self) -> None:
self._loop = asyncio.new_event_loop()
@@ -42,9 +63,9 @@ class MCPManager:
self._thread.start()
self._sessions: dict[str, Any] = {}
self._raw_tools: dict[str, list] = {}
# mistral_name -> (server_name, original_tool_name)
self._tool_map: dict[str, tuple[str, str]] = {}
self._exit_stacks: dict[str, AsyncExitStack] = {}
# stop event per server, signalled at shutdown
self._stop_events: dict[str, asyncio.Event] = {}
def _run_loop(self) -> None:
asyncio.set_event_loop(self._loop)
@@ -87,10 +108,18 @@ class MCPManager:
return self._run(self._call_tool_async(server_name, tool_name, arguments))
def shutdown(self) -> None:
"""Signale l'arrêt à toutes les keeper tasks et attend brièvement."""
async def _signal_all() -> None:
for ev in self._stop_events.values():
ev.set()
# Laisser une courte fenêtre pour que les tâches se terminent proprement
await asyncio.sleep(0.3)
try:
self._run(self._shutdown_async(), timeout=10)
self._run(_signal_all(), timeout=5)
except Exception:
pass
self._stop_events.clear()
def summary(self) -> list[tuple[str, int]]:
"""Retourne [(server_name, tool_count), ...] pour les serveurs connectés."""
@@ -112,38 +141,78 @@ class MCPManager:
print(f"[MCP] ❌ Connexion {cfg.name} impossible : {e}")
async def _connect_server(self, cfg: MCPServerConfig) -> None:
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
"""Lance la keeper task et attend que la connexion soit établie."""
stop_event = asyncio.Event()
ready_event = asyncio.Event()
error_holder: list[Exception] = []
stack = AsyncExitStack()
self._stop_events[cfg.name] = stop_event
if cfg.command:
params = StdioServerParameters(
command=cfg.command,
args=cfg.args or [],
env=cfg.env,
)
read, write = await stack.enter_async_context(stdio_client(params))
else:
from mcp.client.sse import sse_client
read, write = await stack.enter_async_context(sse_client(cfg.url))
asyncio.create_task(
self._run_server(cfg, ready_event, stop_event, error_holder),
name=f"mcp-keeper-{cfg.name}",
)
session = await stack.enter_async_context(ClientSession(read, write))
await session.initialize()
# Attendre que la connexion soit prête (ou échoue)
try:
await asyncio.wait_for(ready_event.wait(), timeout=30)
except asyncio.TimeoutError:
stop_event.set()
raise TimeoutError(f"Timeout lors de la connexion à {cfg.name}")
self._sessions[cfg.name] = session
self._exit_stacks[cfg.name] = stack
if error_holder:
raise error_holder[0]
tools_resp = await session.list_tools()
self._raw_tools[cfg.name] = tools_resp.tools
async def _run_server(
self,
cfg: MCPServerConfig,
ready_event: asyncio.Event,
stop_event: asyncio.Event,
error_holder: list[Exception],
) -> None:
"""Keeper task : possède le context manager de bout en bout."""
try:
from mcp import ClientSession, StdioServerParameters
server_safe = _sanitize_name(cfg.name)
for tool in tools_resp.tools:
tool_safe = _sanitize_name(tool.name)
mistral_name = f"{server_safe}__{tool_safe}"
self._tool_map[mistral_name] = (cfg.name, tool.name)
if cfg.command:
from mcp.client.stdio import stdio_client
transport = stdio_client(StdioServerParameters(
command=cfg.command,
args=cfg.resolved_args(),
env=cfg.env,
))
else:
from mcp.client.sse import sse_client
transport = sse_client(cfg.url)
print(f"[MCP] ✅ {cfg.name}{len(tools_resp.tools)} outil(s) disponible(s)")
async with transport as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
tools_resp = await session.list_tools()
self._sessions[cfg.name] = session
self._raw_tools[cfg.name] = tools_resp.tools
server_safe = _sanitize_name(cfg.name)
for tool in tools_resp.tools:
tool_safe = _sanitize_name(tool.name)
self._tool_map[f"{server_safe}__{tool_safe}"] = (cfg.name, tool.name)
print(f"[MCP] ✅ {cfg.name}{len(tools_resp.tools)} outil(s) disponible(s)")
ready_event.set()
# Maintenir la connexion jusqu'au signal d'arrêt
await stop_event.wait()
except Exception as e:
error_holder.append(e)
ready_event.set() # débloquer _connect_server même en cas d'erreur
finally:
self._sessions.pop(cfg.name, None)
self._raw_tools.pop(cfg.name, None)
to_remove = [k for k, v in self._tool_map.items() if v[0] == cfg.name]
for k in to_remove:
del self._tool_map[k]
async def _call_tool_async(self, server_name: str, tool_name: str, arguments: dict) -> str:
session = self._sessions[server_name]
@@ -156,17 +225,6 @@ class MCPManager:
parts.append(str(item))
return "\n".join(parts) if parts else "(aucun résultat)"
async def _shutdown_async(self) -> None:
for stack in list(self._exit_stacks.values()):
try:
await stack.aclose()
except Exception:
pass
self._sessions.clear()
self._raw_tools.clear()
self._tool_map.clear()
self._exit_stacks.clear()
_manager: MCPManager | None = None
_lock = threading.Lock()
@@ -185,4 +243,6 @@ def reset_manager() -> None:
with _lock:
if _manager is not None:
_manager.shutdown()
_manager = MCPManager()
_manager = MCPManager()

109
assistant/stt.py Normal file
View File

@@ -0,0 +1,109 @@
"""
Transcription vocale temps réel via Voxtral Mini Transcribe Realtime.
Flux : microphone (PCM 16kHz) → WebSocket Voxtral → texte transcrit
"""
import asyncio
import sys
from typing import AsyncIterator
import numpy as np
import sounddevice as sd
from mistralai.client import Mistral
from mistralai.client.models import (
AudioFormat,
TranscriptionStreamDone,
TranscriptionStreamTextDelta,
)
from . import config
# Realtime transcription model identifier.
STT_MODEL = "voxtral-mini-transcribe-realtime-2602"
# Capture rate in Hz — presumably the rate the realtime endpoint expects; confirm with API docs.
SAMPLE_RATE = 16000
# Mono capture.
CHANNELS = 1
CHUNK_FRAMES = 1600  # 100 ms of audio per chunk at 16 kHz
async def _mic_stream(stop_event: asyncio.Event) -> AsyncIterator[bytes]:
    """Capture the microphone and yield PCM int16 chunks until stop_event is set.

    The sounddevice callback fires on a PortAudio thread, so chunks are handed
    to the asyncio loop with call_soon_threadsafe.
    """
    # get_running_loop() is the correct API inside a coroutine;
    # get_event_loop() is deprecated for this use since Python 3.10.
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[bytes | None] = asyncio.Queue()

    def callback(indata: np.ndarray, frames: int, time, status) -> None:
        if status:
            print(f"[Mic] {status}", file=sys.stderr)
        # Convert float32 [-1, 1] samples to signed 16-bit little-endian mono.
        pcm = (indata[:, 0] * 32767).astype(np.int16).tobytes()
        loop.call_soon_threadsafe(queue.put_nowait, pcm)

    stream = sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=CHANNELS,
        dtype="float32",
        blocksize=CHUNK_FRAMES,
        callback=callback,
    )
    with stream:
        while not stop_event.is_set():
            try:
                # Short timeout so the stop flag is re-checked regularly.
                chunk = await asyncio.wait_for(queue.get(), timeout=0.2)
                yield chunk
            except asyncio.TimeoutError:
                continue
    # Drain whatever arrived between the stop signal and the stream closing.
    while not queue.empty():
        chunk = queue.get_nowait()
        if chunk:
            yield chunk
async def transcribe_from_mic() -> str:
    """
    Listen to the microphone until the user presses Enter, then return the
    transcribed text.

    Partial deltas are echoed to stdout as they arrive; the final transcript
    comes from the TranscriptionStreamDone event.
    """
    client = Mistral(api_key=config.MISTRAL_API_KEY)
    stop_event = asyncio.Event()
    # get_running_loop() replaces the deprecated get_event_loop() in coroutines.
    loop = asyncio.get_running_loop()
    print("🎤 Parlez... (Entrée pour arrêter)")
    # Wait for Enter in a worker thread so the event loop stays responsive.
    async def wait_for_enter() -> None:
        await loop.run_in_executor(None, input)
        stop_event.set()
    enter_task = asyncio.create_task(wait_for_enter())
    audio_fmt = AudioFormat(
        encoding="pcm_s16le",
        sample_rate=SAMPLE_RATE,
    )
    final_text = ""
    try:
        async for event in client.audio.realtime.transcribe_stream(
            audio_stream=_mic_stream(stop_event),
            model=STT_MODEL,
            audio_format=audio_fmt,
            target_streaming_delay_ms=300,
        ):
            if isinstance(event, TranscriptionStreamTextDelta):
                # Live echo of the partial transcription.
                print(event.text, end="", flush=True)
            elif isinstance(event, TranscriptionStreamDone):
                final_text = event.text
                print()  # newline after the transcript
                break
    finally:
        # Always stop the mic stream and release the Enter-listener task.
        stop_event.set()
        enter_task.cancel()
        try:
            await enter_task
        except asyncio.CancelledError:
            pass
    return final_text.strip()

54
assistant/tts.py Normal file
View File

@@ -0,0 +1,54 @@
import base64
from mistralai.client import Mistral
from . import config
# Shared API client for all TTS calls in this module.
_client = Mistral(api_key=config.MISTRAL_API_KEY)
# Cache for the lazily-resolved preset voice id (see _get_default_voice_id).
_default_voice_id: str | None = None
def _get_default_voice_id() -> str:
    """
    Fetch and cache the id of a preset voice.

    Voices supporting the configured language (VOICE_LANGUAGE) are preferred;
    otherwise the first available preset is used. The result is memoized in
    module state so the API is queried at most once.
    """
    global _default_voice_id
    if _default_voice_id is not None:
        return _default_voice_id
    voices = _client.audio.voices.list(type_="preset", limit=50)
    if not voices.items:
        raise RuntimeError(
            "Aucune voix disponible. Configurez VOICE_ID dans .env ou créez une voix "
            "avec scripts/register_voice.py"
        )
    wanted_lang = config.VOICE_LANGUAGE
    # First voice supporting the wanted language, else fall back to the first preset.
    chosen = next(
        (v for v in voices.items if v.languages and wanted_lang in v.languages),
        voices.items[0],
    )
    _default_voice_id = chosen.id
    print(f"[TTS] Voix sélectionnée : {chosen.name} (langues: {chosen.languages}) — id: {_default_voice_id}")
    return _default_voice_id
def text_to_speech(text: str, voice_id: str | None = None) -> bytes:
    """
    Convert text to WAV audio with Voxtral TTS.

    WAV is chosen because aplay (Linux) and afplay (macOS) play it natively,
    with no conversion step. Voice precedence: explicit argument, then
    config.VOICE_ID, then the cached default preset voice.
    """
    chosen_voice = voice_id or config.VOICE_ID or _get_default_voice_id()
    response = _client.audio.speech.complete(
        model=config.TTS_MODEL,
        input=text,
        voice_id=chosen_voice,
        response_format="wav",
    )
    # The API returns the audio payload base64-encoded.
    return base64.b64decode(response.audio_data)