Add gitignore

This commit is contained in:
2026-04-07 22:06:36 +02:00
parent 49802d89d5
commit 5703cf5871
98 changed files with 5329 additions and 72 deletions

0
assistant/__init__.py Normal file
View File

84
assistant/audio.py Normal file
View File

@@ -0,0 +1,84 @@
import sys
import subprocess
from . import config
def play_audio(pcm_bytes: bytes) -> None:
    """
    Play audio bytes through the platform's native player.

    - Linux:   piped straight into aplay (no temporary file)
    - macOS:   written to a temp file and handed to afplay
    - Windows: temp WAV played via PowerShell (fallback)

    Raises RuntimeError on an unsupported platform.
    """
    current = sys.platform
    if current.startswith("linux"):
        _play_pcm_aplay(pcm_bytes)
        return
    if current == "darwin":
        _play_pcm_macos(pcm_bytes)
        return
    if current == "win32":
        _play_pcm_windows(pcm_bytes)
        return
    raise RuntimeError(f"Plateforme non supportée : {current}")
def _play_pcm_aplay(pcm_bytes: bytes) -> None:
    """Stream WAV bytes into aplay via stdin; aplay sniffs the format from the header."""
    # subprocess.run(input=...) is Popen + communicate under the hood — same behavior.
    result = subprocess.run(["aplay", "-q", "-"], input=pcm_bytes)
    if result.returncode != 0:
        raise RuntimeError(f"aplay a échoué (code {result.returncode})")
def _play_pcm_macos(pcm_bytes: bytes) -> None:
    """Play WAV bytes on macOS with afplay (needs a temp file: afplay cannot read stdin)."""
    import os
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        handle.write(pcm_bytes)
        wav_path = handle.name
    try:
        subprocess.run(["afplay", wav_path], check=True)
    finally:
        # Always remove the temp file, even if afplay fails.
        os.unlink(wav_path)
def _play_pcm_windows(pcm_bytes: bytes) -> None:
    """Play raw PCM on Windows: wrap in a WAV header, then PlaySync via PowerShell."""
    import os
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        handle.write(_pcm_to_wav(pcm_bytes))
        wav_path = handle.name
    try:
        command = ["powershell", "-c", f'(New-Object Media.SoundPlayer "{wav_path}").PlaySync()']
        subprocess.run(command, check=True)
    finally:
        os.unlink(wav_path)
def _pcm_to_wav(pcm_bytes: bytes) -> bytes:
"""Ajoute un header WAV minimal au PCM brut S16LE mono."""
import struct
sample_rate = config.TTS_PCM_SAMPLE_RATE
num_channels = 1
bits_per_sample = 16
byte_rate = sample_rate * num_channels * bits_per_sample // 8
block_align = num_channels * bits_per_sample // 8
data_size = len(pcm_bytes)
header = struct.pack(
"<4sI4s4sIHHIIHH4sI",
b"RIFF", 36 + data_size, b"WAVE",
b"fmt ", 16, 1, num_channels,
sample_rate, byte_rate, block_align, bits_per_sample,
b"data", data_size,
)
return header + pcm_bytes
def _command_exists(cmd: str) -> bool:
return subprocess.run(["which", cmd], capture_output=True).returncode == 0

View File

@@ -1,5 +1,8 @@
import asyncio
import queue as _queue
import re
import sys
import threading
from . import llm, tts, audio, config
@@ -34,22 +37,122 @@ def _set_voice(parts: list[str]) -> None:
print(f"Voix définie sur : {config.VOICE_ID}")
# ---------------------------------------------------------------------------
# Sentence splitting & markdown cleaning for TTS pipeline
# ---------------------------------------------------------------------------
# Split on sentence boundaries using fixed-width lookbehinds (Python constraint).
# For ".": 3-char lookbehind — require the 2 chars before the period to be
# lowercase, digit, hyphen, or closing bracket (NOT uppercase). This protects
# "M.", "No.", "A." while correctly splitting "arrivé.", "Jump-2.", "(J2).", "ok."
_SENT_RE = re.compile(
r'(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.])\s+' # word(2+) + period
r'|(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.][»"\')\]])\s+' # + closing quote
r'|(?<=[!?…])\s+' # ! ? …
r'|(?<=[!?…][»"\')\]])\s+' # ! ? … + closing quote
r'|\n{2,}' # paragraph break
)
# Markdown patterns to strip before TTS (keep inner text where applicable)
_MD_CODE_BLOCK = re.compile(r'```.*?```', re.DOTALL)
_MD_INLINE = re.compile(r'\*{1,3}(.*?)\*{1,3}|_{1,2}(.*?)_{1,2}|~~(.*?)~~|`([^`]+)`', re.DOTALL)
_MD_LINK = re.compile(r'\[([^\]]*)\]\([^\)]*\)')
_MD_HEADER = re.compile(r'^#{1,6}\s+', re.MULTILINE)
_MULTI_SPACE = re.compile(r'\s+')
def _split_sentences(text: str) -> tuple[list[str], str]:
"""Extrait les phrases complètes d'un buffer partiel.
Returns (complete_sentences, remainder).
"""
parts = _SENT_RE.split(text)
if len(parts) <= 1:
return [], text
return parts[:-1], parts[-1]
def _clean_for_tts(text: str) -> str:
"""Supprime le formatage Markdown avant la synthèse vocale."""
text = _MD_CODE_BLOCK.sub(' ', text)
text = _MD_INLINE.sub(lambda m: next(g for g in m.groups() if g is not None), text)
text = _MD_LINK.sub(r'\1', text)
text = _MD_HEADER.sub('', text)
text = text.replace('', ' vers ').replace('', ' depuis ')
text = _MULTI_SPACE.sub(' ', text).strip()
return text
# ---------------------------------------------------------------------------
# Message processing — streaming TTS pipeline
# ---------------------------------------------------------------------------
def _process_message(user_input: str) -> None:
"""Envoie un message au LLM et lit la réponse à voix haute."""
print(f"Arioch > ", end="", flush=True)
try:
reply = llm.chat(user_input)
except Exception as e:
print(f"\n[Erreur LLM] {e}")
return
"""Envoie un message au LLM et lit la réponse à voix haute.
print(reply)
Pipeline 3 étages en parallèle :
[LLM stream] → sentence_queue → [TTS thread] → audio_queue → [Audio thread]
Dès qu'une phrase complète est détectée dans le stream LLM, elle part
immédiatement en synthèse Voxtral. L'audio est joué dès qu'il est prêt,
pendant que la phrase suivante est déjà en cours de synthèse.
"""
SENTINEL = object()
sentence_q: _queue.Queue = _queue.Queue()
audio_q: _queue.Queue = _queue.Queue()
def tts_worker() -> None:
while True:
item = sentence_q.get()
if item is SENTINEL:
audio_q.put(SENTINEL)
return
text = _clean_for_tts(item)
if text:
try:
audio_bytes = tts.text_to_speech(text)
audio_q.put(audio_bytes)
except Exception as e:
print(f"\n[Erreur TTS] {e}", flush=True)
def audio_worker() -> None:
while True:
item = audio_q.get()
if item is SENTINEL:
return
try:
audio.play_audio(item)
except Exception as e:
print(f"\n[Erreur Audio] {e}", flush=True)
tts_thread = threading.Thread(target=tts_worker, daemon=True, name="tts-worker")
audio_thread = threading.Thread(target=audio_worker, daemon=True, name="audio-worker")
tts_thread.start()
audio_thread.start()
print("Arioch > ", end="", flush=True)
buffer = ""
try:
audio_bytes = tts.text_to_speech(reply)
audio.play_audio(audio_bytes)
for chunk in llm.chat_stream(user_input):
print(chunk, end="", flush=True)
buffer += chunk
sentences, buffer = _split_sentences(buffer)
for sentence in sentences:
sentence = sentence.strip()
if sentence:
sentence_q.put(sentence)
except Exception as e:
print(f"[Erreur TTS/Audio] {e}")
print(f"\n[Erreur LLM] {e}", flush=True)
# Flush any remaining text after the stream ends
if buffer.strip():
sentence_q.put(buffer.strip())
print() # newline after full response
sentence_q.put(SENTINEL)
tts_thread.join()
audio_thread.join()
def _handle_command(user_input: str) -> bool:
@@ -118,7 +221,7 @@ def _handle_mcp(parts: list[str]) -> None:
print(f"\nTotal : {total} outil(s). Tapez 'mcp tools' pour les lister.\n")
def _list_profiles(profiles: list) -> None:
if not profiles:
print("Aucun profil disponible dans profiles/")
return

20
assistant/config.py Normal file
View File

@@ -0,0 +1,20 @@
import os
from dotenv import load_dotenv
# Load variables from a local .env file into os.environ before reading them.
load_dotenv()
# Required: fails fast (KeyError) at import time when the key is absent.
MISTRAL_API_KEY: str = os.environ["MISTRAL_API_KEY"]
# Model identifiers for chat and text-to-speech.
LLM_MODEL: str = "mistral-large-latest"
TTS_MODEL: str = "voxtral-mini-tts-2603"
# Optional fixed voice id; an empty string in .env is normalized to None.
VOICE_ID: str | None = os.getenv("VOICE_ID") or None
SYSTEM_PROMPT: str = os.getenv(
    "SYSTEM_PROMPT",
    "Tu es Arioch, un assistant vocal intelligent, concis et sympathique. "
    "Réponds toujours en français sauf si l'utilisateur parle une autre langue.",
)
# Preferred language when auto-selecting a preset voice (see tts.py).
VOICE_LANGUAGE: str = os.getenv("VOICE_LANGUAGE", "fr")
# Sample rate used when wrapping raw PCM in a WAV header (see audio._pcm_to_wav).
TTS_PCM_SAMPLE_RATE: int = int(os.getenv("TTS_PCM_SAMPLE_RATE", "24000"))

View File

@@ -1,4 +1,5 @@
import json
from typing import Generator
from mistralai.client import Mistral
from . import config
@@ -13,8 +14,12 @@ def reset_history() -> None:
_history.clear()
def chat(user_message: str) -> str:
"""Envoie un message au LLM, gère les appels d'outils MCP et retourne la réponse."""
def chat_stream(user_message: str) -> Generator[str, None, None]:
"""Génère la réponse du LLM token par token via streaming.
Les appels d'outils MCP sont exécutés internement (sans streaming).
Seule la réponse textuelle finale est streamée sous forme de chunks.
"""
from . import mcp_client
_history.append({"role": "user", "content": user_message})
@@ -24,20 +29,30 @@ def chat(user_message: str) -> str:
while True:
messages = [{"role": "system", "content": config.SYSTEM_PROMPT}] + _history
kwargs: dict = {"model": config.LLM_MODEL, "messages": messages}
if tools:
kwargs["tools"] = tools
response = _client.chat.complete(**kwargs)
choice = response.choices[0]
msg = choice.message
accumulated_content = ""
tool_calls_received = None
if msg.tool_calls:
# 1. Ajouter le message assistant (avec les appels d'outils) à l'historique
for event in _client.chat.stream(**kwargs):
ch = event.data.choices[0]
delta = ch.delta
# Yield text chunks (isinstance check guards against Unset sentinel)
if isinstance(delta.content, str) and delta.content:
accumulated_content += delta.content
yield delta.content
if delta.tool_calls:
tool_calls_received = delta.tool_calls
if tool_calls_received:
# Append assistant turn with tool calls to history
_history.append({
"role": "assistant",
"content": msg.content or "",
"content": accumulated_content or "",
"tool_calls": [
{
"id": tc.id,
@@ -47,12 +62,12 @@ def chat(user_message: str) -> str:
"arguments": tc.function.arguments,
},
}
for tc in msg.tool_calls
for tc in tool_calls_received
],
})
# 2. Exécuter chaque outil et ajouter les résultats
for tc in msg.tool_calls:
# Execute each tool and append results
for tc in tool_calls_received:
tool_name = tc.function.name
try:
args = (
@@ -60,7 +75,7 @@ def chat(user_message: str) -> str:
if isinstance(tc.function.arguments, str)
else tc.function.arguments
)
print(f" 🔧 [MCP] {tool_name}({_short_args(args)})")
print(f"\n 🔧 [MCP] {tool_name}({_short_args(args)})", flush=True)
result = manager.call_tool(tool_name, args)
except Exception as e:
result = f"Erreur lors de l'appel à {tool_name} : {e}"
@@ -70,13 +85,17 @@ def chat(user_message: str) -> str:
"content": result,
"tool_call_id": tc.id,
})
# 3. Reboucler pour obtenir la réponse finale
# Loop to get the next (final) response
else:
reply = msg.content or ""
_history.append({"role": "assistant", "content": reply})
return reply
# Pure text response — already yielded chunk by chunk; save to history
_history.append({"role": "assistant", "content": accumulated_content})
break
def chat(user_message: str) -> str:
    """Send a message to the LLM and return the complete reply (non-streaming)."""
    # Drain the streaming generator and stitch the chunks together.
    chunks = list(chat_stream(user_message))
    return "".join(chunks)
def _short_args(args: dict) -> str:

View File

@@ -12,12 +12,16 @@ Configure les serveurs dans un profil YAML sous la clé `mcp_servers` :
from __future__ import annotations
import asyncio
import os
import re
import threading
from contextlib import AsyncExitStack
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
# Racine du projet (le dossier qui contient main.py)
_PROJECT_ROOT = Path(__file__).parent.parent.resolve()
@dataclass
class MCPServerConfig:
@@ -27,6 +31,18 @@ class MCPServerConfig:
env: dict[str, str] | None = None
url: str | None = None
def resolved_args(self) -> list[str]:
"""Résout les chemins relatifs dans args par rapport à la racine du projet."""
result = []
for arg in self.args:
p = Path(arg)
if not p.is_absolute() and p.suffix in (".js", ".py", ".ts"):
resolved = (_PROJECT_ROOT / p).resolve()
result.append(str(resolved))
else:
result.append(arg)
return result
def _sanitize_name(name: str) -> str:
"""Transforme un nom en identifiant valide pour l'API Mistral (^[a-zA-Z0-9_-]{1,64}$)."""
@@ -34,7 +50,12 @@ def _sanitize_name(name: str) -> str:
class MCPManager:
"""Gère les connexions aux serveurs MCP et l'exécution des outils."""
"""Gère les connexions aux serveurs MCP et l'exécution des outils.
Chaque connexion tourne dans une "keeper task" qui possède toute la durée
de vie du context manager stdio_client / ClientSession. Cela évite l'erreur
anyio "Attempted to exit cancel scope in a different task".
"""
def __init__(self) -> None:
self._loop = asyncio.new_event_loop()
@@ -42,9 +63,9 @@ class MCPManager:
self._thread.start()
self._sessions: dict[str, Any] = {}
self._raw_tools: dict[str, list] = {}
# mistral_name -> (server_name, original_tool_name)
self._tool_map: dict[str, tuple[str, str]] = {}
self._exit_stacks: dict[str, AsyncExitStack] = {}
# stop event per server, signalled at shutdown
self._stop_events: dict[str, asyncio.Event] = {}
def _run_loop(self) -> None:
asyncio.set_event_loop(self._loop)
@@ -87,10 +108,18 @@ class MCPManager:
return self._run(self._call_tool_async(server_name, tool_name, arguments))
def shutdown(self) -> None:
"""Signale l'arrêt à toutes les keeper tasks et attend brièvement."""
async def _signal_all() -> None:
for ev in self._stop_events.values():
ev.set()
# Laisser une courte fenêtre pour que les tâches se terminent proprement
await asyncio.sleep(0.3)
try:
self._run(self._shutdown_async(), timeout=10)
self._run(_signal_all(), timeout=5)
except Exception:
pass
self._stop_events.clear()
def summary(self) -> list[tuple[str, int]]:
"""Retourne [(server_name, tool_count), ...] pour les serveurs connectés."""
@@ -112,38 +141,78 @@ class MCPManager:
print(f"[MCP] ❌ Connexion {cfg.name} impossible : {e}")
async def _connect_server(self, cfg: MCPServerConfig) -> None:
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
"""Lance la keeper task et attend que la connexion soit établie."""
stop_event = asyncio.Event()
ready_event = asyncio.Event()
error_holder: list[Exception] = []
stack = AsyncExitStack()
self._stop_events[cfg.name] = stop_event
if cfg.command:
params = StdioServerParameters(
command=cfg.command,
args=cfg.args or [],
env=cfg.env,
)
read, write = await stack.enter_async_context(stdio_client(params))
else:
from mcp.client.sse import sse_client
read, write = await stack.enter_async_context(sse_client(cfg.url))
asyncio.create_task(
self._run_server(cfg, ready_event, stop_event, error_holder),
name=f"mcp-keeper-{cfg.name}",
)
session = await stack.enter_async_context(ClientSession(read, write))
await session.initialize()
# Attendre que la connexion soit prête (ou échoue)
try:
await asyncio.wait_for(ready_event.wait(), timeout=30)
except asyncio.TimeoutError:
stop_event.set()
raise TimeoutError(f"Timeout lors de la connexion à {cfg.name}")
self._sessions[cfg.name] = session
self._exit_stacks[cfg.name] = stack
if error_holder:
raise error_holder[0]
tools_resp = await session.list_tools()
self._raw_tools[cfg.name] = tools_resp.tools
async def _run_server(
self,
cfg: MCPServerConfig,
ready_event: asyncio.Event,
stop_event: asyncio.Event,
error_holder: list[Exception],
) -> None:
"""Keeper task : possède le context manager de bout en bout."""
try:
from mcp import ClientSession, StdioServerParameters
server_safe = _sanitize_name(cfg.name)
for tool in tools_resp.tools:
tool_safe = _sanitize_name(tool.name)
mistral_name = f"{server_safe}__{tool_safe}"
self._tool_map[mistral_name] = (cfg.name, tool.name)
if cfg.command:
from mcp.client.stdio import stdio_client
transport = stdio_client(StdioServerParameters(
command=cfg.command,
args=cfg.resolved_args(),
env=cfg.env,
))
else:
from mcp.client.sse import sse_client
transport = sse_client(cfg.url)
print(f"[MCP] ✅ {cfg.name}{len(tools_resp.tools)} outil(s) disponible(s)")
async with transport as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
tools_resp = await session.list_tools()
self._sessions[cfg.name] = session
self._raw_tools[cfg.name] = tools_resp.tools
server_safe = _sanitize_name(cfg.name)
for tool in tools_resp.tools:
tool_safe = _sanitize_name(tool.name)
self._tool_map[f"{server_safe}__{tool_safe}"] = (cfg.name, tool.name)
print(f"[MCP] ✅ {cfg.name}{len(tools_resp.tools)} outil(s) disponible(s)")
ready_event.set()
# Maintenir la connexion jusqu'au signal d'arrêt
await stop_event.wait()
except Exception as e:
error_holder.append(e)
ready_event.set() # débloquer _connect_server même en cas d'erreur
finally:
self._sessions.pop(cfg.name, None)
self._raw_tools.pop(cfg.name, None)
to_remove = [k for k, v in self._tool_map.items() if v[0] == cfg.name]
for k in to_remove:
del self._tool_map[k]
async def _call_tool_async(self, server_name: str, tool_name: str, arguments: dict) -> str:
session = self._sessions[server_name]
@@ -156,17 +225,6 @@ class MCPManager:
parts.append(str(item))
return "\n".join(parts) if parts else "(aucun résultat)"
async def _shutdown_async(self) -> None:
for stack in list(self._exit_stacks.values()):
try:
await stack.aclose()
except Exception:
pass
self._sessions.clear()
self._raw_tools.clear()
self._tool_map.clear()
self._exit_stacks.clear()
_manager: MCPManager | None = None
_lock = threading.Lock()
@@ -185,4 +243,6 @@ def reset_manager() -> None:
with _lock:
if _manager is not None:
_manager.shutdown()
_manager = MCPManager()
_manager = MCPManager()

109
assistant/stt.py Normal file
View File

@@ -0,0 +1,109 @@
"""
Transcription vocale temps réel via Voxtral Mini Transcribe Realtime.
Flux : microphone (PCM 16kHz) → WebSocket Voxtral → texte transcrit
"""
import asyncio
import sys
from typing import AsyncIterator
import numpy as np
import sounddevice as sd
from mistralai.client import Mistral
from mistralai.client.models import (
AudioFormat,
TranscriptionStreamDone,
TranscriptionStreamTextDelta,
)
from . import config
# Realtime transcription model identifier.
STT_MODEL = "voxtral-mini-transcribe-realtime-2602"
# Capture rate in Hz — presumably the rate the realtime endpoint expects; confirm with API docs.
SAMPLE_RATE = 16000
# Mono capture.
CHANNELS = 1
CHUNK_FRAMES = 1600  # 100 ms of audio per chunk at 16 kHz
async def _mic_stream(stop_event: asyncio.Event) -> AsyncIterator[bytes]:
    """Capture the microphone and yield PCM int16 chunks until stop_event is set.

    The sounddevice callback fires on a PortAudio thread, so chunks are handed
    to the asyncio loop with call_soon_threadsafe.
    """
    # get_running_loop() is the correct API inside a coroutine;
    # get_event_loop() is deprecated for this use since Python 3.10.
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[bytes | None] = asyncio.Queue()

    def callback(indata: np.ndarray, frames: int, time, status) -> None:
        if status:
            print(f"[Mic] {status}", file=sys.stderr)
        # Convert float32 [-1, 1] samples to signed 16-bit little-endian mono.
        pcm = (indata[:, 0] * 32767).astype(np.int16).tobytes()
        loop.call_soon_threadsafe(queue.put_nowait, pcm)

    stream = sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=CHANNELS,
        dtype="float32",
        blocksize=CHUNK_FRAMES,
        callback=callback,
    )
    with stream:
        while not stop_event.is_set():
            try:
                # Short timeout so the stop flag is re-checked regularly.
                chunk = await asyncio.wait_for(queue.get(), timeout=0.2)
                yield chunk
            except asyncio.TimeoutError:
                continue
    # Drain whatever arrived between the stop signal and the stream closing.
    while not queue.empty():
        chunk = queue.get_nowait()
        if chunk:
            yield chunk
async def transcribe_from_mic() -> str:
    """
    Listen to the microphone until the user presses Enter, then return the
    transcribed text.

    Partial deltas are echoed to stdout as they arrive; the final transcript
    comes from the TranscriptionStreamDone event.
    """
    client = Mistral(api_key=config.MISTRAL_API_KEY)
    stop_event = asyncio.Event()
    # get_running_loop() replaces the deprecated get_event_loop() in coroutines.
    loop = asyncio.get_running_loop()
    print("🎤 Parlez... (Entrée pour arrêter)")
    # Wait for Enter in a worker thread so the event loop stays responsive.
    async def wait_for_enter() -> None:
        await loop.run_in_executor(None, input)
        stop_event.set()
    enter_task = asyncio.create_task(wait_for_enter())
    audio_fmt = AudioFormat(
        encoding="pcm_s16le",
        sample_rate=SAMPLE_RATE,
    )
    final_text = ""
    try:
        async for event in client.audio.realtime.transcribe_stream(
            audio_stream=_mic_stream(stop_event),
            model=STT_MODEL,
            audio_format=audio_fmt,
            target_streaming_delay_ms=300,
        ):
            if isinstance(event, TranscriptionStreamTextDelta):
                # Live echo of the partial transcription.
                print(event.text, end="", flush=True)
            elif isinstance(event, TranscriptionStreamDone):
                final_text = event.text
                print()  # newline after the transcript
                break
    finally:
        # Always stop the mic stream and release the Enter-listener task.
        stop_event.set()
        enter_task.cancel()
        try:
            await enter_task
        except asyncio.CancelledError:
            pass
    return final_text.strip()

54
assistant/tts.py Normal file
View File

@@ -0,0 +1,54 @@
import base64
from mistralai.client import Mistral
from . import config
# Shared API client for all TTS calls in this module.
_client = Mistral(api_key=config.MISTRAL_API_KEY)
# Cache for the lazily-resolved preset voice id (see _get_default_voice_id).
_default_voice_id: str | None = None
def _get_default_voice_id() -> str:
    """
    Fetch and cache the id of a preset voice.

    Voices supporting the configured language (VOICE_LANGUAGE) are preferred;
    otherwise the first available preset is used. The result is memoized in
    module state so the API is queried at most once.
    """
    global _default_voice_id
    if _default_voice_id is not None:
        return _default_voice_id
    voices = _client.audio.voices.list(type_="preset", limit=50)
    if not voices.items:
        raise RuntimeError(
            "Aucune voix disponible. Configurez VOICE_ID dans .env ou créez une voix "
            "avec scripts/register_voice.py"
        )
    wanted_lang = config.VOICE_LANGUAGE
    # First voice supporting the wanted language, else fall back to the first preset.
    chosen = next(
        (v for v in voices.items if v.languages and wanted_lang in v.languages),
        voices.items[0],
    )
    _default_voice_id = chosen.id
    print(f"[TTS] Voix sélectionnée : {chosen.name} (langues: {chosen.languages}) — id: {_default_voice_id}")
    return _default_voice_id
def text_to_speech(text: str, voice_id: str | None = None) -> bytes:
    """
    Convert text to WAV audio with Voxtral TTS.

    WAV is chosen because aplay (Linux) and afplay (macOS) play it natively,
    with no conversion step. Voice precedence: explicit argument, then
    config.VOICE_ID, then the cached default preset voice.
    """
    chosen_voice = voice_id or config.VOICE_ID or _get_default_voice_id()
    response = _client.audio.speech.complete(
        model=config.TTS_MODEL,
        input=text,
        voice_id=chosen_voice,
        response_format="wav",
    )
    # The API returns the audio payload base64-encoded.
    return base64.b64decode(response.audio_data)