Add .gitignore
This commit is contained in:
0
assistant/__init__.py
Normal file
0
assistant/__init__.py
Normal file
84
assistant/audio.py
Normal file
84
assistant/audio.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import sys
|
||||
import subprocess
|
||||
from . import config
|
||||
|
||||
|
||||
def play_audio(pcm_bytes: bytes) -> None:
    """Play an audio payload through the platform's system player.

    Dispatches on ``sys.platform``:

    - Linux:   piped straight to ``aplay`` (no temporary file)
    - macOS:   written to a temporary file and played with ``afplay``
      (``afplay`` cannot read from stdin)
    - Windows: wrapped in a WAV header, written to a temporary file and
      played via PowerShell (fallback path)

    Args:
        pcm_bytes: Audio bytes to play. On Linux/macOS the payload is passed
            through unchanged (aplay/afplay auto-detect a WAV header).

    Raises:
        RuntimeError: If the current platform is not supported.
    """
    # NOTE(review): the Windows path wraps the payload via _pcm_to_wav while
    # Linux/macOS play it as-is — confirm whether callers pass raw PCM or WAV.
    platform = sys.platform

    if platform.startswith("linux"):
        _play_pcm_aplay(pcm_bytes)
    elif platform == "darwin":
        _play_pcm_macos(pcm_bytes)
    elif platform == "win32":
        _play_pcm_windows(pcm_bytes)
    else:
        raise RuntimeError(f"Plateforme non supportée : {platform}")
|
||||
|
||||
|
||||
def _play_pcm_aplay(pcm_bytes: bytes) -> None:
    """Stream the audio bytes to ``aplay`` over stdin; the format is
    auto-detected by aplay from the payload's header."""
    player = subprocess.Popen(["aplay", "-q", "-"], stdin=subprocess.PIPE)
    player.communicate(pcm_bytes)
    rc = player.returncode
    if rc != 0:
        raise RuntimeError(f"aplay a échoué (code {rc})")
|
||||
|
||||
|
||||
def _play_pcm_macos(pcm_bytes: bytes) -> None:
    """Play audio on macOS with ``afplay``.

    ``afplay`` cannot read from stdin, so the bytes are written to a
    temporary ``.wav`` file which is removed once playback finishes
    (the original docstring incorrectly claimed a stdin pipe).

    Raises:
        subprocess.CalledProcessError: If ``afplay`` exits non-zero.
    """
    import tempfile, os
    # afplay ne lit pas depuis stdin, on utilise un fichier temporaire
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(pcm_bytes)
        tmp_path = tmp.name
    try:
        subprocess.run(["afplay", tmp_path], check=True)
    finally:
        # Always remove the temp file, even if playback fails.
        os.unlink(tmp_path)
|
||||
|
||||
|
||||
def _play_pcm_windows(pcm_bytes: bytes) -> None:
    """Play raw PCM on Windows: wrap it in a WAV header, write it to a
    temporary file, and play it synchronously through PowerShell's
    ``Media.SoundPlayer``. The temporary file is removed afterwards."""
    import tempfile, os

    wav_payload = _pcm_to_wav(pcm_bytes)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(wav_payload)
        wav_path = tmp.name
    try:
        command = f'(New-Object Media.SoundPlayer "{wav_path}").PlaySync()'
        subprocess.run(
            ["powershell", "-c", command],
            check=True,
        )
    finally:
        os.unlink(wav_path)
|
||||
|
||||
|
||||
def _pcm_to_wav(pcm_bytes: bytes) -> bytes:
|
||||
"""Ajoute un header WAV minimal au PCM brut S16LE mono."""
|
||||
import struct
|
||||
sample_rate = config.TTS_PCM_SAMPLE_RATE
|
||||
num_channels = 1
|
||||
bits_per_sample = 16
|
||||
byte_rate = sample_rate * num_channels * bits_per_sample // 8
|
||||
block_align = num_channels * bits_per_sample // 8
|
||||
data_size = len(pcm_bytes)
|
||||
header = struct.pack(
|
||||
"<4sI4s4sIHHIIHH4sI",
|
||||
b"RIFF", 36 + data_size, b"WAVE",
|
||||
b"fmt ", 16, 1, num_channels,
|
||||
sample_rate, byte_rate, block_align, bits_per_sample,
|
||||
b"data", data_size,
|
||||
)
|
||||
return header + pcm_bytes
|
||||
|
||||
|
||||
def _command_exists(cmd: str) -> bool:
|
||||
return subprocess.run(["which", cmd], capture_output=True).returncode == 0
|
||||
127
assistant/cli.py
127
assistant/cli.py
@@ -1,5 +1,8 @@
|
||||
import asyncio
|
||||
import queue as _queue
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
from . import llm, tts, audio, config
|
||||
|
||||
|
||||
@@ -34,22 +37,122 @@ def _set_voice(parts: list[str]) -> None:
|
||||
print(f"Voix définie sur : {config.VOICE_ID}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sentence splitting & markdown cleaning for TTS pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Split on sentence boundaries using fixed-width lookbehinds (Python constraint).
|
||||
# For ".": 3-char lookbehind — require the 2 chars before the period to be
|
||||
# lowercase, digit, hyphen, or closing bracket (NOT uppercase). This protects
|
||||
# "M.", "No.", "A." while correctly splitting "arrivé.", "Jump-2.", "(J2).", "ok."
|
||||
_SENT_RE = re.compile(
|
||||
r'(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.])\s+' # word(2+) + period
|
||||
r'|(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.][»"\')\]])\s+' # + closing quote
|
||||
r'|(?<=[!?…])\s+' # ! ? …
|
||||
r'|(?<=[!?…][»"\')\]])\s+' # ! ? … + closing quote
|
||||
r'|\n{2,}' # paragraph break
|
||||
)
|
||||
|
||||
# Markdown patterns to strip before TTS (keep inner text where applicable)
|
||||
_MD_CODE_BLOCK = re.compile(r'```.*?```', re.DOTALL)
|
||||
_MD_INLINE = re.compile(r'\*{1,3}(.*?)\*{1,3}|_{1,2}(.*?)_{1,2}|~~(.*?)~~|`([^`]+)`', re.DOTALL)
|
||||
_MD_LINK = re.compile(r'\[([^\]]*)\]\([^\)]*\)')
|
||||
_MD_HEADER = re.compile(r'^#{1,6}\s+', re.MULTILINE)
|
||||
_MULTI_SPACE = re.compile(r'\s+')
|
||||
|
||||
|
||||
def _split_sentences(text: str) -> tuple[list[str], str]:
|
||||
"""Extrait les phrases complètes d'un buffer partiel.
|
||||
|
||||
Returns (complete_sentences, remainder).
|
||||
"""
|
||||
parts = _SENT_RE.split(text)
|
||||
if len(parts) <= 1:
|
||||
return [], text
|
||||
return parts[:-1], parts[-1]
|
||||
|
||||
|
||||
def _clean_for_tts(text: str) -> str:
|
||||
"""Supprime le formatage Markdown avant la synthèse vocale."""
|
||||
text = _MD_CODE_BLOCK.sub(' ', text)
|
||||
text = _MD_INLINE.sub(lambda m: next(g for g in m.groups() if g is not None), text)
|
||||
text = _MD_LINK.sub(r'\1', text)
|
||||
text = _MD_HEADER.sub('', text)
|
||||
text = text.replace('→', ' vers ').replace('←', ' depuis ')
|
||||
text = _MULTI_SPACE.sub(' ', text).strip()
|
||||
return text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Message processing — streaming TTS pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _process_message(user_input: str) -> None:
|
||||
"""Envoie un message au LLM et lit la réponse à voix haute."""
|
||||
print(f"Arioch > ", end="", flush=True)
|
||||
try:
|
||||
reply = llm.chat(user_input)
|
||||
except Exception as e:
|
||||
print(f"\n[Erreur LLM] {e}")
|
||||
return
|
||||
"""Envoie un message au LLM et lit la réponse à voix haute.
|
||||
|
||||
print(reply)
|
||||
Pipeline 3 étages en parallèle :
|
||||
[LLM stream] → sentence_queue → [TTS thread] → audio_queue → [Audio thread]
|
||||
|
||||
Dès qu'une phrase complète est détectée dans le stream LLM, elle part
|
||||
immédiatement en synthèse Voxtral. L'audio est joué dès qu'il est prêt,
|
||||
pendant que la phrase suivante est déjà en cours de synthèse.
|
||||
"""
|
||||
SENTINEL = object()
|
||||
sentence_q: _queue.Queue = _queue.Queue()
|
||||
audio_q: _queue.Queue = _queue.Queue()
|
||||
|
||||
def tts_worker() -> None:
|
||||
while True:
|
||||
item = sentence_q.get()
|
||||
if item is SENTINEL:
|
||||
audio_q.put(SENTINEL)
|
||||
return
|
||||
text = _clean_for_tts(item)
|
||||
if text:
|
||||
try:
|
||||
audio_bytes = tts.text_to_speech(text)
|
||||
audio_q.put(audio_bytes)
|
||||
except Exception as e:
|
||||
print(f"\n[Erreur TTS] {e}", flush=True)
|
||||
|
||||
def audio_worker() -> None:
|
||||
while True:
|
||||
item = audio_q.get()
|
||||
if item is SENTINEL:
|
||||
return
|
||||
try:
|
||||
audio.play_audio(item)
|
||||
except Exception as e:
|
||||
print(f"\n[Erreur Audio] {e}", flush=True)
|
||||
|
||||
tts_thread = threading.Thread(target=tts_worker, daemon=True, name="tts-worker")
|
||||
audio_thread = threading.Thread(target=audio_worker, daemon=True, name="audio-worker")
|
||||
tts_thread.start()
|
||||
audio_thread.start()
|
||||
|
||||
print("Arioch > ", end="", flush=True)
|
||||
buffer = ""
|
||||
try:
|
||||
audio_bytes = tts.text_to_speech(reply)
|
||||
audio.play_audio(audio_bytes)
|
||||
for chunk in llm.chat_stream(user_input):
|
||||
print(chunk, end="", flush=True)
|
||||
buffer += chunk
|
||||
sentences, buffer = _split_sentences(buffer)
|
||||
for sentence in sentences:
|
||||
sentence = sentence.strip()
|
||||
if sentence:
|
||||
sentence_q.put(sentence)
|
||||
except Exception as e:
|
||||
print(f"[Erreur TTS/Audio] {e}")
|
||||
print(f"\n[Erreur LLM] {e}", flush=True)
|
||||
|
||||
# Flush any remaining text after the stream ends
|
||||
if buffer.strip():
|
||||
sentence_q.put(buffer.strip())
|
||||
|
||||
print() # newline after full response
|
||||
sentence_q.put(SENTINEL)
|
||||
|
||||
tts_thread.join()
|
||||
audio_thread.join()
|
||||
|
||||
|
||||
def _handle_command(user_input: str) -> bool:
|
||||
@@ -118,7 +221,7 @@ def _handle_mcp(parts: list[str]) -> None:
|
||||
print(f"\nTotal : {total} outil(s). Tapez 'mcp tools' pour les lister.\n")
|
||||
|
||||
|
||||
|
||||
def _list_profiles(profiles: list) -> None:
|
||||
if not profiles:
|
||||
print("Aucun profil disponible dans profiles/")
|
||||
return
|
||||
|
||||
20
assistant/config.py
Normal file
20
assistant/config.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (silently does nothing if no .env file exists).
load_dotenv()

# Required: fail fast at import time if the Mistral API key is missing.
MISTRAL_API_KEY: str = os.environ["MISTRAL_API_KEY"]

# Model identifiers for chat completion and text-to-speech.
LLM_MODEL: str = "mistral-large-latest"
TTS_MODEL: str = "voxtral-mini-tts-2603"

# Optional voice override; an empty string in the environment becomes None.
VOICE_ID: str | None = os.getenv("VOICE_ID") or None

# Assistant persona prompt, overridable via the SYSTEM_PROMPT env variable.
SYSTEM_PROMPT: str = os.getenv(
    "SYSTEM_PROMPT",
    "Tu es Arioch, un assistant vocal intelligent, concis et sympathique. "
    "Réponds toujours en français sauf si l'utilisateur parle une autre langue.",
)

# Preferred TTS voice language, and raw-PCM sample rate (Hz) used for playback.
VOICE_LANGUAGE: str = os.getenv("VOICE_LANGUAGE", "fr")
TTS_PCM_SAMPLE_RATE: int = int(os.getenv("TTS_PCM_SAMPLE_RATE", "24000"))
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
from typing import Generator
|
||||
|
||||
from mistralai.client import Mistral
|
||||
from . import config
|
||||
@@ -13,8 +14,12 @@ def reset_history() -> None:
|
||||
_history.clear()
|
||||
|
||||
|
||||
def chat(user_message: str) -> str:
|
||||
"""Envoie un message au LLM, gère les appels d'outils MCP et retourne la réponse."""
|
||||
def chat_stream(user_message: str) -> Generator[str, None, None]:
|
||||
"""Génère la réponse du LLM token par token via streaming.
|
||||
|
||||
Les appels d'outils MCP sont exécutés internement (sans streaming).
|
||||
Seule la réponse textuelle finale est streamée sous forme de chunks.
|
||||
"""
|
||||
from . import mcp_client
|
||||
|
||||
_history.append({"role": "user", "content": user_message})
|
||||
@@ -24,20 +29,30 @@ def chat(user_message: str) -> str:
|
||||
|
||||
while True:
|
||||
messages = [{"role": "system", "content": config.SYSTEM_PROMPT}] + _history
|
||||
|
||||
kwargs: dict = {"model": config.LLM_MODEL, "messages": messages}
|
||||
if tools:
|
||||
kwargs["tools"] = tools
|
||||
|
||||
response = _client.chat.complete(**kwargs)
|
||||
choice = response.choices[0]
|
||||
msg = choice.message
|
||||
accumulated_content = ""
|
||||
tool_calls_received = None
|
||||
|
||||
if msg.tool_calls:
|
||||
# 1. Ajouter le message assistant (avec les appels d'outils) à l'historique
|
||||
for event in _client.chat.stream(**kwargs):
|
||||
ch = event.data.choices[0]
|
||||
delta = ch.delta
|
||||
|
||||
# Yield text chunks (isinstance check guards against Unset sentinel)
|
||||
if isinstance(delta.content, str) and delta.content:
|
||||
accumulated_content += delta.content
|
||||
yield delta.content
|
||||
|
||||
if delta.tool_calls:
|
||||
tool_calls_received = delta.tool_calls
|
||||
|
||||
if tool_calls_received:
|
||||
# Append assistant turn with tool calls to history
|
||||
_history.append({
|
||||
"role": "assistant",
|
||||
"content": msg.content or "",
|
||||
"content": accumulated_content or "",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": tc.id,
|
||||
@@ -47,12 +62,12 @@ def chat(user_message: str) -> str:
|
||||
"arguments": tc.function.arguments,
|
||||
},
|
||||
}
|
||||
for tc in msg.tool_calls
|
||||
for tc in tool_calls_received
|
||||
],
|
||||
})
|
||||
|
||||
# 2. Exécuter chaque outil et ajouter les résultats
|
||||
for tc in msg.tool_calls:
|
||||
# Execute each tool and append results
|
||||
for tc in tool_calls_received:
|
||||
tool_name = tc.function.name
|
||||
try:
|
||||
args = (
|
||||
@@ -60,7 +75,7 @@ def chat(user_message: str) -> str:
|
||||
if isinstance(tc.function.arguments, str)
|
||||
else tc.function.arguments
|
||||
)
|
||||
print(f" 🔧 [MCP] {tool_name}({_short_args(args)})")
|
||||
print(f"\n 🔧 [MCP] {tool_name}({_short_args(args)})", flush=True)
|
||||
result = manager.call_tool(tool_name, args)
|
||||
except Exception as e:
|
||||
result = f"Erreur lors de l'appel à {tool_name} : {e}"
|
||||
@@ -70,13 +85,17 @@ def chat(user_message: str) -> str:
|
||||
"content": result,
|
||||
"tool_call_id": tc.id,
|
||||
})
|
||||
|
||||
# 3. Reboucler pour obtenir la réponse finale
|
||||
# Loop to get the next (final) response
|
||||
|
||||
else:
|
||||
reply = msg.content or ""
|
||||
_history.append({"role": "assistant", "content": reply})
|
||||
return reply
|
||||
# Pure text response — already yielded chunk by chunk; save to history
|
||||
_history.append({"role": "assistant", "content": accumulated_content})
|
||||
break
|
||||
|
||||
|
||||
def chat(user_message: str) -> str:
    """Send a message to the LLM and return the complete reply (non-streaming)."""
    chunks = chat_stream(user_message)
    return "".join(chunks)
|
||||
|
||||
|
||||
def _short_args(args: dict) -> str:
|
||||
|
||||
@@ -12,12 +12,16 @@ Configure les serveurs dans un profil YAML sous la clé `mcp_servers` :
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
import threading
|
||||
from contextlib import AsyncExitStack
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Racine du projet (le dossier qui contient main.py)
|
||||
_PROJECT_ROOT = Path(__file__).parent.parent.resolve()
|
||||
|
||||
|
||||
@dataclass
|
||||
class MCPServerConfig:
|
||||
@@ -27,6 +31,18 @@ class MCPServerConfig:
|
||||
env: dict[str, str] | None = None
|
||||
url: str | None = None
|
||||
|
||||
def resolved_args(self) -> list[str]:
    """Resolve relative script paths in ``args`` against the project root.

    Only arguments that look like script files (.js/.py/.ts) are resolved;
    everything else (flags, absolute paths, ...) is passed through untouched.
    """

    def _resolve(raw: str) -> str:
        candidate = Path(raw)
        if candidate.is_absolute() or candidate.suffix not in (".js", ".py", ".ts"):
            return raw
        return str((_PROJECT_ROOT / candidate).resolve())

    return [_resolve(raw) for raw in self.args]
|
||||
|
||||
|
||||
def _sanitize_name(name: str) -> str:
|
||||
"""Transforme un nom en identifiant valide pour l'API Mistral (^[a-zA-Z0-9_-]{1,64}$)."""
|
||||
@@ -34,7 +50,12 @@ def _sanitize_name(name: str) -> str:
|
||||
|
||||
|
||||
class MCPManager:
|
||||
"""Gère les connexions aux serveurs MCP et l'exécution des outils."""
|
||||
"""Gère les connexions aux serveurs MCP et l'exécution des outils.
|
||||
|
||||
Chaque connexion tourne dans une "keeper task" qui possède toute la durée
|
||||
de vie du context manager stdio_client / ClientSession. Cela évite l'erreur
|
||||
anyio "Attempted to exit cancel scope in a different task".
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._loop = asyncio.new_event_loop()
|
||||
@@ -42,9 +63,9 @@ class MCPManager:
|
||||
self._thread.start()
|
||||
self._sessions: dict[str, Any] = {}
|
||||
self._raw_tools: dict[str, list] = {}
|
||||
# mistral_name -> (server_name, original_tool_name)
|
||||
self._tool_map: dict[str, tuple[str, str]] = {}
|
||||
self._exit_stacks: dict[str, AsyncExitStack] = {}
|
||||
# stop event per server, signalled at shutdown
|
||||
self._stop_events: dict[str, asyncio.Event] = {}
|
||||
|
||||
def _run_loop(self) -> None:
|
||||
asyncio.set_event_loop(self._loop)
|
||||
@@ -87,10 +108,18 @@ class MCPManager:
|
||||
return self._run(self._call_tool_async(server_name, tool_name, arguments))
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""Signale l'arrêt à toutes les keeper tasks et attend brièvement."""
|
||||
async def _signal_all() -> None:
|
||||
for ev in self._stop_events.values():
|
||||
ev.set()
|
||||
# Laisser une courte fenêtre pour que les tâches se terminent proprement
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
try:
|
||||
self._run(self._shutdown_async(), timeout=10)
|
||||
self._run(_signal_all(), timeout=5)
|
||||
except Exception:
|
||||
pass
|
||||
self._stop_events.clear()
|
||||
|
||||
def summary(self) -> list[tuple[str, int]]:
|
||||
"""Retourne [(server_name, tool_count), ...] pour les serveurs connectés."""
|
||||
@@ -112,38 +141,78 @@ class MCPManager:
|
||||
print(f"[MCP] ❌ Connexion {cfg.name} impossible : {e}")
|
||||
|
||||
async def _connect_server(self, cfg: MCPServerConfig) -> None:
|
||||
from mcp import ClientSession, StdioServerParameters
|
||||
from mcp.client.stdio import stdio_client
|
||||
"""Lance la keeper task et attend que la connexion soit établie."""
|
||||
stop_event = asyncio.Event()
|
||||
ready_event = asyncio.Event()
|
||||
error_holder: list[Exception] = []
|
||||
|
||||
stack = AsyncExitStack()
|
||||
self._stop_events[cfg.name] = stop_event
|
||||
|
||||
if cfg.command:
|
||||
params = StdioServerParameters(
|
||||
command=cfg.command,
|
||||
args=cfg.args or [],
|
||||
env=cfg.env,
|
||||
)
|
||||
read, write = await stack.enter_async_context(stdio_client(params))
|
||||
else:
|
||||
from mcp.client.sse import sse_client
|
||||
read, write = await stack.enter_async_context(sse_client(cfg.url))
|
||||
asyncio.create_task(
|
||||
self._run_server(cfg, ready_event, stop_event, error_holder),
|
||||
name=f"mcp-keeper-{cfg.name}",
|
||||
)
|
||||
|
||||
session = await stack.enter_async_context(ClientSession(read, write))
|
||||
await session.initialize()
|
||||
# Attendre que la connexion soit prête (ou échoue)
|
||||
try:
|
||||
await asyncio.wait_for(ready_event.wait(), timeout=30)
|
||||
except asyncio.TimeoutError:
|
||||
stop_event.set()
|
||||
raise TimeoutError(f"Timeout lors de la connexion à {cfg.name}")
|
||||
|
||||
self._sessions[cfg.name] = session
|
||||
self._exit_stacks[cfg.name] = stack
|
||||
if error_holder:
|
||||
raise error_holder[0]
|
||||
|
||||
tools_resp = await session.list_tools()
|
||||
self._raw_tools[cfg.name] = tools_resp.tools
|
||||
async def _run_server(
|
||||
self,
|
||||
cfg: MCPServerConfig,
|
||||
ready_event: asyncio.Event,
|
||||
stop_event: asyncio.Event,
|
||||
error_holder: list[Exception],
|
||||
) -> None:
|
||||
"""Keeper task : possède le context manager de bout en bout."""
|
||||
try:
|
||||
from mcp import ClientSession, StdioServerParameters
|
||||
|
||||
server_safe = _sanitize_name(cfg.name)
|
||||
for tool in tools_resp.tools:
|
||||
tool_safe = _sanitize_name(tool.name)
|
||||
mistral_name = f"{server_safe}__{tool_safe}"
|
||||
self._tool_map[mistral_name] = (cfg.name, tool.name)
|
||||
if cfg.command:
|
||||
from mcp.client.stdio import stdio_client
|
||||
transport = stdio_client(StdioServerParameters(
|
||||
command=cfg.command,
|
||||
args=cfg.resolved_args(),
|
||||
env=cfg.env,
|
||||
))
|
||||
else:
|
||||
from mcp.client.sse import sse_client
|
||||
transport = sse_client(cfg.url)
|
||||
|
||||
print(f"[MCP] ✅ {cfg.name} — {len(tools_resp.tools)} outil(s) disponible(s)")
|
||||
async with transport as (read, write):
|
||||
async with ClientSession(read, write) as session:
|
||||
await session.initialize()
|
||||
|
||||
tools_resp = await session.list_tools()
|
||||
self._sessions[cfg.name] = session
|
||||
self._raw_tools[cfg.name] = tools_resp.tools
|
||||
|
||||
server_safe = _sanitize_name(cfg.name)
|
||||
for tool in tools_resp.tools:
|
||||
tool_safe = _sanitize_name(tool.name)
|
||||
self._tool_map[f"{server_safe}__{tool_safe}"] = (cfg.name, tool.name)
|
||||
|
||||
print(f"[MCP] ✅ {cfg.name} — {len(tools_resp.tools)} outil(s) disponible(s)")
|
||||
ready_event.set()
|
||||
|
||||
# Maintenir la connexion jusqu'au signal d'arrêt
|
||||
await stop_event.wait()
|
||||
|
||||
except Exception as e:
|
||||
error_holder.append(e)
|
||||
ready_event.set() # débloquer _connect_server même en cas d'erreur
|
||||
finally:
|
||||
self._sessions.pop(cfg.name, None)
|
||||
self._raw_tools.pop(cfg.name, None)
|
||||
to_remove = [k for k, v in self._tool_map.items() if v[0] == cfg.name]
|
||||
for k in to_remove:
|
||||
del self._tool_map[k]
|
||||
|
||||
async def _call_tool_async(self, server_name: str, tool_name: str, arguments: dict) -> str:
|
||||
session = self._sessions[server_name]
|
||||
@@ -156,17 +225,6 @@ class MCPManager:
|
||||
parts.append(str(item))
|
||||
return "\n".join(parts) if parts else "(aucun résultat)"
|
||||
|
||||
async def _shutdown_async(self) -> None:
|
||||
for stack in list(self._exit_stacks.values()):
|
||||
try:
|
||||
await stack.aclose()
|
||||
except Exception:
|
||||
pass
|
||||
self._sessions.clear()
|
||||
self._raw_tools.clear()
|
||||
self._tool_map.clear()
|
||||
self._exit_stacks.clear()
|
||||
|
||||
|
||||
_manager: MCPManager | None = None
|
||||
_lock = threading.Lock()
|
||||
@@ -185,4 +243,6 @@ def reset_manager() -> None:
|
||||
with _lock:
|
||||
if _manager is not None:
|
||||
_manager.shutdown()
|
||||
_manager = MCPManager()
|
||||
_manager = MCPManager()
|
||||
|
||||
|
||||
|
||||
109
assistant/stt.py
Normal file
109
assistant/stt.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Transcription vocale temps réel via Voxtral Mini Transcribe Realtime.
|
||||
|
||||
Flux : microphone (PCM 16kHz) → WebSocket Voxtral → texte transcrit
|
||||
"""
|
||||
import asyncio
|
||||
import sys
|
||||
from typing import AsyncIterator
|
||||
|
||||
import numpy as np
|
||||
import sounddevice as sd
|
||||
from mistralai.client import Mistral
|
||||
from mistralai.client.models import (
|
||||
AudioFormat,
|
||||
TranscriptionStreamDone,
|
||||
TranscriptionStreamTextDelta,
|
||||
)
|
||||
|
||||
from . import config
|
||||
|
||||
# Voxtral realtime transcription model identifier.
STT_MODEL = "voxtral-mini-transcribe-realtime-2602"
# Microphone capture format: 16 kHz, mono.
SAMPLE_RATE = 16000
CHANNELS = 1
CHUNK_FRAMES = 1600  # 100 ms of audio per chunk (16000 Hz * 0.1 s)
|
||||
|
||||
|
||||
async def _mic_stream(stop_event: asyncio.Event) -> AsyncIterator[bytes]:
    """Capture the microphone and yield PCM int16 chunks until *stop_event* is set.

    The sounddevice callback runs on a separate audio thread, so chunks are
    handed to the event loop through ``call_soon_threadsafe``.
    """
    # get_running_loop() is the correct, non-deprecated accessor inside a
    # coroutine (get_event_loop() is deprecated here since Python 3.10).
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[bytes | None] = asyncio.Queue()

    def callback(indata: np.ndarray, frames: int, time, status) -> None:
        if status:
            print(f"[Mic] {status}", file=sys.stderr)
        # Convert float32 samples to little-endian int16 and hand off to the loop.
        pcm = (indata[:, 0] * 32767).astype(np.int16).tobytes()
        loop.call_soon_threadsafe(queue.put_nowait, pcm)

    stream = sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=CHANNELS,
        dtype="float32",
        blocksize=CHUNK_FRAMES,
        callback=callback,
    )

    with stream:
        while not stop_event.is_set():
            try:
                # Poll with a short timeout so stop_event is re-checked regularly.
                chunk = await asyncio.wait_for(queue.get(), timeout=0.2)
                yield chunk
            except asyncio.TimeoutError:
                continue

    # Drain whatever the callback enqueued before the stream closed.
    while not queue.empty():
        chunk = queue.get_nowait()
        if chunk:
            yield chunk
|
||||
|
||||
|
||||
async def transcribe_from_mic() -> str:
    """Listen to the microphone until the user presses Enter, then return the transcript.

    Streams 16 kHz PCM chunks to the Voxtral realtime endpoint and prints
    partial text as it arrives.
    """
    client = Mistral(api_key=config.MISTRAL_API_KEY)
    stop_event = asyncio.Event()
    # Inside a coroutine, get_running_loop() is the non-deprecated accessor
    # (get_event_loop() is deprecated here since Python 3.10).
    loop = asyncio.get_running_loop()

    print("🎤 Parlez... (Entrée pour arrêter)")

    # Wait for Enter in a worker thread so the event loop is never blocked.
    async def wait_for_enter() -> None:
        await loop.run_in_executor(None, input)
        stop_event.set()

    enter_task = asyncio.create_task(wait_for_enter())

    audio_fmt = AudioFormat(
        encoding="pcm_s16le",
        sample_rate=SAMPLE_RATE,
    )

    final_text = ""

    try:
        async for event in client.audio.realtime.transcribe_stream(
            audio_stream=_mic_stream(stop_event),
            model=STT_MODEL,
            audio_format=audio_fmt,
            target_streaming_delay_ms=300,
        ):
            if isinstance(event, TranscriptionStreamTextDelta):
                # Live display of the partial transcript.
                print(event.text, end="", flush=True)
            elif isinstance(event, TranscriptionStreamDone):
                final_text = event.text
                print()  # newline after the transcript
                break
    finally:
        # Ensure the mic generator stops and the Enter-watcher is torn down,
        # whether we exited normally or via an exception.
        stop_event.set()
        enter_task.cancel()
        try:
            await enter_task
        except asyncio.CancelledError:
            pass

    return final_text.strip()
|
||||
54
assistant/tts.py
Normal file
54
assistant/tts.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import base64
|
||||
from mistralai.client import Mistral
|
||||
from . import config
|
||||
|
||||
# Shared Mistral API client for all TTS calls in this module.
_client = Mistral(api_key=config.MISTRAL_API_KEY)

# Lazily resolved fallback voice id, cached by _get_default_voice_id().
_default_voice_id: str | None = None
|
||||
|
||||
|
||||
def _get_default_voice_id() -> str:
    """Fetch (and memoize) the id of a preset voice.

    Voices supporting the configured language (``VOICE_LANGUAGE``) are
    preferred; otherwise the first available preset voice is used.

    Raises:
        RuntimeError: When the API returns no preset voice at all.
    """
    global _default_voice_id
    if _default_voice_id is not None:
        return _default_voice_id

    voices = _client.audio.voices.list(type_="preset", limit=50)
    if not voices.items:
        raise RuntimeError(
            "Aucune voix disponible. Configurez VOICE_ID dans .env ou créez une voix "
            "avec scripts/register_voice.py"
        )

    # First voice matching the preferred language, else fall back to the first one.
    chosen = next(
        (v for v in voices.items if v.languages and config.VOICE_LANGUAGE in v.languages),
        voices.items[0],
    )

    _default_voice_id = chosen.id
    print(f"[TTS] Voix sélectionnée : {chosen.name} (langues: {chosen.languages}) — id: {_default_voice_id}")
    return _default_voice_id
|
||||
|
||||
|
||||
def text_to_speech(text: str, voice_id: str | None = None) -> bytes:
    """Synthesize *text* into WAV audio via Voxtral TTS.

    WAV is played natively by aplay (Linux) and afplay (macOS), so no
    conversion step is needed downstream.

    Args:
        text: The text to speak.
        voice_id: Explicit voice id; falls back to ``config.VOICE_ID``, then
            to the cached default preset voice.

    Returns:
        Decoded WAV bytes.
    """
    # Resolution order: explicit argument > .env override > lazy default.
    chosen_voice = voice_id or config.VOICE_ID or _get_default_voice_id()

    result = _client.audio.speech.complete(
        model=config.TTS_MODEL,
        input=text,
        voice_id=chosen_voice,
        response_format="wav",
    )

    # The API returns base64-encoded audio data.
    return base64.b64decode(result.audio_data)
|
||||
Reference in New Issue
Block a user