Files
arioch-assistant/assistant/cli.py
2026-04-07 22:06:36 +02:00

300 lines
10 KiB
Python

import asyncio
import queue as _queue
import re
import sys
import threading
from . import llm, tts, audio, config
# User-facing help message (in French) printed verbatim by the "help" command.
# It lists every interactive command understood by the CLI loop and explains
# how the empty-line trigger works in vocal mode.
HELP_TEXT = """
Commandes disponibles :
exit / quit Quitter l'assistant
reset Effacer l'historique de conversation
voice <id> Changer la voix Voxtral (voice_id)
voice clear Revenir à la voix par défaut
mode texte Passer en mode saisie texte (défaut)
mode vocal Passer en mode entrée microphone
profiles Lister les profils de personnalité disponibles
profile <slug> Charger un profil (ex: profile traveller_scout)
mcp Lister les serveurs MCP connectés
mcp tools Lister tous les outils MCP disponibles
help Afficher ce message
Mode vocal : appuyez sur Entrée (sans rien écrire) pour commencer à parler,
puis Entrée à nouveau pour envoyer.
"""
def _set_voice(parts: list[str]) -> None:
    """Apply a ``voice`` command to the configured voice id.

    ``parts`` is the whitespace-split command line. ``parts[1]`` is either
    the literal ``clear`` (resets ``config.VOICE_ID`` to ``None``) or the
    voice id to store in ``config.VOICE_ID``. With no argument, a usage
    hint is printed and nothing is changed.
    """
    if len(parts) < 2:
        print("Usage : voice <id> ou voice clear")
        return

    requested = parts[1]
    if requested != "clear":
        config.VOICE_ID = requested
        print(f"Voix définie sur : {config.VOICE_ID}")
        return

    config.VOICE_ID = None
    print("Voix réinitialisée (défaut).")
# ---------------------------------------------------------------------------
# Sentence splitting & markdown cleaning for TTS pipeline
# ---------------------------------------------------------------------------

# Split on sentence boundaries using fixed-width lookbehinds (Python constraint).
# For ".": 3-char lookbehind — require the 2 chars before the period to be
# lowercase, digit, hyphen, or closing bracket (NOT uppercase). This protects
# "M.", "No.", "A." while correctly splitting "arrivé.", "Jump-2.", "(J2).", "ok."
_SENT_RE = re.compile(
    r'(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.])\s+'            # word(2+) + period
    r'|(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.][»"\')\]])\s+'  # + closing quote
    r'|(?<=[!?…])\s+'                                      # ! ? …
    r'|(?<=[!?…][»"\')\]])\s+'                             # ! ? … + closing quote
    r'|\n{2,}'                                             # paragraph break
)

# Markdown patterns to strip before TTS (keep inner text where applicable)
_MD_CODE_BLOCK = re.compile(r'```.*?```', re.DOTALL)
_MD_INLINE = re.compile(r'\*{1,3}(.*?)\*{1,3}|_{1,2}(.*?)_{1,2}|~~(.*?)~~|`([^`]+)`', re.DOTALL)
_MD_LINK = re.compile(r'\[([^\]]*)\]\([^\)]*\)')
_MD_HEADER = re.compile(r'^#{1,6}\s+', re.MULTILINE)
_MULTI_SPACE = re.compile(r'\s+')

# Arrow glyphs are replaced by spoken French words ("vers" / "depuis").
# NOTE(review): the previous code called str.replace with EMPTY patterns,
# which inserts the replacement between every character of the text; the
# arrow characters were presumably lost. Confirm the intended glyph set.
_ARROW_WORDS = str.maketrans({
    '→': ' vers ',
    '⟶': ' vers ',
    '⇒': ' vers ',
    '←': ' depuis ',
    '⟵': ' depuis ',
    '⇐': ' depuis ',
})


def _split_sentences(text: str) -> tuple[list[str], str]:
    """Extract the complete sentences from a partial text buffer.

    The buffer is split on the boundaries recognised by ``_SENT_RE``.
    Everything before the last boundary is returned as complete sentences;
    the trailing piece (possibly empty) is returned as the remainder so the
    caller can keep accumulating streamed text onto it.

    Returns:
        ``(complete_sentences, remainder)``. When no boundary is found the
        result is ``([], text)``.
    """
    parts = _SENT_RE.split(text)
    if len(parts) <= 1:
        return [], text
    return parts[:-1], parts[-1]


def _clean_for_tts(text: str) -> str:
    """Strip Markdown formatting from *text* before speech synthesis.

    Fenced code blocks are dropped, inline emphasis/code markers and link
    syntax are reduced to their inner text, heading markers are removed,
    arrow glyphs are replaced by words, and all whitespace runs are
    collapsed to single spaces. The result is stripped; it may be empty.
    """
    text = _MD_CODE_BLOCK.sub(' ', text)
    # Exactly one capture group participates per match: the inner text.
    text = _MD_INLINE.sub(lambda m: next(g for g in m.groups() if g is not None), text)
    text = _MD_LINK.sub(r'\1', text)
    text = _MD_HEADER.sub('', text)
    # Single pass over the text; never matches the empty string.
    text = text.translate(_ARROW_WORDS)
    text = _MULTI_SPACE.sub(' ', text).strip()
    return text
# ---------------------------------------------------------------------------
# Message processing — streaming TTS pipeline
# ---------------------------------------------------------------------------
def _process_message(user_input: str) -> None:
    """Send a message to the LLM and speak the streamed answer aloud.

    Three stages run concurrently:

        [LLM stream] → sentence queue → [TTS thread] → audio queue → [audio thread]

    Each complete sentence detected in the LLM stream is queued for
    synthesis right away; every synthesized clip is queued for playback as
    soon as it is ready, while the next sentence is already being
    synthesized. The function returns once both worker threads have
    drained their queues.
    """
    done = object()  # unique end-of-stream marker, compared by identity
    pending_sentences: _queue.Queue = _queue.Queue()
    pending_audio: _queue.Queue = _queue.Queue()

    def synthesize() -> None:
        # Turn queued sentences into audio until the marker arrives.
        while (sentence := pending_sentences.get()) is not done:
            speakable = _clean_for_tts(sentence)
            if not speakable:
                continue
            try:
                clip = tts.text_to_speech(speakable)
                pending_audio.put(clip)
            except Exception as e:
                print(f"\n[Erreur TTS] {e}", flush=True)
        # Propagate end-of-stream to the playback stage.
        pending_audio.put(done)

    def playback() -> None:
        # Play queued clips in order until the marker arrives.
        while (clip := pending_audio.get()) is not done:
            try:
                audio.play_audio(clip)
            except Exception as e:
                print(f"\n[Erreur Audio] {e}", flush=True)

    workers = (
        threading.Thread(target=synthesize, daemon=True, name="tts-worker"),
        threading.Thread(target=playback, daemon=True, name="audio-worker"),
    )
    for worker in workers:
        worker.start()

    print("Arioch > ", end="", flush=True)
    pending_text = ""
    try:
        for chunk in llm.chat_stream(user_input):
            print(chunk, end="", flush=True)
            pending_text += chunk
            complete, pending_text = _split_sentences(pending_text)
            for piece in complete:
                piece = piece.strip()
                if piece:
                    pending_sentences.put(piece)
    except Exception as e:
        print(f"\n[Erreur LLM] {e}", flush=True)

    # Whatever is left once the stream ends is spoken as a final sentence.
    tail = pending_text.strip()
    if tail:
        pending_sentences.put(tail)
    print()  # newline after full response

    pending_sentences.put(done)
    for worker in workers:
        worker.join()
def _handle_command(user_input: str) -> bool:
    """Handle a special CLI command.

    Commands with arguments (``voice``, ``mcp``, ``profile``) are recognised
    by their first whitespace-separated token, so an ordinary message that
    merely starts with the same letters (e.g. "voicemail ...") is no longer
    swallowed as a command. The profile helpers are imported only by the
    branches that use them.

    Args:
        user_input: The line typed by the user.

    Returns:
        ``True`` if the input was a command (and has been handled),
        ``False`` if it should be treated as a normal message.

    Raises:
        SystemExit: On ``exit`` / ``quit``.
    """
    lower = user_input.lower()
    parts = user_input.split()
    keyword = parts[0].lower() if parts else ""

    if lower in ("exit", "quit"):
        print("Au revoir !")
        sys.exit(0)
    if lower == "reset":
        llm.reset_history()
        print("Historique effacé.\n")
        return True
    if lower == "help":
        print(HELP_TEXT)
        return True
    if keyword == "voice":
        _set_voice(parts)
        return True
    if lower in ("mode texte", "mode text"):
        return True  # signal to the caller; nothing is switched here
    if lower in ("mode vocal", "mode voix", "mode voice"):
        return True  # signal to the caller; nothing is switched here
    if lower == "profiles":
        from .profile import list_profiles
        _list_profiles(list_profiles())
        return True
    if keyword == "mcp":
        _handle_mcp(parts)
        return True
    if keyword == "profile" and len(parts) >= 2:
        from .profile import apply_profile
        _load_profile(parts[1], apply_profile)
        return True
    return False
def _handle_mcp(parts: list[str]) -> None:
    """Print MCP information for the ``mcp`` command.

    ``mcp tools`` (``parts[1] == "tools"``) lists every tool returned by the
    manager's ``get_mistral_tools()``, one per line with its name and the
    first 60 characters of its description. Any other form lists the
    ``(name, tool_count)`` pairs from the manager's ``summary()`` followed
    by the total tool count.
    """
    from . import mcp_client

    manager = mcp_client.get_manager()
    servers = manager.summary()

    wants_tools = len(parts) >= 2 and parts[1] == "tools"
    if wants_tools:
        tools = manager.get_mistral_tools()
        if tools:
            print(f"\n{len(tools)} outil(s) MCP disponible(s) :")
            for tool in tools:
                spec = tool["function"]
                blurb = spec.get("description", "")
                print(f" {spec['name']:<45} {blurb[:60]}")
            print()
        else:
            print("Aucun outil MCP disponible.")
        return

    if not servers:
        print("Aucun serveur MCP connecté. Configurez 'mcp_servers' dans un profil YAML.\n")
        return

    print("\nServeurs MCP connectés :")
    for server_name, tool_count in servers:
        print(f" {server_name:<30} {tool_count} outil(s)")
    grand_total = sum(tool_count for _server, tool_count in servers)
    print(f"\nTotal : {grand_total} outil(s). Tapez 'mcp tools' pour les lister.\n")
def _list_profiles(profiles: list) -> None:
    """Print the available profiles, one per line.

    ``profiles`` is iterated as ``(slug, name, description)`` triples. The
    description is appended to the line only when it is truthy. An empty
    list prints a "no profile available" message instead.
    """
    if not profiles:
        print("Aucun profil disponible dans profiles/")
        return

    print("\nProfils disponibles :")
    for slug, name, desc in profiles:
        row = f" {slug:<25} {name}"
        if desc:
            row += f"{desc}"
        print(row)
    print("\nUsage : profile <slug>\n")
def _load_profile(slug: str, apply_fn) -> None:
    """Load the profile named *slug* and report the outcome.

    ``apply_fn(slug)`` is called to load the profile; the returned object's
    ``name`` is printed, followed by its ``description`` when that is
    truthy, then a blank line. A ``FileNotFoundError`` raised along the way
    is reported on one line instead of propagating.
    """
    try:
        loaded = apply_fn(slug)
        print(f"✅ Profil chargé : {loaded.name}")
        details = loaded.description
        if details:
            print(f" {details}")
        print()
    except FileNotFoundError as missing:
        print(f"[Profil] {missing}")
def run() -> None:
    """Run the interactive assistant loop until the user quits.

    Each iteration reads one line. A non-empty line is, in order, a mode
    switch, another command (see ``_handle_command``), or a normal message
    sent to ``_process_message``. An empty line is ignored in text mode; in
    vocal mode it triggers a microphone capture whose transcript is sent as
    the message. EOF or Ctrl-C at the prompt exits the process.
    """
    print("🎙️ Arioch — Assistant vocal (Mistral Large + Voxtral)")
    print("Commandes : 'profiles' pour voir les personnalités, 'mode vocal' pour parler, 'help' pour l'aide.\n")

    listening = False
    while True:
        prompt = "🎤 [vocal] Entrée pour parler > " if listening else "Vous > "
        try:
            typed = input(prompt).strip()
        except (EOFError, KeyboardInterrupt):
            print("\nAu revoir !")
            sys.exit(0)

        if typed:
            command = typed.lower()
            # Mode switches are handled here because they change loop state.
            if command in ("mode vocal", "mode voix", "mode voice"):
                listening = True
                print("Mode vocal activé. Appuyez sur Entrée (sans rien écrire) pour parler.\n")
            elif command in ("mode texte", "mode text"):
                listening = False
                print("Mode texte activé.\n")
            elif not _handle_command(typed):
                # Not a command: plain text message.
                _process_message(typed)
            continue

        # Empty line: only meaningful in vocal mode.
        if not listening:
            continue

        try:
            from .stt import transcribe_from_mic
            spoken = asyncio.run(transcribe_from_mic())
        except Exception as e:
            print(f"[Erreur STT] {e}")
            continue
        if not spoken:
            print("(rien capturé)")
            continue
        print(f"Vous (transcrit) : {spoken}")
        _process_message(spoken)