300 lines
10 KiB
Python
300 lines
10 KiB
Python
import asyncio
|
|
import queue as _queue
|
|
import re
|
|
import sys
|
|
import threading
|
|
from . import llm, tts, audio, config
|
|
|
|
|
|
HELP_TEXT = """
|
|
Commandes disponibles :
|
|
exit / quit Quitter l'assistant
|
|
reset Effacer l'historique de conversation
|
|
voice <id> Changer la voix Voxtral (voice_id)
|
|
voice clear Revenir à la voix par défaut
|
|
mode texte Passer en mode saisie texte (défaut)
|
|
mode vocal Passer en mode entrée microphone
|
|
profiles Lister les profils de personnalité disponibles
|
|
profile <slug> Charger un profil (ex: profile traveller_scout)
|
|
mcp Lister les serveurs MCP connectés
|
|
mcp tools Lister tous les outils MCP disponibles
|
|
help Afficher ce message
|
|
|
|
Mode vocal : appuyez sur Entrée (sans rien écrire) pour commencer à parler,
|
|
puis Entrée à nouveau pour envoyer.
|
|
"""
|
|
|
|
|
|
def _set_voice(parts: list[str]) -> None:
|
|
if len(parts) < 2:
|
|
print("Usage : voice <id> ou voice clear")
|
|
return
|
|
if parts[1] == "clear":
|
|
config.VOICE_ID = None
|
|
print("Voix réinitialisée (défaut).")
|
|
else:
|
|
config.VOICE_ID = parts[1]
|
|
print(f"Voix définie sur : {config.VOICE_ID}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Sentence splitting & markdown cleaning for TTS pipeline
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Split on sentence boundaries using fixed-width lookbehinds (Python constraint).
|
|
# For ".": 3-char lookbehind — require the 2 chars before the period to be
|
|
# lowercase, digit, hyphen, or closing bracket (NOT uppercase). This protects
|
|
# "M.", "No.", "A." while correctly splitting "arrivé.", "Jump-2.", "(J2).", "ok."
|
|
_SENT_RE = re.compile(
|
|
r'(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.])\s+' # word(2+) + period
|
|
r'|(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.][»"\')\]])\s+' # + closing quote
|
|
r'|(?<=[!?…])\s+' # ! ? …
|
|
r'|(?<=[!?…][»"\')\]])\s+' # ! ? … + closing quote
|
|
r'|\n{2,}' # paragraph break
|
|
)
|
|
|
|
# Markdown patterns to strip before TTS (keep inner text where applicable)
|
|
_MD_CODE_BLOCK = re.compile(r'```.*?```', re.DOTALL)
|
|
_MD_INLINE = re.compile(r'\*{1,3}(.*?)\*{1,3}|_{1,2}(.*?)_{1,2}|~~(.*?)~~|`([^`]+)`', re.DOTALL)
|
|
_MD_LINK = re.compile(r'\[([^\]]*)\]\([^\)]*\)')
|
|
_MD_HEADER = re.compile(r'^#{1,6}\s+', re.MULTILINE)
|
|
_MULTI_SPACE = re.compile(r'\s+')
|
|
|
|
|
|
def _split_sentences(text: str) -> tuple[list[str], str]:
|
|
"""Extrait les phrases complètes d'un buffer partiel.
|
|
|
|
Returns (complete_sentences, remainder).
|
|
"""
|
|
parts = _SENT_RE.split(text)
|
|
if len(parts) <= 1:
|
|
return [], text
|
|
return parts[:-1], parts[-1]
|
|
|
|
|
|
def _clean_for_tts(text: str) -> str:
    """Return *text* with Markdown formatting removed, ready for synthesis.

    In order: fenced code blocks are replaced by a space, inline
    emphasis / strikethrough / code markers are unwrapped to their inner
    text, links are reduced to their label, heading markers are dropped,
    arrows are spelled out in French, and every whitespace run is collapsed
    to a single space before trimming.
    """
    def unwrap(match):
        # Each alternative of _MD_INLINE has its own group; keep the one
        # that took part in the match.
        return next(inner for inner in match.groups() if inner is not None)

    cleaned = _MD_CODE_BLOCK.sub(' ', text)
    cleaned = _MD_INLINE.sub(unwrap, cleaned)
    cleaned = _MD_HEADER.sub('', _MD_LINK.sub(r'\1', cleaned))
    for arrow, spoken in (('→', ' vers '), ('←', ' depuis ')):
        cleaned = cleaned.replace(arrow, spoken)
    return _MULTI_SPACE.sub(' ', cleaned).strip()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Message processing — streaming TTS pipeline
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _process_message(user_input: str) -> None:
    """Send a message to the LLM and speak the reply aloud.

    Three stages run concurrently:
        [LLM stream] -> sentence_q -> [TTS thread] -> audio_q -> [audio thread]

    As soon as a complete sentence is detected in the LLM stream it is queued
    for synthesis; audio is played as soon as it is ready, while the next
    sentence is already being synthesised.

    Args:
        user_input: The text forwarded to ``llm.chat_stream``.

    Errors raised by the LLM stream, the synthesis call or audio playback are
    printed and do not propagate. Anything that is not an ``Exception``
    (e.g. ``KeyboardInterrupt``) propagates, but the workers are still told
    to stop.
    """
    SENTINEL = object()  # end-of-stream marker, compared by identity
    sentence_q: _queue.Queue = _queue.Queue()
    audio_q: _queue.Queue = _queue.Queue()

    def tts_worker() -> None:
        while True:
            item = sentence_q.get()
            if item is SENTINEL:
                # Forward the end marker so the audio stage stops too.
                audio_q.put(SENTINEL)
                return
            text = _clean_for_tts(item)
            if text:
                try:
                    audio_bytes = tts.text_to_speech(text)
                    audio_q.put(audio_bytes)
                except Exception as e:
                    print(f"\n[Erreur TTS] {e}", flush=True)

    def audio_worker() -> None:
        while True:
            item = audio_q.get()
            if item is SENTINEL:
                return
            try:
                audio.play_audio(item)
            except Exception as e:
                print(f"\n[Erreur Audio] {e}", flush=True)

    tts_thread = threading.Thread(target=tts_worker, daemon=True, name="tts-worker")
    audio_thread = threading.Thread(target=audio_worker, daemon=True, name="audio-worker")
    tts_thread.start()
    audio_thread.start()

    print("Arioch > ", end="", flush=True)
    buffer = ""
    try:
        try:
            for chunk in llm.chat_stream(user_input):
                print(chunk, end="", flush=True)
                buffer += chunk
                sentences, buffer = _split_sentences(buffer)
                for sentence in sentences:
                    sentence = sentence.strip()
                    if sentence:
                        sentence_q.put(sentence)
        except Exception as e:
            print(f"\n[Erreur LLM] {e}", flush=True)

        # Flush whatever text is left once the stream has ended.
        leftover = buffer.strip()
        if leftover:
            sentence_q.put(leftover)

        print()  # newline after the full response
    finally:
        # Always post the end marker, even when the loop is left through a
        # non-Exception such as KeyboardInterrupt; otherwise both workers
        # would stay blocked on their queues.
        sentence_q.put(SENTINEL)

    tts_thread.join()
    audio_thread.join()
|
|
|
|
|
|
def _handle_command(user_input: str) -> bool:
|
|
"""Gère les commandes spéciales. Retourne True si c'était une commande."""
|
|
from .profile import list_profiles, apply_profile
|
|
|
|
lower = user_input.lower()
|
|
parts = user_input.split()
|
|
|
|
if lower in ("exit", "quit"):
|
|
print("Au revoir !")
|
|
sys.exit(0)
|
|
elif lower == "reset":
|
|
llm.reset_history()
|
|
print("Historique effacé.\n")
|
|
return True
|
|
elif lower == "help":
|
|
print(HELP_TEXT)
|
|
return True
|
|
elif lower.startswith("voice"):
|
|
_set_voice(parts)
|
|
return True
|
|
elif lower in ("mode texte", "mode text"):
|
|
return True # signal au caller
|
|
elif lower in ("mode vocal", "mode voix", "mode voice"):
|
|
return True # signal au caller
|
|
elif lower == "profiles":
|
|
_list_profiles(list_profiles())
|
|
return True
|
|
elif lower.startswith("mcp"):
|
|
_handle_mcp(parts)
|
|
return True
|
|
elif lower.startswith("profile ") and len(parts) >= 2:
|
|
_load_profile(parts[1], apply_profile)
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
def _handle_mcp(parts: list[str]) -> None:
    """Handle the ``mcp`` command.

    ``mcp tools`` prints every tool returned by the manager's
    ``get_mistral_tools()``; any other form prints the connected servers
    returned by ``summary()`` with their tool counts.

    Args:
        parts: Whitespace-split tokens of the user's input; ``parts[0]`` is
            the command word.
    """
    from . import mcp_client

    manager = mcp_client.get_manager()

    if len(parts) >= 2 and parts[1].lower() == "tools":
        tools = manager.get_mistral_tools()
        if not tools:
            print("Aucun outil MCP disponible.")
            return
        print(f"\n{len(tools)} outil(s) MCP disponible(s) :")
        for tool in tools:
            fn = tool["function"]
            # "description" may be absent or explicitly None; slicing None
            # would raise TypeError.
            desc = fn.get("description") or ""
            print(f" {fn['name']:<45} {desc[:60]}")
        print()
        return

    servers = manager.summary()
    if not servers:
        print("Aucun serveur MCP connecté. Configurez 'mcp_servers' dans un profil YAML.\n")
        return

    print("\nServeurs MCP connectés :")
    for name, count in servers:
        print(f" {name:<30} {count} outil(s)")
    total = sum(c for _, c in servers)
    print(f"\nTotal : {total} outil(s). Tapez 'mcp tools' pour les lister.\n")
|
|
|
|
|
|
def _list_profiles(profiles: list) -> None:
|
|
if not profiles:
|
|
print("Aucun profil disponible dans profiles/")
|
|
return
|
|
print("\nProfils disponibles :")
|
|
for slug, name, desc in profiles:
|
|
print(f" {slug:<25} {name}" + (f" — {desc}" if desc else ""))
|
|
print("\nUsage : profile <slug>\n")
|
|
|
|
|
|
def _load_profile(slug: str, apply_fn) -> None:
|
|
try:
|
|
profile = apply_fn(slug)
|
|
print(f"✅ Profil chargé : {profile.name}")
|
|
if profile.description:
|
|
print(f" {profile.description}")
|
|
print()
|
|
except FileNotFoundError as e:
|
|
print(f"[Profil] {e}")
|
|
|
|
|
|
def run() -> None:
    """Run the interactive assistant loop until the user quits.

    In text mode every non-empty line is either a command or a chat message.
    In vocal mode an empty line triggers a microphone capture whose
    transcription is sent as the message; typed lines are still handled as
    in text mode.

    End-of-input at the prompt, or Ctrl+C at any point (prompt, microphone
    capture, or while a reply is being streamed or played), prints a goodbye
    and exits with status 0.
    """
    print("🎙️ Arioch — Assistant vocal (Mistral Large + Voxtral)")
    print("Commandes : 'profiles' pour voir les personnalités, 'mode vocal' pour parler, 'help' pour l'aide.\n")

    vocal_mode = False

    try:
        while True:
            try:
                if vocal_mode:
                    prompt = "🎤 [vocal] Entrée pour parler > "
                else:
                    prompt = "Vous > "
                user_input = input(prompt).strip()
            except EOFError:
                print("\nAu revoir !")
                sys.exit(0)

            if not user_input:
                if vocal_mode:
                    # Empty line in vocal mode: start the microphone capture.
                    try:
                        from .stt import transcribe_from_mic

                        user_input = asyncio.run(transcribe_from_mic())
                    except Exception as e:
                        print(f"[Erreur STT] {e}")
                        continue

                    if not user_input:
                        print("(rien capturé)")
                        continue

                    print(f"Vous (transcrit) : {user_input}")
                    _process_message(user_input)
                continue

            lower = user_input.lower()

            # Mode switches are handled here because they change loop state.
            if lower in ("mode vocal", "mode voix", "mode voice"):
                vocal_mode = True
                print("Mode vocal activé. Appuyez sur Entrée (sans rien écrire) pour parler.\n")
                continue
            elif lower in ("mode texte", "mode text"):
                vocal_mode = False
                print("Mode texte activé.\n")
                continue

            # Other commands.
            if _handle_command(user_input):
                continue

            # Plain (typed) message.
            _process_message(user_input)
    except KeyboardInterrupt:
        # Ctrl+C outside the prompt used to escape as a traceback; give it
        # the same clean exit as at the prompt.
        print("\nAu revoir !")
        sys.exit(0)
|
|
|