Add gitignore

This commit is contained in:
2026-04-07 22:06:36 +02:00
parent 49802d89d5
commit 5703cf5871
98 changed files with 5329 additions and 72 deletions

View File

@@ -1,5 +1,8 @@
import asyncio
import queue as _queue
import re
import sys
import threading
from . import llm, tts, audio, config
@@ -34,22 +37,122 @@ def _set_voice(parts: list[str]) -> None:
print(f"Voix définie sur : {config.VOICE_ID}")
# ---------------------------------------------------------------------------
# Sentence splitting & markdown cleaning for TTS pipeline
# ---------------------------------------------------------------------------
# Split on sentence boundaries using fixed-width lookbehinds (Python constraint).
# For ".": 3-char lookbehind — require the 2 chars before the period to be
# lowercase, digit, hyphen, or closing bracket (NOT uppercase). This protects
# "M.", "No.", "A." while correctly splitting "arrivé.", "Jump-2.", "(J2).", "ok."
#
# Each alternative carries its own lookbehind because Python's `re` requires
# every lookbehind to have a single fixed width (3, 4, 1 and 2 characters
# here). Only the whitespace *after* the terminator is matched, so the
# punctuation stays attached to the sentence it ends. The pattern has no
# capturing group, so `_SENT_RE.split()` returns text pieces only.
# The range \u00e0-\u00ff covers lowercase Latin-1 accented letters (à … ÿ);
# uppercase accented letters are deliberately outside it.
_SENT_RE = re.compile(
    r'(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.])\s+' # word(2+) + period
    r'|(?<=[a-z\u00e0-\u00ff\d\-)\]]{2}[.][»"\')\]])\s+' # + closing quote
    r'|(?<=[!?…])\s+' # ! ? …
    r'|(?<=[!?…][»"\')\]])\s+' # ! ? … + closing quote
    r'|\n{2,}' # paragraph break
)
# Markdown patterns to strip before TTS (keep inner text where applicable)
# Fenced code block: everything between two ``` fences, across lines (DOTALL).
_MD_CODE_BLOCK = re.compile(r'```.*?```', re.DOTALL)
# Inline markup, one capture group per alternative (only the matching
# alternative's group is not None): *…* / **…** / ***…***, _…_ / __…__,
# ~~…~~ and `…`.
# NOTE(review): the underscore alternative also fires inside identifiers
# ("a_b_c" becomes "abc") — confirm this is acceptable for spoken output.
_MD_INLINE = re.compile(r'\*{1,3}(.*?)\*{1,3}|_{1,2}(.*?)_{1,2}|~~(.*?)~~|`([^`]+)`', re.DOTALL)
# Link "[label](target)"; group 1 is the label.
_MD_LINK = re.compile(r'\[([^\]]*)\]\([^\)]*\)')
# Heading marker: 1-6 "#" plus following whitespace at the start of any line.
_MD_HEADER = re.compile(r'^#{1,6}\s+', re.MULTILINE)
# Any run of whitespace, newlines included.
_MULTI_SPACE = re.compile(r'\s+')
def _split_sentences(text: str) -> tuple[list[str], str]:
    """Extract the complete sentences from a partial text buffer.

    The buffer is cut at every boundary recognised by ``_SENT_RE``. The last
    piece is treated as still unfinished and handed back so the caller can
    keep appending streamed text to it.

    Args:
        text: Text accumulated so far.

    Returns:
        A ``(complete_sentences, remainder)`` pair. When no boundary is found
        the list is empty and the remainder is ``text`` unchanged.
    """
    # re.split always yields at least one piece, so the unpacking cannot fail:
    # with no boundary, ``finished`` is [] and ``pending`` is the whole text.
    *finished, pending = _SENT_RE.split(text)
    return finished, pending
# Arrow glyphs that are read aloud as French words ("vers" = towards,
# "depuis" = from). Every key must be exactly one character: an empty needle
# passed to str.replace would insert the word between every character.
_ARROW_WORDS = str.maketrans({
    '→': ' vers ',
    '➜': ' vers ',
    '➡': ' vers ',
    '←': ' depuis ',
    '⬅': ' depuis ',
})


def _clean_for_tts(text: str) -> str:
    """Strip Markdown formatting from text before speech synthesis.

    Steps, in order: fenced code blocks are replaced by a space (their content
    is dropped), inline emphasis / strikethrough / inline code keep only their
    inner text, links keep only their label, heading markers are removed,
    arrow glyphs are replaced by spoken words, and finally every whitespace
    run is collapsed to a single space.

    Args:
        text: Raw text, possibly containing Markdown.

    Returns:
        The cleaned text with no leading or trailing whitespace; may be empty.
    """
    text = _MD_CODE_BLOCK.sub(' ', text)
    # Exactly one capture group takes part in each _MD_INLINE match; keep it.
    text = _MD_INLINE.sub(lambda m: next(g for g in m.groups() if g is not None), text)
    text = _MD_LINK.sub(r'\1', text)
    text = _MD_HEADER.sub('', text)
    text = text.translate(_ARROW_WORDS)
    text = _MULTI_SPACE.sub(' ', text).strip()
    return text
# ---------------------------------------------------------------------------
# Message processing — streaming TTS pipeline
# ---------------------------------------------------------------------------
def _process_message(user_input: str) -> None:
    """Send a message to the LLM and speak the reply aloud while it streams.

    Three stages run concurrently, linked by two queues:

        [LLM stream] → sentence_q → [TTS thread] → audio_q → [audio thread]

    As soon as a complete sentence is detected in the LLM stream it is handed
    to Voxtral speech synthesis. The audio is played as soon as it is ready,
    while the next sentence is already being synthesised.

    Exceptions raised by the LLM stream, the synthesis call or playback are
    printed and not propagated; text queued before the error is still spoken.
    The function returns once both worker threads have finished.

    Args:
        user_input: The user's message, forwarded to ``llm.chat_stream``.
    """
    # Unique end-of-stream marker, passed down both queues in turn.
    SENTINEL = object()
    sentence_q: _queue.Queue = _queue.Queue()
    audio_q: _queue.Queue = _queue.Queue()

    def tts_worker() -> None:
        """Synthesise queued sentences and forward the audio."""
        while True:
            item = sentence_q.get()
            if item is SENTINEL:
                # Propagate shutdown to the playback stage.
                audio_q.put(SENTINEL)
                return
            text = _clean_for_tts(item)
            if not text:
                # Nothing speakable is left once the Markdown is stripped.
                continue
            try:
                audio_bytes = tts.text_to_speech(text)
            except Exception as e:
                # One failed sentence must not stop the rest of the reply.
                print(f"\n[Erreur TTS] {e}", flush=True)
            else:
                audio_q.put(audio_bytes)

    def audio_worker() -> None:
        """Play synthesised audio in arrival order."""
        while True:
            item = audio_q.get()
            if item is SENTINEL:
                return
            try:
                audio.play_audio(item)
            except Exception as e:
                print(f"\n[Erreur Audio] {e}", flush=True)

    tts_thread = threading.Thread(target=tts_worker, daemon=True, name="tts-worker")
    audio_thread = threading.Thread(target=audio_worker, daemon=True, name="audio-worker")
    tts_thread.start()
    audio_thread.start()

    print("Arioch > ", end="", flush=True)

    buffer = ""
    try:
        for chunk in llm.chat_stream(user_input):
            # Echo the text as it arrives, independently of the audio.
            print(chunk, end="", flush=True)
            buffer += chunk
            sentences, buffer = _split_sentences(buffer)
            for sentence in sentences:
                sentence = sentence.strip()
                if sentence:
                    sentence_q.put(sentence)
    except Exception as e:
        print(f"\n[Erreur LLM] {e}", flush=True)

    # Flush any remaining text after the stream ends
    if buffer.strip():
        sentence_q.put(buffer.strip())

    print()  # newline after full response

    # Shut the pipeline down in order and wait until playback has finished.
    sentence_q.put(SENTINEL)
    tts_thread.join()
    audio_thread.join()
def _handle_command(user_input: str) -> bool:
@@ -118,7 +221,7 @@ def _handle_mcp(parts: list[str]) -> None:
print(f"\nTotal : {total} outil(s). Tapez 'mcp tools' pour les lister.\n")
def _list_profiles(profiles: list) -> None:
if not profiles:
print("Aucun profil disponible dans profiles/")
return