226 lines
7.6 KiB
Python
226 lines
7.6 KiB
Python
import logging
|
|
import os
|
|
import config
|
|
|
|
# Module-level logger shared by everything in this file.
log = logging.getLogger(__name__)

# chromadb is an optional dependency: record whether the import worked so the
# service can disable itself instead of failing at import time.
try:
    import chromadb
except ImportError:
    _chroma_ok = False
    log.warning("chromadb non installé — pip install chromadb")
else:
    _chroma_ok = True

# ollama is optional too; when it is missing no warning is emitted and the
# flag is simply left False.
try:
    import ollama as ollama_lib
except ImportError:
    _ollama_ok = False
else:
    _ollama_ok = True
|
|
|
|
# Lowercase keyword -> product label. detecter_produit_question() scans the
# entries in insertion order and returns the label of the first keyword found,
# so the order below matters. The labels are the values compared against the
# "produit" metadata field when FAQService.rechercher() filters a query.
MOTS_PRODUITS = {
    # Every Victron-related keyword resolves to the same label.
    **dict.fromkeys(
        ("multiplus", "quattro", "ve.bus", "vebus", "ess", "victron"),
        "Victron MultiPlus",
    ),
    # Inverters.
    "fronius": "Onduleur Fronius",
    "symo": "Onduleur Fronius Symo",
    "sma": "Onduleur SMA",
    "huawei": "Onduleur Huawei",
    # Batteries.
    "byd": "Batterie BYD",
    "pylontech": "Batterie Pylontech",
    # Heat pumps.
    "daikin": "PAC Daikin",
    "viessmann": "PAC Viessmann",
    # EV charging stations.
    "keba": "Borne IRVE Keba",
}
|
|
|
|
SYSTEM_PROMPT = """Tu es un expert technique pour des électriciens terrain en Alsace.
|
|
Installations : photovoltaïque, PAC, IRVE, onduleurs Victron.
|
|
|
|
FORMAT DE RÉPONSE OBLIGATOIRE (toujours respecter ce format) :
|
|
⚠️ Problème : [1 ligne — ce qui se passe]
|
|
✅ Actions : [2-4 étapes numérotées, concrètes, actionnables]
|
|
📄 Source : [nom du document et page si disponible]
|
|
|
|
RÈGLES :
|
|
- Maximum 6 lignes au total
|
|
- Chaque action = 1 geste précis que le technicien peut faire maintenant
|
|
- Si tu ne sais pas → réponds "❓ Consulter le manuel constructeur"
|
|
- Jamais de phrases génériques comme "vérifier l'installation"
|
|
"""
|
|
|
|
USER_PROMPT = """Question technicien terrain : {question}
|
|
|
|
Extraits de documentation disponibles :
|
|
{docs}
|
|
|
|
Réponds UNIQUEMENT au format demandé : Problème / Actions / Source."""
|
|
|
|
|
|
def detecter_produit_question(question: str) -> str | None:
|
|
q = question.lower()
|
|
for mot, produit in MOTS_PRODUITS.items():
|
|
if mot in q:
|
|
return produit
|
|
return None
|
|
|
|
|
|
class FAQService:
    """Technical FAQ backed by a persistent ChromaDB collection.

    The service stores documentation chunks and after-sales tickets in one
    collection, retrieves the closest entries for a question and, when Ollama
    is enabled and importable, asks a chat model to write the answer. When
    chromadb is missing or fails to initialise, `collection` stays None and
    the methods return empty or fallback values.
    """

    # Storage location and collection name, kept in one place because several
    # methods need them.
    DOSSIER_DONNEES = "./data"
    CHEMIN_CHROMA = "./data/chromadb"
    NOM_COLLECTION = "etm_faq"

    # Raw documents are split into windows of TAILLE_CHUNK words that start
    # every PAS_CHUNK words, so consecutive chunks overlap by 80 words.
    TAILLE_CHUNK = 400
    PAS_CHUNK = int(TAILLE_CHUNK * 0.8)

    def __init__(self):
        """Open (or create) the persistent collection; never raises."""
        self.client = None
        self.collection = None
        if not _chroma_ok:
            return
        try:
            os.makedirs(self.DOSSIER_DONNEES, exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.CHEMIN_CHROMA)
            self.collection = self._ouvrir_collection()
        except Exception as exc:
            # Best effort: a broken store must not prevent the module import.
            log.error("FAQService init : %s", exc)

    def _ouvrir_collection(self):
        """Get or create the FAQ collection configured for cosine distance."""
        return self.client.get_or_create_collection(
            name=self.NOM_COLLECTION,
            metadata={"hnsw:space": "cosine"},
        )

    @property
    def disponible(self) -> bool:
        """True when the collection exists and holds at least one entry."""
        return self.collection is not None and self.collection.count() > 0

    def reset_collection(self) -> None:
        """Drop the collection and create it again, empty."""
        if self.client is None:
            return
        try:
            self.client.delete_collection(self.NOM_COLLECTION)
        except Exception as exc:
            # The deletion stays best-effort, but the reason is kept in the
            # logs instead of being discarded.
            log.debug("reset_collection : suppression ignorée (%s)", exc)
        self.collection = self._ouvrir_collection()

    def rechercher(self, question: str, n_results: int = 2) -> dict:
        """Run a vector search, filtered by product when one is detected.

        Args:
            question: Text used as the query.
            n_results: Maximum number of entries requested from the collection.

        Returns:
            A dict with "documents" (list of str), "metadatas" (list of dict,
            same length) and "produit_detecte" (str or None). Both lists are
            empty when the collection is unavailable.
        """
        produit = detecter_produit_question(question)
        if self.collection is None:
            return {"documents": [], "metadatas": [], "produit_detecte": produit}

        kwargs = {"query_texts": [question], "n_results": n_results}
        if produit:
            kwargs["where"] = {"produit": {"$eq": produit}}

        try:
            results = self.collection.query(**kwargs)
        except Exception as exc:
            if "where" not in kwargs:
                # Nothing to relax: repeating the same query would not help.
                raise
            log.warning("Recherche filtrée (%s) en échec, relance sans filtre : %s", produit, exc)
            results = self.collection.query(query_texts=[question], n_results=n_results)

        docs = results["documents"][0] if results.get("documents") else []
        metas = results["metadatas"][0] if results.get("metadatas") else []

        # De-duplicate on the first 80 characters of each document.
        seen, docs_uniques, metas_uniques = set(), [], []
        for i, doc in enumerate(docs):
            cle = doc[:80]
            if cle in seen:
                continue
            seen.add(cle)
            docs_uniques.append(doc)
            # A missing or None metadata entry must not drop the document.
            metas_uniques.append((metas[i] if i < len(metas) else None) or {})

        return {
            "documents": docs_uniques,
            "metadatas": metas_uniques,
            "produit_detecte": produit,
        }

    def indexer_fiche_sav(self, ticket_id: str, produit: str, symptome: str,
                          cause: str, solution: str, chantier: str, date: str,
                          duree_min: int) -> None:
        """Insert or update one after-sales ticket, keyed by *ticket_id*.

        The product, symptom, cause and solution form the indexed text; the
        product, site, date and duration are stored as metadata. Does nothing
        when the collection is unavailable.
        """
        if self.collection is None:
            return
        document = f"Produit: {produit}\nSymptôme: {symptome}\nCause: {cause}\nSolution: {solution}"
        self.collection.upsert(
            documents=[document],
            ids=[ticket_id],
            metadatas=[{
                "source": "experience_etm",
                "produit": produit,
                "chantier": chantier,
                "date": date,
                "duree_min": duree_min,
            }],
        )

    def indexer_document(self, texte: str, source: str, produit: str = "") -> int:
        """Index raw text as overlapping word chunks (fallback without pymupdf).

        Entries previously stored for *source* are deleted first, then the new
        chunks are upserted with ids "<source>_chunk_<n>".

        Args:
            texte: Full text of the document.
            source: Source name, stored as metadata and used in the ids.
            produit: Product label stored as metadata (empty by default).

        Returns:
            The number of chunks written, 0 when there is nothing to index or
            the collection is unavailable.
        """
        if self.collection is None or not texte.strip():
            return 0

        mots = texte.split()
        chunks = []
        for debut in range(0, len(mots), self.PAS_CHUNK):
            chunks.append(" ".join(mots[debut:debut + self.TAILLE_CHUNK]))
            if debut + self.TAILLE_CHUNK >= len(mots):
                # This window already reaches the end of the text; a further
                # one would only repeat words it contains.
                break

        try:
            self.collection.delete(where={"source": source})
        except Exception as exc:
            # Still best-effort, but stale chunks may remain, so say so.
            log.warning("indexer_document : purge de %s impossible (%s)", source, exc)

        self.collection.upsert(
            documents=chunks,
            ids=[f"{source}_chunk_{i}" for i in range(len(chunks))],
            # One dict per chunk rather than the same dict repeated.
            metadatas=[
                {"source": source, "produit": produit, "type": "documentation"}
                for _ in chunks
            ],
        )
        return len(chunks)

    def formater_sans_ollama(self, question: str, docs: list, metas: list) -> str:
        """Format the first two extracts as plain text, without any model.

        Args:
            question: Unused; kept for interface compatibility.
            docs: Retrieved document texts.
            metas: Metadata dicts aligned with *docs*; entries may be missing
                or None.

        Returns:
            A framed text block listing, for each extract, its source (with
            page and title when present) and its first 250 characters.
        """
        lignes = [f"📚 FAQ ETM\n{'━' * 22}"]
        for i, doc in enumerate(docs[:2]):
            meta = (metas[i] if i < len(metas) else None) or {}
            source = meta.get("source", "")
            page = meta.get("page", "")
            titre = meta.get("titre", "")
            label = f"📄 {source}"
            if page:
                label += f" — p.{page}"
            if titre:
                label += f"\n{titre}"
            lignes.append(label)
            lignes.append(doc[:250].strip())
            lignes.append("")
        lignes.append("━" * 22)
        return "\n".join(lignes)

    async def repondre(self, question: str) -> str:
        """Answer a technician's question from the indexed knowledge base.

        Returns a fixed message when the base is empty or nothing matches.
        Otherwise the retrieved extracts are sent to the Ollama chat model when
        it is enabled and importable; on any Ollama failure or blank answer the
        extracts are returned formatted as plain text instead.
        """
        import asyncio  # function-scope import, as done elsewhere in this file

        if self.collection is None or self.collection.count() == 0:
            return "❓ Base vide — lance /sync_docs pour indexer les fiches techniques."

        resultats = self.rechercher(question)
        if not resultats["documents"]:
            return "❓ Aucune solution connue pour ce problème."

        if config.OLLAMA_ENABLED and _ollama_ok:
            try:
                messages = [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": USER_PROMPT.format(
                        question=question,
                        docs="\n\n".join(resultats["documents"]),
                    )},
                ]
                # ollama_lib.chat is a blocking call: run it in a worker
                # thread so the event loop keeps serving other coroutines.
                response = await asyncio.to_thread(
                    ollama_lib.chat,
                    model=config.OLLAMA_MODEL,
                    messages=messages,
                )
                contenu = response["message"]["content"]
                if contenu and contenu.strip():
                    return contenu
                log.warning("Ollama : réponse vide, repli sur les extraits")
            except Exception as exc:
                log.error("Ollama : %s", exc)

        return self.formater_sans_ollama(question, resultats["documents"], resultats["metadatas"])
|
|
|
|
|
|
def extract_pdf_text(pdf_bytes: bytes) -> str:
    """Extract the plain text of a PDF with pypdf (fallback without pymupdf).

    Args:
        pdf_bytes: Raw content of the PDF file.

    Returns:
        The stripped text of every non-empty page, joined by blank lines.
        Returns "" when pypdf is not installed or the PDF cannot be read; the
        error is logged rather than raised.
    """
    try:
        from io import BytesIO

        import pypdf

        reader = pypdf.PdfReader(BytesIO(pdf_bytes))
        textes = []
        for page in reader.pages:
            # Extract once per page (the call is not free), and drop pages
            # that contain nothing but whitespace.
            texte = (page.extract_text() or "").strip()
            if texte:
                textes.append(texte)
        return "\n\n".join(textes)
    except Exception as exc:
        log.error("extract_pdf_text : %s", exc)
        return ""
|
|
|
|
|
|
# Shared module-level instance, created when this module is imported.
# FAQService.__init__ catches its own errors, so a missing or broken ChromaDB
# leaves this object with collection = None instead of failing the import.
faq_service = FAQService()
|