111 lines
3.8 KiB
Python

import logging
from io import BytesIO
import httpx
# Module-level logger. Created before the optional-dependency guards so that
# both of them can report a missing package through the same logger.
log = logging.getLogger(__name__)

# pypdf is optional: when it cannot be imported, _pypdf_ok is False and
# extract_pdf_text returns an empty string.
try:
    import pypdf
    _pypdf_ok = True
except ImportError:
    _pypdf_ok = False
    log.warning("pypdf non installé — extraction PDF désactivée. pip install pypdf")

# chromadb is optional: when it cannot be imported, _chroma_ok is False and
# SAVAssistant is built without a collection.
try:
    import chromadb
    _chroma_ok = True
except ImportError:
    _chroma_ok = False
    log.warning("chromadb non installé — FAQ SAV désactivée. Installez : pip install chromadb")
# URL that SAVAssistant.chercher POSTs its text-generation request to.
# NOTE(review): localhost:11434 presumably targets a local Ollama server —
# confirm before deploying elsewhere.
OLLAMA_URL = "http://localhost:11434/api/generate"
# Value sent as the "model" field of that request.
OLLAMA_MODEL = "phi3"
class SAVAssistant:
def __init__(self):
self._collection = None
if not _chroma_ok:
return
try:
db = chromadb.PersistentClient(path="./sav_knowledge")
self._collection = db.get_or_create_collection("docs_techniques")
except Exception as exc:
log.error("SAVAssistant init : %s", exc)
@property
def disponible(self) -> bool:
return self._collection is not None and self._collection.count() > 0
async def indexer_document(self, texte: str, source: str) -> int:
if not self._collection:
return 0
chunks = [texte[i:i + 500] for i in range(0, len(texte), 400)]
try:
self._collection.delete(where={"source": source})
except Exception:
pass
self._collection.add(
documents=chunks,
ids=[f"{source}_{i}" for i in range(len(chunks))],
metadatas=[{"source": source}] * len(chunks),
)
log.info("Indexé %d chunks depuis '%s'", len(chunks), source)
return len(chunks)
async def chercher(self, question: str, n_results: int = 3) -> str | None:
if not self.disponible:
return None
try:
results = self._collection.query(query_texts=[question], n_results=n_results)
extraits = results["documents"][0]
sources = [m["source"] for m in results["metadatas"][0]]
if not extraits:
return None
# Reformuler avec Ollama si disponible
try:
async with httpx.AsyncClient(timeout=30) as client:
r = await client.post(
OLLAMA_URL,
json={
"model": OLLAMA_MODEL,
"prompt": (
f"Problème terrain : {question}\n\n"
"Documentation :\n" + "\n\n".join(extraits) +
"\n\nDonne 3 pistes de diagnostic en français, "
"en langage simple pour un technicien de terrain."
),
"stream": False,
},
)
if r.status_code == 200:
return r.json().get("response", "").strip() or None
except Exception:
pass
# Fallback : extraits bruts
return "\n\n---\n\n".join(f"📄 *{s}*\n{e}" for s, e in zip(sources, extraits))
except Exception as exc:
log.error("SAVAssistant.chercher : %s", exc)
return None
# Shared module-level instance, built at import time; the constructor opens
# the ChromaDB store when chromadb is importable.
sav_assistant = SAVAssistant()
def extract_pdf_text(pdf_bytes: bytes) -> str:
    """Extract the text of a PDF.

    Args:
        pdf_bytes: Raw content of the PDF file.

    Returns:
        The text of every page that yields non-blank text, each stripped and
        joined with a blank line. An empty string when the input is empty,
        pypdf is not installed, or the document cannot be read (the error is
        logged, never raised).
    """
    if not pdf_bytes:
        # Nothing to parse: skip pypdf and avoid logging a spurious error.
        return ""
    if not _pypdf_ok:
        return ""
    try:
        reader = pypdf.PdfReader(BytesIO(pdf_bytes))
        pages = []
        for page in reader.pages:
            # Strip before testing so whitespace-only pages do not add empty
            # entries (and stray separators) to the result.
            text = (page.extract_text() or "").strip()
            if text:
                pages.append(text)
        return "\n\n".join(pages)
    except Exception as exc:
        log.error("extract_pdf_text : %s", exc)
        return ""