AI_MMI_Analyser/app/ai/maintenance_kb.py

158 lines
4.8 KiB
Python

"""
Loading and search foundation for maintenance-guide (or other supplementary knowledge) JSON files.

Goal:
- When a user adds maintenance guides in JSON form, the app reads them so that
  relevant excerpts can be inserted into the AI prompt.

Default paths:
- resources/maintenance_kb/*.json
- ~/.mmi_analyzer/maintenance_kb/*.json

JSON format (recommended; parsed leniently):
[
  {
    "id": "door_001",
    "title": "출입문 불일치 대응",
    "tags": ["door", "psd", "fault"],
    "keywords": ["출입문", "PSD", "불일치", "DOORSTAT"],
    "content": "정비 절차 ...",
    "source": "정비지침서 v1.2"
  }
]
"""
from __future__ import annotations
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
def _tokenize(text: str) -> List[str]:
text = (text or "").lower()
# 한글/영문/숫자 중심 토큰화 (아주 단순)
tokens = re.findall(r"[0-9a-z가-힣]{2,}", text)
return tokens
@dataclass
class KBDoc:
    """One knowledge-base entry as stored after loading.

    Field order is part of the constructor signature and must not change.
    """

    # Identifier of the entry.
    doc_id: str
    # Display title of the entry.
    title: str
    # Body text; search snippets are cut from this field.
    content: str
    # Tag strings, included in the searchable text.
    tags: List[str]
    # Keyword strings; a keyword found in the query earns extra score.
    keywords: List[str]
    # Label describing where the entry came from.
    source: str
    # The original mapping this entry was built from.
    raw: Dict[str, Any]
class MaintenanceKnowledgeBase:
    """In-memory collection of maintenance-guide entries loaded from JSON files.

    Entries are read from two directories: ``resources/maintenance_kb`` (resolved
    relative to this file) and ``~/.mmi_analyzer/maintenance_kb``. They can be
    ranked against a free-text query with :meth:`search`.
    """

    # Maximum number of characters of ``content`` returned as a search snippet.
    _SNIPPET_MAX_CHARS = 600

    def __init__(self):
        """Resolve both source directories and create them when possible.

        No documents are loaded here; call :meth:`reload` to populate the store.
        """
        self._docs: List[KBDoc] = []
        self._resource_dir = Path(__file__).resolve().parents[2] / "resources" / "maintenance_kb"
        self._user_dir = Path.home() / ".mmi_analyzer" / "maintenance_kb"
        for directory in (self._resource_dir, self._user_dir):
            try:
                directory.mkdir(parents=True, exist_ok=True)
            except OSError:
                # Best-effort only: a read-only or otherwise uncreatable location
                # must not prevent construction. ``reload`` skips missing folders.
                pass

    @staticmethod
    def _as_str_list(value: Any) -> List[str]:
        """Normalize a JSON ``tags``/``keywords`` value to a list of strings.

        Lists and tuples are converted element by element (``None`` entries are
        dropped); any other truthy value becomes a single-element list; falsy
        values become an empty list. Guaranteeing ``str`` elements keeps
        ``" ".join(...)`` and ``.lower()`` in :meth:`search` from raising.
        """
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(entry) for entry in value if entry is not None]
        return [str(value)]

    @staticmethod
    def _steps_to_text(steps: Any) -> str:
        """Turn a JSON ``steps`` value into newline-separated text.

        A plain string is returned unchanged (instead of being split into
        characters); any other iterable is joined one element per line; a
        falsy or non-iterable value yields ``""``.
        """
        if not steps:
            return ""
        if isinstance(steps, str):
            return steps
        try:
            return "\n".join(str(step) for step in steps)
        except TypeError:
            return ""

    def count(self) -> int:
        """Return the number of documents currently loaded."""
        return len(self._docs)

    def reload(self) -> None:
        """Discard all loaded documents and re-read every ``*.json`` file.

        The resource directory is read before the user directory, and files
        within each directory are read in sorted path order.
        """
        self._docs = []
        for base in (self._resource_dir, self._user_dir):
            if not base.is_dir():
                continue
            for path in sorted(base.glob("*.json")):
                self._load_file(path)

    def _load_file(self, path: Path) -> None:
        """Parse one JSON file and append its entries to the store.

        The file may hold a single object or a list of objects; list elements
        that are not objects are skipped. Files that cannot be read or parsed
        are ignored.
        """
        try:
            # "utf-8-sig" reads plain UTF-8 and also strips a leading BOM, which
            # json.loads would otherwise reject.
            data = json.loads(path.read_text(encoding="utf-8-sig"))
        except Exception:
            # Deliberate best-effort: one unreadable or malformed file must not
            # stop the remaining files from loading.
            return

        items = data if isinstance(data, list) else [data]
        for index, item in enumerate(items):
            if not isinstance(item, dict):
                continue

            content = str(item.get("content") or item.get("text") or "")
            if not content:
                # Fall back to merging the "steps" array into the content.
                content = self._steps_to_text(item.get("steps"))

            self._docs.append(
                KBDoc(
                    doc_id=str(item.get("id") or f"{path.stem}:{index}"),
                    title=str(item.get("title") or item.get("name") or path.stem),
                    content=content,
                    tags=self._as_str_list(item.get("tags")),
                    keywords=self._as_str_list(item.get("keywords")),
                    source=str(item.get("source") or str(path.name)),
                    raw=item,
                )
            )

    def search(self, query: str, top_k: int = 3) -> List[Dict[str, Any]]:
        """Return up to *top_k* documents that best match *query*.

        Scoring is intentionally simple: +1.0 for every distinct query token
        that occurs as a substring of the document's lowercased
        title/content/tags/keywords text, and +2.0 for every document keyword
        that occurs (case-insensitively) inside the query. Documents with a
        score of zero are never returned.

        Each result is a dict with the keys ``id``, ``title``, ``score``,
        ``source`` and ``snippet`` (content truncated to 600 characters plus
        ``"..."``). Results are ordered by descending score; ties keep load order.
        """
        if not self._docs:
            return []
        query_tokens = set(_tokenize(query))
        if not query_tokens:
            return []

        query_lower = query.lower()  # hoisted: invariant across documents
        scored: List[Tuple[float, KBDoc]] = []
        for doc in self._docs:
            haystack = " ".join(
                [doc.title, doc.content, " ".join(doc.tags), " ".join(doc.keywords)]
            ).lower()
            if not _tokenize(haystack):
                # Nothing searchable in this document.
                continue
            score = 0.0
            for token in query_tokens:
                if token in haystack:
                    score += 1.0
            for keyword in doc.keywords:
                if keyword and keyword.lower() in query_lower:
                    score += 2.0
            scored.append((score, doc))

        # Stable sort: equal scores keep their original (load) order.
        scored.sort(key=lambda pair: pair[0], reverse=True)

        results = []
        for score, doc in scored[: max(0, top_k)]:
            if score <= 0:
                continue
            snippet = doc.content.strip().replace("\r\n", "\n")
            if len(snippet) > self._SNIPPET_MAX_CHARS:
                snippet = snippet[: self._SNIPPET_MAX_CHARS] + "..."
            results.append(
                {
                    "id": doc.doc_id,
                    "title": doc.title,
                    "score": score,
                    "source": doc.source,
                    "snippet": snippet,
                }
            )
        return results
_kb_singleton: Optional[MaintenanceKnowledgeBase] = None


def get_maintenance_kb() -> MaintenanceKnowledgeBase:
    """Return the shared knowledge base, building and loading it on first use.

    The first call constructs a ``MaintenanceKnowledgeBase``, stores it in the
    module-level ``_kb_singleton`` and calls ``reload()`` on it; later calls
    return that same instance without reloading.
    """
    global _kb_singleton
    if _kb_singleton is not None:
        return _kb_singleton
    _kb_singleton = MaintenanceKnowledgeBase()
    _kb_singleton.reload()
    return _kb_singleton