"""
JSON loading and keyword search for maintenance manuals (or other supplementary knowledge).

Goal:
- When users add maintenance manuals in JSON form, the app reads them and can
  insert relevant excerpts into the AI prompt.

Default paths:
- resources/maintenance_kb/*.json
- ~/.mmi_analyzer/maintenance_kb/*.json

JSON format (recommended; parsed leniently):
[
  {
    "id": "door_001",
    "title": "출입문 불일치 대응",
    "tags": ["door", "psd", "fault"],
    "keywords": ["출입문", "PSD", "불일치", "DOORSTAT"],
    "content": "정비 절차 ...",
    "source": "정비지침서 v1.2"
  }
]
"""
|
|
|
|
from __future__ import annotations

import json
import logging
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
|
|
def _tokenize(text: str) -> List[str]:
|
|
text = (text or "").lower()
|
|
# 한글/영문/숫자 중심 토큰화 (아주 단순)
|
|
tokens = re.findall(r"[0-9a-z가-힣]{2,}", text)
|
|
return tokens
|
|
|
|
|
|
@dataclass
class KBDoc:
    """A single knowledge-base entry held in memory.

    Attributes:
        doc_id: Identifier of the entry.
        title: Human-readable title of the entry.
        content: Body text of the entry; used for matching and for snippets.
        tags: Tag values attached to the entry.
        keywords: Keyword values attached to the entry.
        source: Label describing where the entry came from.
        raw: The dictionary the entry was built from.
    """

    doc_id: str
    title: str
    content: str
    tags: List[str]
    keywords: List[str]
    source: str
    raw: Dict[str, Any]
|
|
|
|
|
|
class MaintenanceKnowledgeBase:
    """Keyword-searchable collection of maintenance documents loaded from JSON.

    Documents are read from two directories: ``resources/maintenance_kb``
    resolved relative to this module's location, and
    ``~/.mmi_analyzer/maintenance_kb`` in the user's home directory.
    Call :meth:`reload` to (re)read the files and :meth:`search` to rank the
    loaded documents against a free-text query.
    """

    _log = logging.getLogger(__name__)

    # Maximum number of content characters returned in a search snippet.
    _SNIPPET_LIMIT = 600

    def __init__(self):
        """Resolve the knowledge-base directories and create them if possible.

        No documents are loaded here; call :meth:`reload` to read the files.
        """
        self._docs: List[KBDoc] = []

        self._resource_dir = Path(__file__).resolve().parents[2] / "resources" / "maintenance_kb"
        self._user_dir = Path.home() / ".mmi_analyzer" / "maintenance_kb"

        # Creating the directories is only a convenience (it gives users a
        # place to drop files). A read-only install location must not make
        # the whole knowledge base unusable, so failures are logged, not raised.
        for directory in (self._resource_dir, self._user_dir):
            try:
                directory.mkdir(parents=True, exist_ok=True)
            except OSError as exc:
                self._log.warning(
                    "Could not create knowledge-base directory %s: %s", directory, exc
                )

    def count(self) -> int:
        """Return the number of documents currently loaded."""
        return len(self._docs)

    def reload(self) -> None:
        """Discard all loaded documents and re-read every ``*.json`` file.

        The resource directory is read before the user directory, and files
        within each directory are processed in sorted path order.
        """
        self._docs = []
        for base in (self._resource_dir, self._user_dir):
            # The directory may be missing when it could not be created.
            if not base.is_dir():
                continue
            for path in sorted(base.glob("*.json")):
                self._load_file(path)

    @staticmethod
    def _as_str_list(value: Any) -> List[str]:
        """Normalize a ``tags``/``keywords`` JSON value to a list of strings."""
        if not value:
            return []
        if isinstance(value, list):
            # JSON lists may hold numbers or nulls; search joins and
            # lower-cases these values, so they must all be strings.
            return [str(element) for element in value if element is not None]
        return [str(value)]

    @staticmethod
    def _steps_to_text(steps: Any) -> str:
        """Join a ``steps`` JSON value into newline-separated text."""
        if not steps:
            return ""
        if isinstance(steps, str):
            # Iterating a string would put every character on its own line.
            return steps
        try:
            return "\n".join(str(step) for step in steps)
        except TypeError:
            # Not iterable (e.g. a bare number): treat as having no content.
            return ""

    def _load_file(self, path: Path) -> None:
        """Parse one JSON file and append its entries to the loaded documents.

        The file may contain a single object or a list of objects; list
        elements that are not objects are skipped. A file that cannot be
        read or parsed is skipped with a logged warning.

        Args:
            path: Path of the JSON file to load.
        """
        try:
            # "utf-8-sig" reads plain UTF-8 and also strips a leading BOM,
            # which json.loads would otherwise reject.
            data = json.loads(path.read_text(encoding="utf-8-sig"))
        except (OSError, ValueError, RecursionError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            self._log.warning("Skipping knowledge-base file %s: %s", path, exc)
            return

        items = data if isinstance(data, list) else [data]
        for index, item in enumerate(items):
            if not isinstance(item, dict):
                continue

            content = str(item.get("content") or item.get("text") or "")
            if not content:
                # Fall back to a "steps" array merged into one text body.
                content = self._steps_to_text(item.get("steps"))

            self._docs.append(
                KBDoc(
                    doc_id=str(item.get("id") or f"{path.stem}:{index}"),
                    title=str(item.get("title") or item.get("name") or path.stem),
                    content=content,
                    tags=self._as_str_list(item.get("tags")),
                    keywords=self._as_str_list(item.get("keywords")),
                    source=str(item.get("source") or path.name),
                    raw=item,
                )
            )

    def search(self, query: str, top_k: int = 3) -> List[Dict[str, Any]]:
        """Return up to *top_k* loaded documents that best match *query*.

        Scoring is intentionally simple: one point for every distinct query
        token that occurs as a substring of the document's lower-cased
        title, content, tags and keywords, plus two points for every
        document keyword that occurs (case-insensitively) in the query.
        Documents scoring zero are never returned; ties keep load order.

        Args:
            query: Free-text query.
            top_k: Maximum number of results; values below zero act as zero.

        Returns:
            A list of dicts with the keys ``id``, ``title``, ``score``,
            ``source`` and ``snippet`` (the content, truncated to 600
            characters plus ``"..."``), ordered by descending score.
        """
        q_tokens = set(_tokenize(query))
        if not q_tokens or not self._docs:
            return []

        query_lower = query.lower()
        scored: List[Tuple[float, KBDoc]] = []
        for doc in self._docs:
            keywords = [str(kw) for kw in doc.keywords]
            hay = " ".join(
                [doc.title, doc.content, " ".join(str(tag) for tag in doc.tags), " ".join(keywords)]
            ).lower()
            # Documents without a single usable token are never candidates.
            if not _tokenize(hay):
                continue

            token_hits = sum(1 for token in q_tokens if token in hay)
            keyword_hits = sum(1 for kw in keywords if kw and kw.lower() in query_lower)
            score = float(token_hits) + 2.0 * keyword_hits
            if score > 0:
                scored.append((score, doc))

        # list.sort is stable, so equally scored documents keep load order.
        scored.sort(key=lambda pair: pair[0], reverse=True)

        results = []
        for score, doc in scored[: max(0, top_k)]:
            snippet = doc.content.strip().replace("\r\n", "\n")
            if len(snippet) > self._SNIPPET_LIMIT:
                snippet = snippet[: self._SNIPPET_LIMIT] + "..."
            results.append(
                {
                    "id": doc.doc_id,
                    "title": doc.title,
                    "score": score,
                    "source": doc.source,
                    "snippet": snippet,
                }
            )
        return results
|
|
|
|
|
|
# Module-level shared instance; created lazily by get_maintenance_kb().
_kb_singleton: Optional[MaintenanceKnowledgeBase] = None


def get_maintenance_kb() -> MaintenanceKnowledgeBase:
    """Return the shared knowledge base, creating and loading it on first use.

    The first call constructs a ``MaintenanceKnowledgeBase``, stores it in the
    module-level ``_kb_singleton`` and calls ``reload()`` on it; later calls
    return that same instance without reloading.
    """
    global _kb_singleton
    if _kb_singleton is not None:
        return _kb_singleton

    _kb_singleton = MaintenanceKnowledgeBase()
    _kb_singleton.reload()
    return _kb_singleton
|
|
|
|
|