""" 정비지침서(또는 추가 지식) JSON 로딩/검색 기반 목표: - 사용자가 JSON 형태로 정비 지침서를 추가해두면 앱이 읽어서 AI 프롬프트에 관련 발췌를 넣을 수 있게 함 기본 경로: - resources/maintenance_kb/*.json - ~/.mmi_analyzer/maintenance_kb/*.json JSON 포맷(권장, 유연하게 파싱): [ { "id": "door_001", "title": "출입문 불일치 대응", "tags": ["door", "psd", "fault"], "keywords": ["출입문", "PSD", "불일치", "DOORSTAT"], "content": "정비 절차 ...", "source": "정비지침서 v1.2" } ] """ from __future__ import annotations import json import re from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional, Tuple def _tokenize(text: str) -> List[str]: text = (text or "").lower() # 한글/영문/숫자 중심 토큰화 (아주 단순) tokens = re.findall(r"[0-9a-z가-힣]{2,}", text) return tokens @dataclass class KBDoc: doc_id: str title: str content: str tags: List[str] keywords: List[str] source: str raw: Dict[str, Any] class MaintenanceKnowledgeBase: def __init__(self): self._docs: List[KBDoc] = [] self._resource_dir = Path(__file__).resolve().parents[2] / "resources" / "maintenance_kb" self._user_dir = Path.home() / ".mmi_analyzer" / "maintenance_kb" self._resource_dir.mkdir(parents=True, exist_ok=True) self._user_dir.mkdir(parents=True, exist_ok=True) def count(self) -> int: return len(self._docs) def reload(self) -> None: self._docs = [] for base in [self._resource_dir, self._user_dir]: for p in sorted(base.glob("*.json")): self._load_file(p) def _load_file(self, path: Path) -> None: try: data = json.loads(path.read_text(encoding="utf-8")) except Exception: return items = data if isinstance(data, list) else [data] for i, item in enumerate(items): if not isinstance(item, dict): continue title = str(item.get("title") or item.get("name") or path.stem) content = str(item.get("content") or item.get("text") or "") tags = item.get("tags") or [] keywords = item.get("keywords") or [] source = str(item.get("source") or str(path.name)) doc_id = str(item.get("id") or f"{path.stem}:{i}") if not content and item.get("steps"): # steps 배열을 content로 합치기 try: content = "\n".join([str(s) for s in item.get("steps")]) except Exception: content = "" self._docs.append( KBDoc( doc_id=doc_id, title=title, content=content, tags=list(tags) if isinstance(tags, list) else [str(tags)], keywords=list(keywords) if isinstance(keywords, list) else [str(keywords)], source=source, raw=item, ) ) def search(self, query: str, top_k: int = 3) -> List[Dict[str, Any]]: q_tokens = set(_tokenize(query)) if not q_tokens or not self._docs: return [] scored: List[Tuple[float, KBDoc]] = [] for d in self._docs: hay = " ".join([d.title, d.content, " ".join(d.tags), " ".join(d.keywords)]).lower() h_tokens = _tokenize(hay) if not h_tokens: continue # 단순 점수: 토큰 매칭 수 + 키워드/태그 가중치 base = 0.0 for t in q_tokens: if t in hay: base += 1.0 for kw in d.keywords: if kw and kw.lower() in query.lower(): base += 2.0 scored.append((base, d)) scored.sort(key=lambda x: x[0], reverse=True) results = [] for score, d in scored[: max(0, top_k)]: if score <= 0: continue snippet = d.content.strip().replace("\r\n", "\n") if len(snippet) > 600: snippet = snippet[:600] + "..." results.append( { "id": d.doc_id, "title": d.title, "score": score, "source": d.source, "snippet": snippet, } ) return results _kb_singleton: Optional[MaintenanceKnowledgeBase] = None def get_maintenance_kb() -> MaintenanceKnowledgeBase: global _kb_singleton if _kb_singleton is None: _kb_singleton = MaintenanceKnowledgeBase() _kb_singleton.reload() return _kb_singleton