import os import cv2 import numpy as np import logging from typing import List, Dict, Any from mmocr.apis import TextDetInferencer, TextRecogInferencer class OCRModule: def __init__(self, det_config: str, det_checkpoint: str, rec_config: str, rec_checkpoint: str, logger=None): self.logger = logger or logging.getLogger(__name__) os.environ['CUDA_VISIBLE_DEVICES'] = '' # MMOCR detection 및 recognition inferencer 초기화 self.det_infer = TextDetInferencer(model=dict(config=det_config, ckpt=det_checkpoint), device='cpu') self.rec_infer = TextRecogInferencer(model=dict(config=rec_config, ckpt=rec_checkpoint), device='cpu') self.logger.info("✅ MMOCR detection 및 recognition 모델 초기화 완료") def detect_text(self, image_path: str, method: str = 'polygon') -> List[Dict[str, Any]]: if not os.path.exists(image_path): self.logger.error(f"이미지 파일을 찾을 수 없습니다: {image_path}") return [] img = cv2.imread(image_path) if img is None: self.logger.error(f"이미지를 읽을 수 없습니다: {image_path}") return [] self.logger.info(f"❇️ OCR 감지(method={method}) 시작") # 1) 텍스트 영역 감지 det_res = self.det_infer(image_path) polys = det_res[0]['boundary_result'] self.logger.info(f"👉 감지된 텍스트 영역 수: {len(polys)}") # 2) 영역 crop 후 recognition crops = [self._crop_poly(img, poly) for poly in polys] rec_res = self.rec_infer(crops) self.logger.info("📖 텍스트 인식 완료") ocr_results = [] for poly, rec in zip(polys, rec_res): text, score = rec x, y, w, h = cv2.boundingRect(np.array(poly, dtype=np.int32)) ocr_results.append({ 'text': text, 'confidence': float(score), 'polygon': poly, 'bbox': (int(x), int(y), int(w), int(h)), 'method': method }) return ocr_results def filter_chinese_text(self, ocr_results: List[Dict]) -> List[Dict]: chinese = [r for r in ocr_results if any('\u4e00' <= c <= '\u9fff' for c in r['text'])] self.logger.info(f"중국어 텍스트 {len(chinese)}개 필터링 완료") return chinese def _crop_poly(self, img: np.ndarray, poly: List[List[int]]) -> np.ndarray: mask = np.zeros(img.shape[:2], dtype=np.uint8) cv2.fillPoly(mask, [np.array(poly, dtype=np.int32)], 255) x, y, w, h = cv2.boundingRect(np.array(poly, dtype=np.int32)) return cv2.bitwise_and(img[y:y+h, x:x+w], img[y:y+h, x:x+w], mask=mask[y:y+h, x:x+w])