# IT_Server/modules/ocr_module_mmocr.py

import os
import cv2
import numpy as np
import logging
from typing import List, Dict, Any
from mmocr.apis import TextDetInferencer, TextRecogInferencer

class OCRModule:
    """Two-stage OCR pipeline: MMOCR text detection followed by recognition.

    The detector locates text polygons in an image, each polygon is cropped
    (pixels outside the polygon are blacked out) and the crops are handed to
    the recognizer. Both models are pinned to the CPU.

    NOTE(review): the inferencer constructor arguments
    (``model=dict(config=..., ckpt=...)``) and the result layout read in
    ``detect_text`` (``det_res[0]['boundary_result']`` and ``(text, score)``
    pairs) are kept exactly as written; confirm them against the installed
    MMOCR version.
    """

    def __init__(self, det_config: str, det_checkpoint: str,
                 rec_config: str, rec_checkpoint: str,
                 logger=None):
        """Build the detection and recognition inferencers.

        Args:
            det_config: Config passed to the text detection inferencer.
            det_checkpoint: Checkpoint passed to the text detection inferencer.
            rec_config: Config passed to the text recognition inferencer.
            rec_checkpoint: Checkpoint passed to the text recognition inferencer.
            logger: Logger to use; defaults to this module's logger.
        """
        self.logger = logger or logging.getLogger(__name__)
        # Process-wide side effect: clears the CUDA device list for anything
        # that reads this variable from here on.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''

        # Initialise the MMOCR detection and recognition inferencers.
        self.det_infer = TextDetInferencer(model=dict(config=det_config, ckpt=det_checkpoint), device='cpu')
        self.rec_infer = TextRecogInferencer(model=dict(config=rec_config, ckpt=rec_checkpoint), device='cpu')
        self.logger.info("✅ MMOCR detection 및 recognition 모델 초기화 완료")

    def detect_text(self, image_path: str, method: str = 'polygon') -> List[Dict[str, Any]]:
        """Detect and recognise the text regions of one image file.

        Args:
            image_path: Path of the image to process.
            method: Label copied verbatim into every result's ``'method'`` field.

        Returns:
            One dict per recognised region with the keys ``'text'``,
            ``'confidence'`` (float), ``'polygon'`` (as returned by the
            detector), ``'bbox'`` (``(x, y, w, h)`` of the polygon) and
            ``'method'``. An empty list is returned when the file is missing
            or unreadable, when nothing is detected, or when no detected
            region overlaps the image.
        """
        if not os.path.exists(image_path):
            self.logger.error(f"이미지 파일을 찾을 수 없습니다: {image_path}")
            return []

        img = cv2.imread(image_path)
        if img is None:
            self.logger.error(f"이미지를 읽을 수 없습니다: {image_path}")
            return []
        self.logger.info(f"❇️ OCR 감지(method={method}) 시작")

        # 1) Detect text regions.
        det_res = self.det_infer(image_path)
        polys = det_res[0]['boundary_result']
        self.logger.info(f"👉 감지된 텍스트 영역 수: {len(polys)}")
        if len(polys) == 0:
            # Nothing to recognise; do not call the recognizer with an empty batch.
            return []

        # 2) Crop every region, keeping polygon and crop paired so results
        #    stay aligned even if some regions have to be dropped.
        regions = []
        for poly in polys:
            crop = self._crop_poly(img, poly)
            if crop.size:
                regions.append((poly, crop))

        skipped = len(polys) - len(regions)
        if skipped:
            self.logger.warning(f"⚠️ 이미지 범위를 벗어난 텍스트 영역 {skipped}개를 건너뜁니다")
        if not regions:
            return []

        rec_res = self.rec_infer([crop for _, crop in regions])
        self.logger.info("📖 텍스트 인식 완료")

        ocr_results = []
        for (poly, _), rec in zip(regions, rec_res):
            text, score = rec
            x, y, w, h = cv2.boundingRect(self._poly_points(poly))
            ocr_results.append({
                'text': text,
                'confidence': float(score),
                'polygon': poly,
                'bbox': (int(x), int(y), int(w), int(h)),
                'method': method,
            })

        return ocr_results

    def filter_chinese_text(self, ocr_results: List[Dict]) -> List[Dict]:
        """Keep the results whose text has a character in U+4E00..U+9FFF.

        Args:
            ocr_results: Result dicts, each with a ``'text'`` string.

        Returns:
            The matching dicts, in their original order (same objects, not copies).
        """
        chinese = [r for r in ocr_results if any('\u4e00' <= c <= '\u9fff' for c in r['text'])]
        self.logger.info(f"중국어 텍스트 {len(chinese)}개 필터링 완료")
        return chinese

    @staticmethod
    def _poly_points(poly) -> np.ndarray:
        """Return *poly* as an (N, 2) int32 point array.

        Accepts both nested ``[[x, y], ...]`` and flat ``[x1, y1, x2, y2, ...]``
        coordinate layouts; OpenCV's point-set functions need the (N, 2) form.
        """
        return np.asarray(poly, dtype=np.int32).reshape(-1, 2)

    @staticmethod
    def _clip_rect(x: int, y: int, w: int, h: int, width: int, height: int) -> tuple:
        """Clip the rect ``(x, y, w, h)`` to a ``width`` x ``height`` image.

        Returns:
            ``(x0, y0, x1, y1)`` with exclusive upper bounds; the rect is empty
            (``x0 == x1`` or ``y0 == y1``) when it lies fully outside the image.
        """
        x0 = min(max(x, 0), width)
        y0 = min(max(y, 0), height)
        x1 = min(max(x + w, 0), width)
        y1 = min(max(y + h, 0), height)
        return x0, y0, x1, y1

    def _crop_poly(self, img: np.ndarray, poly: List[List[int]]) -> np.ndarray:
        """Crop the bounding box of *poly* from *img*, zeroing pixels outside the polygon.

        The bounding box is clipped to the image so that a polygon reaching
        past the border cannot produce negative slice indices.

        Returns:
            The masked crop, or an empty array when the polygon does not
            overlap the image at all.
        """
        pts = self._poly_points(poly)
        x, y, w, h = cv2.boundingRect(pts)
        x0, y0, x1, y1 = self._clip_rect(x, y, w, h, img.shape[1], img.shape[0])

        region = img[y0:y1, x0:x1]
        if region.size == 0:
            return region.copy()

        # Rasterise the polygon directly into a crop-sized mask by shifting the
        # points to the crop's origin, instead of allocating a full-image mask.
        mask = np.zeros(region.shape[:2], dtype=np.uint8)
        shifted = pts - np.array([x0, y0], dtype=np.int32)
        cv2.fillPoly(mask, [shifted], 255)
        return cv2.bitwise_and(region, region, mask=mask)