# IT_Server/modules/ocr_module_mmocr.py
#
# 65 lines
# 2.7 KiB
# Python

import os
import cv2
import numpy as np
import logging
from typing import List, Dict, Any
from mmocr.apis import TextDetInferencer, TextRecogInferencer
class OCRModule:
    """Two-stage OCR helper: MMOCR text detection followed by recognition.

    Detection yields polygons for one image; each polygon is masked and
    cropped out of the image and the crops are passed to the recogniser.

    NOTE(review): the result parsing below follows the MMOCR 1.x inferencer
    output (``{'predictions': [...], 'visualization': [...]}``) and falls back
    to the legacy ``boundary_result`` / ``(text, score)`` layout the previous
    code assumed - confirm against the installed MMOCR version.
    """

    def __init__(self, det_config: str, det_checkpoint: str,
                 rec_config: str, rec_checkpoint: str,
                 logger=None):
        """Create the detection and recognition inferencers on CPU.

        Args:
            det_config: Detection model config (path or model name).
            det_checkpoint: Detection model checkpoint path.
            rec_config: Recognition model config (path or model name).
            rec_checkpoint: Recognition model checkpoint path.
            logger: Optional logger; defaults to this module's logger.
        """
        self.logger = logger or logging.getLogger(__name__)
        # Process-wide side effect: hides every CUDA device from this process.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        # Initialise the MMOCR detection and recognition inferencers.
        # The config goes in ``model`` and the checkpoint in ``weights``; a
        # dict passed as ``model`` would be interpreted as a full config.
        # NOTE(review): MMOCR 1.x appears to export ``TextRecInferencer``;
        # confirm that the imported name ``TextRecogInferencer`` exists.
        self.det_infer = TextDetInferencer(
            model=det_config, weights=det_checkpoint, device='cpu')
        self.rec_infer = TextRecogInferencer(
            model=rec_config, weights=rec_checkpoint, device='cpu')
        self.logger.info("✅ MMOCR detection 및 recognition 모델 초기화 완료")

    def detect_text(self, image_path: str, method: str = 'polygon') -> List[Dict[str, Any]]:
        """Detect and recognise the text regions of one image.

        Args:
            image_path: Path of the image file to process.
            method: Label that is logged and copied into every result entry;
                it does not change how detection is performed.

        Returns:
            One dict per recognised region with the keys ``text``,
            ``confidence``, ``polygon`` (list of integer ``[x, y]`` points),
            ``bbox`` (``(x, y, w, h)``) and ``method``. An empty list is
            returned when the file is missing or unreadable, or when no
            usable region is found.
        """
        if not os.path.exists(image_path):
            self.logger.error(f"이미지 파일을 찾을 수 없습니다: {image_path}")
            return []
        img = cv2.imread(image_path)
        if img is None:
            self.logger.error(f"이미지를 읽을 수 없습니다: {image_path}")
            return []
        self.logger.info(f"❇️ OCR 감지(method={method}) 시작")

        # 1) Detect text regions.
        det_res = self.det_infer(image_path)
        polys = self._extract_polygons(det_res)
        self.logger.info(f"👉 감지된 텍스트 영역 수: {len(polys)}")

        # 2) Crop every region; keep each polygon paired with its crop so
        #    the recognition results stay aligned after empty crops are dropped.
        regions = []
        for poly in polys:
            points = self._as_points(poly)
            crop = self._crop_poly(img, points)
            if crop.size:
                regions.append((points, crop))
        if not regions:
            # Nothing to recognise; do not call the recogniser with no input.
            return []

        rec_res = self.rec_infer([crop for _, crop in regions])
        self.logger.info("📖 텍스트 인식 완료")

        ocr_results = []
        for (points, _), rec in zip(regions, self._extract_predictions(rec_res)):
            text, score = self._parse_recognition(rec)
            x, y, w, h = cv2.boundingRect(points)
            ocr_results.append({
                'text': text,
                'confidence': score,
                'polygon': points.tolist(),
                'bbox': (int(x), int(y), int(w), int(h)),
                'method': method,
            })
        return ocr_results

    def filter_chinese_text(self, ocr_results: List[Dict]) -> List[Dict]:
        """Keep the results whose text has a character in U+4E00..U+9FFF.

        Entries with a missing or empty ``text`` value are dropped instead of
        raising ``KeyError``.
        """
        chinese = [
            r for r in ocr_results
            if any('\u4e00' <= c <= '\u9fff' for c in (r.get('text') or ''))
        ]
        self.logger.info(f"중국어 텍스트 {len(chinese)}개 필터링 완료")
        return chinese

    def _crop_poly(self, img: np.ndarray, poly: List[List[int]]) -> np.ndarray:
        """Return the polygon's bounding-box crop with outside pixels zeroed.

        ``poly`` may be nested ``[x, y]`` pairs or a flat coordinate list. The
        bounding box is clipped to the image so that a polygon reaching past
        the border cannot produce negative slice indices. A zero-sized array
        is returned when the polygon is empty or entirely outside the image.
        """
        points = self._as_points(poly)
        img_h, img_w = img.shape[:2]
        if points.size == 0:
            return img[0:0, 0:0]
        x, y, w, h = cv2.boundingRect(points)
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, img_w), min(y + h, img_h)
        if x1 <= x0 or y1 <= y0:
            return img[0:0, 0:0]
        mask = np.zeros((img_h, img_w), dtype=np.uint8)
        cv2.fillPoly(mask, [points], 255)
        region = img[y0:y1, x0:x1]
        return cv2.bitwise_and(region, region, mask=mask[y0:y1, x0:x1])

    @staticmethod
    def _as_points(poly) -> np.ndarray:
        """Convert a polygon to an ``(N, 2)`` int32 array of ``[x, y]`` points.

        Accepts nested pairs or a flat ``[x1, y1, x2, y2, ...]`` list. A flat
        list of odd length is treated as coordinates followed by one trailing
        score, which is dropped. Coordinates are rounded to the nearest pixel.
        """
        coords = np.asarray(poly, dtype=np.float64)
        if coords.ndim == 1 and coords.size % 2:
            coords = coords[:-1]
        return np.rint(coords.reshape(-1, 2)).astype(np.int32)

    @staticmethod
    def _extract_predictions(result) -> list:
        """Return the per-input prediction list of an inferencer result.

        A dict is read through its ``'predictions'`` key; any other value is
        treated as the prediction sequence itself.
        """
        if isinstance(result, dict):
            return list(result.get('predictions') or [])
        return list(result or [])

    @staticmethod
    def _extract_polygons(det_res) -> list:
        """Return the polygons detected for the first (only) input image.

        Reads ``'polygons'`` from the first prediction and falls back to the
        legacy ``'boundary_result'`` key; returns ``[]`` when neither exists.
        """
        preds = OCRModule._extract_predictions(det_res)
        if not preds:
            return []
        first = preds[0]
        if 'polygons' in first:
            return list(first['polygons'])
        return list(first.get('boundary_result', []))

    @staticmethod
    def _parse_recognition(rec) -> tuple:
        """Return ``(text, confidence)`` for one recognition prediction.

        Accepts a dict with ``'text'`` and ``'scores'`` (or ``'score'``), or a
        ``(text, score)`` pair. A sequence of scores is averaged.
        """
        if isinstance(rec, dict):
            text = rec.get('text', '')
            score = rec.get('scores', rec.get('score', 0.0))
        else:
            text, score = rec
        return str(text), float(np.mean(score))