# File-viewer header residue: 65 lines, 2.7 KiB, Python
import logging
import os
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
from mmocr.apis import TextDetInferencer, TextRecogInferencer


class OCRModule:
    """Two-stage OCR pipeline: MMOCR text detection followed by recognition.

    ``detect_text`` locates text regions in an image file, crops each region
    with a polygon mask, runs recognition on the crops and returns one result
    dict per region. ``filter_chinese_text`` narrows such results down to the
    ones containing CJK Unified Ideographs.
    """

    def __init__(self, det_config: str, det_checkpoint: str,
                 rec_config: str, rec_checkpoint: str,
                 logger: Optional[logging.Logger] = None):
        """Build the CPU detection and recognition inferencers.

        Args:
            det_config: Config file path (or model name) of the detector.
            det_checkpoint: Checkpoint (weights) path of the detector.
            rec_config: Config file path (or model name) of the recognizer.
            rec_checkpoint: Checkpoint (weights) path of the recognizer.
            logger: Logger to report progress to; defaults to this module's
                logger when omitted.
        """
        self.logger = logger or logging.getLogger(__name__)
        # NOTE: process-wide side effect. The variable is set for the whole
        # process (presumably to hide GPUs); both inferencers are also pinned
        # to the CPU explicitly below.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''

        # Initialize the MMOCR detection and recognition inferencers. The
        # config goes in `model` and the checkpoint in `weights`; a
        # dict(config=..., ckpt=...) is not a valid `model` argument.
        self.det_infer = TextDetInferencer(model=det_config, weights=det_checkpoint, device='cpu')
        self.rec_infer = TextRecogInferencer(model=rec_config, weights=rec_checkpoint, device='cpu')
        self.logger.info("✅ MMOCR detection 및 recognition 모델 초기화 완료")

    def detect_text(self, image_path: str, method: str = 'polygon') -> List[Dict[str, Any]]:
        """Detect and recognize the text regions of one image file.

        Args:
            image_path: Path of the image to process.
            method: Label that is logged and copied into every result; it does
                not influence how detection is performed.

        Returns:
            One dict per recognized region with the keys ``text``,
            ``confidence`` (float), ``polygon`` (list of ``[x, y]`` integer
            points), ``bbox`` (``(x, y, w, h)`` integers) and ``method``.
            An empty list when the file is missing, cannot be decoded, or no
            usable text region was found.
        """
        if not os.path.exists(image_path):
            self.logger.error(f"이미지 파일을 찾을 수 없습니다: {image_path}")
            return []

        img = cv2.imread(image_path)
        if img is None:
            self.logger.error(f"이미지를 읽을 수 없습니다: {image_path}")
            return []
        self.logger.info(f"❇️ OCR 감지(method={method}) 시작")

        # 1) Detect the text regions.
        polys = self._detected_polygons(self.det_infer(image_path))
        self.logger.info(f"👉 감지된 텍스트 영역 수: {len(polys)}")
        if not polys:
            # Nothing to recognize; do not call the recognizer with no input.
            return []

        # 2) Crop every region, dropping malformed polygons and regions that
        #    fall completely outside the image.
        regions = []
        skipped = 0
        for poly in polys:
            try:
                pts = self._to_points(poly)
            except ValueError:
                skipped += 1
                continue
            crop = self._crop_poly(img, pts)
            if crop.size == 0:
                skipped += 1
                continue
            regions.append((pts, crop))
        if skipped:
            self.logger.warning(f"유효하지 않거나 빈 텍스트 영역 {skipped}개를 건너뜁니다")
        if not regions:
            return []

        # 3) Recognize all crops in a single call.
        rec_res = self.rec_infer([crop for _, crop in regions])
        predictions = rec_res.get('predictions') or []
        self.logger.info("📖 텍스트 인식 완료")

        ocr_results = []
        for (pts, _), rec in zip(regions, predictions):
            text, score = self._parse_recognition(rec)
            x, y, w, h = cv2.boundingRect(pts)
            ocr_results.append({
                'text': text,
                'confidence': score,
                'polygon': pts.tolist(),
                'bbox': (int(x), int(y), int(w), int(h)),
                'method': method,
            })

        return ocr_results

    def filter_chinese_text(self, ocr_results: List[Dict]) -> List[Dict]:
        """Keep the results whose text has at least one CJK Unified Ideograph.

        Only the basic block U+4E00..U+9FFF is checked. Results with a missing
        or ``None`` ``text`` are treated as having no text and are dropped.

        Args:
            ocr_results: Result dicts as produced by ``detect_text``.

        Returns:
            The matching result dicts, in their original order.
        """
        chinese = [
            r for r in ocr_results
            if any('\u4e00' <= c <= '\u9fff' for c in (r.get('text') or ''))
        ]
        self.logger.info(f"중국어 텍스트 {len(chinese)}개 필터링 완료")
        return chinese

    def _crop_poly(self, img: np.ndarray, poly: List[List[int]]) -> np.ndarray:
        """Cut the polygon's bounding box out of ``img``, masking the outside.

        Args:
            img: Image array whose first two axes are rows and columns.
            poly: Polygon as ``[x, y]`` points or as a flat
                ``[x1, y1, x2, y2, ...]`` sequence.

        Returns:
            The bounding-box crop with every pixel outside the polygon set to
            zero. The crop is empty (size 0) when the bounding box does not
            intersect the image.
        """
        pts = self._to_points(poly)
        img_h, img_w = img.shape[:2]
        x, y, w, h = cv2.boundingRect(pts)

        # Clamp to the image: a negative start index would silently wrap
        # around in NumPy slicing and return the wrong pixels.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, img_w), min(y + h, img_h)
        if x1 <= x0 or y1 <= y0:
            return img[0:0, 0:0]

        # Rasterize the polygon into a mask the size of the crop only, by
        # shifting the points into crop coordinates.
        mask = np.zeros((y1 - y0, x1 - x0), dtype=np.uint8)
        shifted = pts - np.array([x0, y0], dtype=np.int32)
        cv2.fillPoly(mask, [shifted], 255)

        region = img[y0:y1, x0:x1]
        return cv2.bitwise_and(region, region, mask=mask)

    @staticmethod
    def _to_points(poly) -> np.ndarray:
        """Convert a polygon into an ``(N, 2)`` int32 array of ``[x, y]`` points.

        Accepts nested ``[[x, y], ...]`` points as well as a flat
        ``[x1, y1, x2, y2, ...]`` sequence; coordinates are rounded to the
        nearest integer.

        Raises:
            ValueError: If the polygon has an odd number of coordinates.
        """
        flat = np.asarray(poly, dtype=np.float64).reshape(-1)
        if flat.size % 2:
            raise ValueError("polygon must contain an even number of coordinates")
        return np.rint(flat).astype(np.int32).reshape(-1, 2)

    @staticmethod
    def _detected_polygons(det_res: Dict[str, Any]) -> List[Any]:
        """Return the polygons of the first image in a detection result.

        Expects the inferencer result layout
        ``{'predictions': [{'polygons': [...], ...}, ...], ...}`` and returns
        an empty list when either level is missing or empty.
        """
        predictions = det_res.get('predictions') or []
        if not predictions:
            return []
        return list(predictions[0].get('polygons') or [])

    @staticmethod
    def _parse_recognition(rec: Dict[str, Any]) -> Tuple[str, float]:
        """Extract ``(text, confidence)`` from one recognition prediction.

        ``scores`` may be a single number or a sequence of numbers (averaged);
        a missing or empty ``scores`` yields a confidence of ``0.0``.
        """
        text = rec.get('text') or ''
        scores = rec.get('scores')
        if scores is None or np.size(scores) == 0:
            return text, 0.0
        return text, float(np.mean(scores))